1 | /* |
2 | * Copyright © 2018 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #ifndef HB_MAP_HH |
28 | #define HB_MAP_HH |
29 | |
30 | #include "hb.hh" |
31 | |
32 | #include "hb-set.hh" |
33 | |
34 | |
35 | /* |
36 | * hb_hashmap_t |
37 | */ |
38 | |
39 | extern HB_INTERNAL const hb_codepoint_t minus_1; |
40 | |
41 | template <typename K, typename V, |
42 | bool minus_one = false> |
43 | struct hb_hashmap_t |
44 | { |
45 | hb_hashmap_t () { init (); } |
46 | ~hb_hashmap_t () { fini (); } |
47 | |
48 | hb_hashmap_t (const hb_hashmap_t& o) : hb_hashmap_t () { alloc (o.population); hb_copy (o, *this); } |
49 | hb_hashmap_t (hb_hashmap_t&& o) : hb_hashmap_t () { hb_swap (*this, o); } |
50 | hb_hashmap_t& operator= (const hb_hashmap_t& o) { reset (); alloc (o.population); hb_copy (o, *this); return *this; } |
51 | hb_hashmap_t& operator= (hb_hashmap_t&& o) { hb_swap (*this, o); return *this; } |
52 | |
53 | hb_hashmap_t (std::initializer_list<hb_pair_t<K, V>> lst) : hb_hashmap_t () |
54 | { |
55 | for (auto&& item : lst) |
56 | set (item.first, item.second); |
57 | } |
58 | template <typename Iterable, |
59 | hb_requires (hb_is_iterable (Iterable))> |
60 | hb_hashmap_t (const Iterable &o) : hb_hashmap_t () |
61 | { |
62 | auto iter = hb_iter (o); |
63 | if (iter.is_random_access_iterator || iter.has_fast_len) |
64 | alloc (hb_len (iter)); |
65 | hb_copy (iter, *this); |
66 | } |
67 | |
68 | struct item_t |
69 | { |
70 | K key; |
71 | uint32_t is_real_ : 1; |
72 | uint32_t is_used_ : 1; |
73 | uint32_t hash : 30; |
74 | V value; |
75 | |
76 | item_t () : key (), |
77 | is_real_ (false), is_used_ (false), |
78 | hash (0), |
79 | value () {} |
80 | |
81 | // Needed for https://github.com/harfbuzz/harfbuzz/issues/4138 |
82 | K& get_key () { return key; } |
83 | V& get_value () { return value; } |
84 | |
85 | bool is_used () const { return is_used_; } |
86 | void set_used (bool is_used) { is_used_ = is_used; } |
87 | void set_real (bool is_real) { is_real_ = is_real; } |
88 | bool is_real () const { return is_real_; } |
89 | |
90 | template <bool v = minus_one, |
91 | hb_enable_if (v == false)> |
92 | static inline const V& default_value () { return Null(V); }; |
93 | template <bool v = minus_one, |
94 | hb_enable_if (v == true)> |
95 | static inline const V& default_value () |
96 | { |
97 | static_assert (hb_is_same (V, hb_codepoint_t), "" ); |
98 | return minus_1; |
99 | }; |
100 | |
101 | bool operator == (const K &o) const { return hb_deref (key) == hb_deref (o); } |
102 | bool operator == (const item_t &o) const { return *this == o.key; } |
103 | hb_pair_t<K, V> get_pair() const { return hb_pair_t<K, V> (key, value); } |
104 | hb_pair_t<const K &, V &> get_pair_ref() { return hb_pair_t<const K &, V &> (key, value); } |
105 | |
106 | uint32_t total_hash () const |
107 | { return (hash * 31) + hb_hash (value); } |
108 | |
109 | static constexpr bool is_trivial = std::is_trivially_constructible<K>::value && |
110 | std::is_trivially_destructible<K>::value && |
111 | std::is_trivially_constructible<V>::value && |
112 | std::is_trivially_destructible<V>::value; |
113 | }; |
114 | |
115 | hb_object_header_t ; |
116 | unsigned int successful : 1; /* Allocations successful */ |
117 | unsigned int population : 31; /* Not including tombstones. */ |
118 | unsigned int occupancy; /* Including tombstones. */ |
119 | unsigned int mask; |
120 | unsigned int prime; |
121 | unsigned int max_chain_length; |
122 | item_t *items; |
123 | |
124 | friend void swap (hb_hashmap_t& a, hb_hashmap_t& b) |
125 | { |
126 | if (unlikely (!a.successful || !b.successful)) |
127 | return; |
128 | unsigned tmp = a.population; |
129 | a.population = b.population; |
130 | b.population = tmp; |
131 | //hb_swap (a.population, b.population); |
132 | hb_swap (a.occupancy, b.occupancy); |
133 | hb_swap (a.mask, b.mask); |
134 | hb_swap (a.prime, b.prime); |
135 | hb_swap (a.max_chain_length, b.max_chain_length); |
136 | hb_swap (a.items, b.items); |
137 | } |
138 | void init () |
139 | { |
140 | hb_object_init (this); |
141 | |
142 | successful = true; |
143 | population = occupancy = 0; |
144 | mask = 0; |
145 | prime = 0; |
146 | max_chain_length = 0; |
147 | items = nullptr; |
148 | } |
149 | void fini () |
150 | { |
151 | hb_object_fini (this); |
152 | |
153 | if (likely (items)) |
154 | { |
155 | unsigned size = mask + 1; |
156 | if (!item_t::is_trivial) |
157 | for (unsigned i = 0; i < size; i++) |
158 | items[i].~item_t (); |
159 | hb_free (items); |
160 | items = nullptr; |
161 | } |
162 | population = occupancy = 0; |
163 | } |
164 | |
165 | void reset () |
166 | { |
167 | successful = true; |
168 | clear (); |
169 | } |
170 | |
171 | bool in_error () const { return !successful; } |
172 | |
173 | bool alloc (unsigned new_population = 0) |
174 | { |
175 | if (unlikely (!successful)) return false; |
176 | |
177 | if (new_population != 0 && (new_population + new_population / 2) < mask) return true; |
178 | |
179 | unsigned int power = hb_bit_storage (hb_max ((unsigned) population, new_population) * 2 + 8); |
180 | unsigned int new_size = 1u << power; |
181 | item_t *new_items = (item_t *) hb_malloc ((size_t) new_size * sizeof (item_t)); |
182 | if (unlikely (!new_items)) |
183 | { |
184 | successful = false; |
185 | return false; |
186 | } |
187 | if (!item_t::is_trivial) |
188 | for (auto &_ : hb_iter (new_items, new_size)) |
189 | new (&_) item_t (); |
190 | else |
191 | hb_memset (new_items, 0, (size_t) new_size * sizeof (item_t)); |
192 | |
193 | unsigned int old_size = size (); |
194 | item_t *old_items = items; |
195 | |
196 | /* Switch to new, empty, array. */ |
197 | population = occupancy = 0; |
198 | mask = new_size - 1; |
199 | prime = prime_for (power); |
200 | max_chain_length = power * 2; |
201 | items = new_items; |
202 | |
203 | /* Insert back old items. */ |
204 | for (unsigned int i = 0; i < old_size; i++) |
205 | { |
206 | if (old_items[i].is_real ()) |
207 | { |
208 | set_with_hash (std::move (old_items[i].key), |
209 | old_items[i].hash, |
210 | std::move (old_items[i].value)); |
211 | } |
212 | if (!item_t::is_trivial) |
213 | old_items[i].~item_t (); |
214 | } |
215 | |
216 | hb_free (old_items); |
217 | |
218 | return true; |
219 | } |
220 | |
221 | template <typename KK, typename VV> |
222 | bool set_with_hash (KK&& key, uint32_t hash, VV&& value, bool overwrite = true) |
223 | { |
224 | if (unlikely (!successful)) return false; |
225 | if (unlikely ((occupancy + occupancy / 2) >= mask && !alloc ())) return false; |
226 | |
227 | hash &= 0x3FFFFFFF; // We only store lower 30bit of hash |
228 | unsigned int tombstone = (unsigned int) -1; |
229 | unsigned int i = hash % prime; |
230 | unsigned length = 0; |
231 | unsigned step = 0; |
232 | while (items[i].is_used ()) |
233 | { |
234 | if ((std::is_integral<K>::value || items[i].hash == hash) && |
235 | items[i] == key) |
236 | { |
237 | if (!overwrite) |
238 | return false; |
239 | else |
240 | break; |
241 | } |
242 | if (!items[i].is_real () && tombstone == (unsigned) -1) |
243 | tombstone = i; |
244 | i = (i + ++step) & mask; |
245 | length++; |
246 | } |
247 | |
248 | item_t &item = items[tombstone == (unsigned) -1 ? i : tombstone]; |
249 | |
250 | if (item.is_used ()) |
251 | { |
252 | occupancy--; |
253 | population -= item.is_real (); |
254 | } |
255 | |
256 | item.key = std::forward<KK> (key); |
257 | item.value = std::forward<VV> (value); |
258 | item.hash = hash; |
259 | item.set_used (true); |
260 | item.set_real (true); |
261 | |
262 | occupancy++; |
263 | population++; |
264 | |
265 | if (unlikely (length > max_chain_length) && occupancy * 8 > mask) |
266 | alloc (mask - 8); // This ensures we jump to next larger size |
267 | |
268 | return true; |
269 | } |
270 | |
271 | template <typename VV> |
272 | bool set (const K &key, VV&& value, bool overwrite = true) { return set_with_hash (key, hb_hash (key), std::forward<VV> (value), overwrite); } |
273 | template <typename VV> |
274 | bool set (K &&key, VV&& value, bool overwrite = true) |
275 | { |
276 | uint32_t hash = hb_hash (key); |
277 | return set_with_hash (std::move (key), hash, std::forward<VV> (value), overwrite); |
278 | } |
279 | |
280 | const V& get_with_hash (const K &key, uint32_t hash) const |
281 | { |
282 | if (!items) return item_t::default_value (); |
283 | auto *item = fetch_item (key, hb_hash (key)); |
284 | if (item) |
285 | return item->value; |
286 | return item_t::default_value (); |
287 | } |
288 | const V& get (const K &key) const |
289 | { |
290 | if (!items) return item_t::default_value (); |
291 | return get_with_hash (key, hb_hash (key)); |
292 | } |
293 | |
294 | void del (const K &key) |
295 | { |
296 | if (!items) return; |
297 | auto *item = fetch_item (key, hb_hash (key)); |
298 | if (item) |
299 | { |
300 | item->set_real (false); |
301 | population--; |
302 | } |
303 | } |
304 | |
305 | /* Has interface. */ |
306 | const V& operator [] (K k) const { return get (k); } |
307 | template <typename VV=V> |
308 | bool has (const K &key, VV **vp = nullptr) const |
309 | { |
310 | if (!items) return false; |
311 | auto *item = fetch_item (key, hb_hash (key)); |
312 | if (item) |
313 | { |
314 | if (vp) *vp = std::addressof (item->value); |
315 | return true; |
316 | } |
317 | return false; |
318 | } |
319 | item_t *fetch_item (const K &key, uint32_t hash) const |
320 | { |
321 | hash &= 0x3FFFFFFF; // We only store lower 30bit of hash |
322 | unsigned int i = hash % prime; |
323 | unsigned step = 0; |
324 | while (items[i].is_used ()) |
325 | { |
326 | if ((std::is_integral<K>::value || items[i].hash == hash) && |
327 | items[i] == key) |
328 | { |
329 | if (items[i].is_real ()) |
330 | return &items[i]; |
331 | else |
332 | return nullptr; |
333 | } |
334 | i = (i + ++step) & mask; |
335 | } |
336 | return nullptr; |
337 | } |
338 | /* Projection. */ |
339 | const V& operator () (K k) const { return get (k); } |
340 | |
341 | unsigned size () const { return mask ? mask + 1 : 0; } |
342 | |
343 | void clear () |
344 | { |
345 | if (unlikely (!successful)) return; |
346 | |
347 | for (auto &_ : hb_iter (items, size ())) |
348 | { |
349 | /* Reconstruct items. */ |
350 | _.~item_t (); |
351 | new (&_) item_t (); |
352 | } |
353 | |
354 | population = occupancy = 0; |
355 | } |
356 | |
357 | bool is_empty () const { return population == 0; } |
358 | explicit operator bool () const { return !is_empty (); } |
359 | |
360 | uint32_t hash () const |
361 | { |
362 | return |
363 | + iter_items () |
364 | | hb_reduce ([] (uint32_t h, const item_t &_) { return h ^ _.total_hash (); }, (uint32_t) 0u) |
365 | ; |
366 | } |
367 | |
368 | bool is_equal (const hb_hashmap_t &other) const |
369 | { |
370 | if (population != other.population) return false; |
371 | |
372 | for (auto pair : iter ()) |
373 | if (other.get (pair.first) != pair.second) |
374 | return false; |
375 | |
376 | return true; |
377 | } |
378 | bool operator == (const hb_hashmap_t &other) const { return is_equal (other); } |
379 | bool operator != (const hb_hashmap_t &other) const { return !is_equal (other); } |
380 | |
381 | unsigned int get_population () const { return population; } |
382 | |
383 | void update (const hb_hashmap_t &other) |
384 | { |
385 | if (unlikely (!successful)) return; |
386 | |
387 | hb_copy (other, *this); |
388 | } |
389 | |
390 | /* |
391 | * Iterator |
392 | */ |
393 | |
394 | auto iter_items () const HB_AUTO_RETURN |
395 | ( |
396 | + hb_iter (items, size ()) |
397 | | hb_filter (&item_t::is_real) |
398 | ) |
399 | auto iter_ref () const HB_AUTO_RETURN |
400 | ( |
401 | + iter_items () |
402 | | hb_map (&item_t::get_pair_ref) |
403 | ) |
404 | auto iter () const HB_AUTO_RETURN |
405 | ( |
406 | + iter_items () |
407 | | hb_map (&item_t::get_pair) |
408 | ) |
409 | auto keys_ref () const HB_AUTO_RETURN |
410 | ( |
411 | + iter_items () |
412 | | hb_map (&item_t::get_key) |
413 | ) |
414 | auto keys () const HB_AUTO_RETURN |
415 | ( |
416 | + keys_ref () |
417 | | hb_map (hb_ridentity) |
418 | ) |
419 | auto values_ref () const HB_AUTO_RETURN |
420 | ( |
421 | + iter_items () |
422 | | hb_map (&item_t::get_value) |
423 | ) |
424 | auto values () const HB_AUTO_RETURN |
425 | ( |
426 | + values_ref () |
427 | | hb_map (hb_ridentity) |
428 | ) |
429 | |
430 | /* C iterator. */ |
431 | bool next (int *idx, |
432 | K *key, |
433 | V *value) const |
434 | { |
435 | unsigned i = (unsigned) (*idx + 1); |
436 | |
437 | unsigned count = size (); |
438 | while (i < count && !items[i].is_real ()) |
439 | i++; |
440 | |
441 | if (i >= count) |
442 | { |
443 | *idx = -1; |
444 | return false; |
445 | } |
446 | |
447 | *key = items[i].key; |
448 | *value = items[i].value; |
449 | |
450 | *idx = (signed) i; |
451 | return true; |
452 | } |
453 | |
454 | /* Sink interface. */ |
455 | hb_hashmap_t& operator << (const hb_pair_t<K, V>& v) |
456 | { set (v.first, v.second); return *this; } |
457 | hb_hashmap_t& operator << (const hb_pair_t<K, V&&>& v) |
458 | { set (v.first, std::move (v.second)); return *this; } |
459 | hb_hashmap_t& operator << (const hb_pair_t<K&&, V>& v) |
460 | { set (std::move (v.first), v.second); return *this; } |
461 | hb_hashmap_t& operator << (const hb_pair_t<K&&, V&&>& v) |
462 | { set (std::move (v.first), std::move (v.second)); return *this; } |
463 | |
464 | static unsigned int prime_for (unsigned int shift) |
465 | { |
466 | /* Following comment and table copied from glib. */ |
467 | /* Each table size has an associated prime modulo (the first prime |
468 | * lower than the table size) used to find the initial bucket. Probing |
469 | * then works modulo 2^n. The prime modulo is necessary to get a |
470 | * good distribution with poor hash functions. |
471 | */ |
472 | /* Not declaring static to make all kinds of compilers happy... */ |
473 | /*static*/ const unsigned int prime_mod [32] = |
474 | { |
475 | 1, /* For 1 << 0 */ |
476 | 2, |
477 | 3, |
478 | 7, |
479 | 13, |
480 | 31, |
481 | 61, |
482 | 127, |
483 | 251, |
484 | 509, |
485 | 1021, |
486 | 2039, |
487 | 4093, |
488 | 8191, |
489 | 16381, |
490 | 32749, |
491 | 65521, /* For 1 << 16 */ |
492 | 131071, |
493 | 262139, |
494 | 524287, |
495 | 1048573, |
496 | 2097143, |
497 | 4194301, |
498 | 8388593, |
499 | 16777213, |
500 | 33554393, |
501 | 67108859, |
502 | 134217689, |
503 | 268435399, |
504 | 536870909, |
505 | 1073741789, |
506 | 2147483647 /* For 1 << 31 */ |
507 | }; |
508 | |
509 | if (unlikely (shift >= ARRAY_LENGTH (prime_mod))) |
510 | return prime_mod[ARRAY_LENGTH (prime_mod) - 1]; |
511 | |
512 | return prime_mod[shift]; |
513 | } |
514 | }; |
515 | |
516 | /* |
517 | * hb_map_t |
518 | */ |
519 | |
520 | struct hb_map_t : hb_hashmap_t<hb_codepoint_t, |
521 | hb_codepoint_t, |
522 | true> |
523 | { |
524 | using hashmap = hb_hashmap_t<hb_codepoint_t, |
525 | hb_codepoint_t, |
526 | true>; |
527 | |
528 | ~hb_map_t () = default; |
529 | hb_map_t () : hashmap () {} |
530 | hb_map_t (const hb_map_t &o) : hashmap ((hashmap &) o) {} |
531 | hb_map_t (hb_map_t &&o) : hashmap (std::move ((hashmap &) o)) {} |
532 | hb_map_t& operator= (const hb_map_t&) = default; |
533 | hb_map_t& operator= (hb_map_t&&) = default; |
534 | hb_map_t (std::initializer_list<hb_codepoint_pair_t> lst) : hashmap (lst) {} |
535 | template <typename Iterable, |
536 | hb_requires (hb_is_iterable (Iterable))> |
537 | hb_map_t (const Iterable &o) : hashmap (o) {} |
538 | }; |
539 | |
540 | |
541 | #endif /* HB_MAP_HH */ |
542 | |