| 1 | /* |
| 2 | * Copyright © 2018 Google, Inc. |
| 3 | * |
| 4 | * This is part of HarfBuzz, a text shaping library. |
| 5 | * |
| 6 | * Permission is hereby granted, without written agreement and without |
| 7 | * license or royalty fees, to use, copy, modify, and distribute this |
| 8 | * software and its documentation for any purpose, provided that the |
| 9 | * above copyright notice and the following two paragraphs appear in |
| 10 | * all copies of this software. |
| 11 | * |
| 12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 16 | * DAMAGE. |
| 17 | * |
| 18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 23 | * |
| 24 | * Google Author(s): Behdad Esfahbod |
| 25 | */ |
| 26 | |
| 27 | #ifndef HB_MAP_HH |
| 28 | #define HB_MAP_HH |
| 29 | |
| 30 | #include "hb.hh" |
| 31 | |
| 32 | #include "hb-set.hh" |
| 33 | |
| 34 | |
| 35 | /* |
| 36 | * hb_hashmap_t |
| 37 | */ |
| 38 | |
| 39 | extern HB_INTERNAL const hb_codepoint_t minus_1; |
| 40 | |
| 41 | template <typename K, typename V, |
| 42 | bool minus_one = false> |
| 43 | struct hb_hashmap_t |
| 44 | { |
| 45 | hb_hashmap_t () { init (); } |
| 46 | ~hb_hashmap_t () { fini (); } |
| 47 | |
| 48 | hb_hashmap_t (const hb_hashmap_t& o) : hb_hashmap_t () { alloc (o.population); hb_copy (o, *this); } |
| 49 | hb_hashmap_t (hb_hashmap_t&& o) : hb_hashmap_t () { hb_swap (*this, o); } |
| 50 | hb_hashmap_t& operator= (const hb_hashmap_t& o) { reset (); alloc (o.population); hb_copy (o, *this); return *this; } |
| 51 | hb_hashmap_t& operator= (hb_hashmap_t&& o) { hb_swap (*this, o); return *this; } |
| 52 | |
| 53 | hb_hashmap_t (std::initializer_list<hb_pair_t<K, V>> lst) : hb_hashmap_t () |
| 54 | { |
| 55 | for (auto&& item : lst) |
| 56 | set (item.first, item.second); |
| 57 | } |
| 58 | template <typename Iterable, |
| 59 | hb_requires (hb_is_iterable (Iterable))> |
| 60 | hb_hashmap_t (const Iterable &o) : hb_hashmap_t () |
| 61 | { |
| 62 | auto iter = hb_iter (o); |
| 63 | if (iter.is_random_access_iterator || iter.has_fast_len) |
| 64 | alloc (hb_len (iter)); |
| 65 | hb_copy (iter, *this); |
| 66 | } |
| 67 | |
| 68 | struct item_t |
| 69 | { |
| 70 | K key; |
| 71 | uint32_t is_real_ : 1; |
| 72 | uint32_t is_used_ : 1; |
| 73 | uint32_t hash : 30; |
| 74 | V value; |
| 75 | |
| 76 | item_t () : key (), |
| 77 | is_real_ (false), is_used_ (false), |
| 78 | hash (0), |
| 79 | value () {} |
| 80 | |
| 81 | // Needed for https://github.com/harfbuzz/harfbuzz/issues/4138 |
| 82 | K& get_key () { return key; } |
| 83 | V& get_value () { return value; } |
| 84 | |
| 85 | bool is_used () const { return is_used_; } |
| 86 | void set_used (bool is_used) { is_used_ = is_used; } |
| 87 | void set_real (bool is_real) { is_real_ = is_real; } |
| 88 | bool is_real () const { return is_real_; } |
| 89 | |
| 90 | template <bool v = minus_one, |
| 91 | hb_enable_if (v == false)> |
| 92 | static inline const V& default_value () { return Null(V); }; |
| 93 | template <bool v = minus_one, |
| 94 | hb_enable_if (v == true)> |
| 95 | static inline const V& default_value () |
| 96 | { |
| 97 | static_assert (hb_is_same (V, hb_codepoint_t), "" ); |
| 98 | return minus_1; |
| 99 | }; |
| 100 | |
| 101 | bool operator == (const K &o) const { return hb_deref (key) == hb_deref (o); } |
| 102 | bool operator == (const item_t &o) const { return *this == o.key; } |
| 103 | hb_pair_t<K, V> get_pair() const { return hb_pair_t<K, V> (key, value); } |
| 104 | hb_pair_t<const K &, V &> get_pair_ref() { return hb_pair_t<const K &, V &> (key, value); } |
| 105 | |
| 106 | uint32_t total_hash () const |
| 107 | { return (hash * 31) + hb_hash (value); } |
| 108 | |
| 109 | static constexpr bool is_trivial = std::is_trivially_constructible<K>::value && |
| 110 | std::is_trivially_destructible<K>::value && |
| 111 | std::is_trivially_constructible<V>::value && |
| 112 | std::is_trivially_destructible<V>::value; |
| 113 | }; |
| 114 | |
| 115 | hb_object_header_t ; |
| 116 | unsigned int successful : 1; /* Allocations successful */ |
| 117 | unsigned int population : 31; /* Not including tombstones. */ |
| 118 | unsigned int occupancy; /* Including tombstones. */ |
| 119 | unsigned int mask; |
| 120 | unsigned int prime; |
| 121 | unsigned int max_chain_length; |
| 122 | item_t *items; |
| 123 | |
| 124 | friend void swap (hb_hashmap_t& a, hb_hashmap_t& b) |
| 125 | { |
| 126 | if (unlikely (!a.successful || !b.successful)) |
| 127 | return; |
| 128 | unsigned tmp = a.population; |
| 129 | a.population = b.population; |
| 130 | b.population = tmp; |
| 131 | //hb_swap (a.population, b.population); |
| 132 | hb_swap (a.occupancy, b.occupancy); |
| 133 | hb_swap (a.mask, b.mask); |
| 134 | hb_swap (a.prime, b.prime); |
| 135 | hb_swap (a.max_chain_length, b.max_chain_length); |
| 136 | hb_swap (a.items, b.items); |
| 137 | } |
| 138 | void init () |
| 139 | { |
| 140 | hb_object_init (this); |
| 141 | |
| 142 | successful = true; |
| 143 | population = occupancy = 0; |
| 144 | mask = 0; |
| 145 | prime = 0; |
| 146 | max_chain_length = 0; |
| 147 | items = nullptr; |
| 148 | } |
| 149 | void fini () |
| 150 | { |
| 151 | hb_object_fini (this); |
| 152 | |
| 153 | if (likely (items)) |
| 154 | { |
| 155 | unsigned size = mask + 1; |
| 156 | if (!item_t::is_trivial) |
| 157 | for (unsigned i = 0; i < size; i++) |
| 158 | items[i].~item_t (); |
| 159 | hb_free (items); |
| 160 | items = nullptr; |
| 161 | } |
| 162 | population = occupancy = 0; |
| 163 | } |
| 164 | |
| 165 | void reset () |
| 166 | { |
| 167 | successful = true; |
| 168 | clear (); |
| 169 | } |
| 170 | |
| 171 | bool in_error () const { return !successful; } |
| 172 | |
| 173 | bool alloc (unsigned new_population = 0) |
| 174 | { |
| 175 | if (unlikely (!successful)) return false; |
| 176 | |
| 177 | if (new_population != 0 && (new_population + new_population / 2) < mask) return true; |
| 178 | |
| 179 | unsigned int power = hb_bit_storage (hb_max ((unsigned) population, new_population) * 2 + 8); |
| 180 | unsigned int new_size = 1u << power; |
| 181 | item_t *new_items = (item_t *) hb_malloc ((size_t) new_size * sizeof (item_t)); |
| 182 | if (unlikely (!new_items)) |
| 183 | { |
| 184 | successful = false; |
| 185 | return false; |
| 186 | } |
| 187 | if (!item_t::is_trivial) |
| 188 | for (auto &_ : hb_iter (new_items, new_size)) |
| 189 | new (&_) item_t (); |
| 190 | else |
| 191 | hb_memset (new_items, 0, (size_t) new_size * sizeof (item_t)); |
| 192 | |
| 193 | unsigned int old_size = size (); |
| 194 | item_t *old_items = items; |
| 195 | |
| 196 | /* Switch to new, empty, array. */ |
| 197 | population = occupancy = 0; |
| 198 | mask = new_size - 1; |
| 199 | prime = prime_for (power); |
| 200 | max_chain_length = power * 2; |
| 201 | items = new_items; |
| 202 | |
| 203 | /* Insert back old items. */ |
| 204 | for (unsigned int i = 0; i < old_size; i++) |
| 205 | { |
| 206 | if (old_items[i].is_real ()) |
| 207 | { |
| 208 | set_with_hash (std::move (old_items[i].key), |
| 209 | old_items[i].hash, |
| 210 | std::move (old_items[i].value)); |
| 211 | } |
| 212 | if (!item_t::is_trivial) |
| 213 | old_items[i].~item_t (); |
| 214 | } |
| 215 | |
| 216 | hb_free (old_items); |
| 217 | |
| 218 | return true; |
| 219 | } |
| 220 | |
| 221 | template <typename KK, typename VV> |
| 222 | bool set_with_hash (KK&& key, uint32_t hash, VV&& value, bool overwrite = true) |
| 223 | { |
| 224 | if (unlikely (!successful)) return false; |
| 225 | if (unlikely ((occupancy + occupancy / 2) >= mask && !alloc ())) return false; |
| 226 | |
| 227 | hash &= 0x3FFFFFFF; // We only store lower 30bit of hash |
| 228 | unsigned int tombstone = (unsigned int) -1; |
| 229 | unsigned int i = hash % prime; |
| 230 | unsigned length = 0; |
| 231 | unsigned step = 0; |
| 232 | while (items[i].is_used ()) |
| 233 | { |
| 234 | if ((std::is_integral<K>::value || items[i].hash == hash) && |
| 235 | items[i] == key) |
| 236 | { |
| 237 | if (!overwrite) |
| 238 | return false; |
| 239 | else |
| 240 | break; |
| 241 | } |
| 242 | if (!items[i].is_real () && tombstone == (unsigned) -1) |
| 243 | tombstone = i; |
| 244 | i = (i + ++step) & mask; |
| 245 | length++; |
| 246 | } |
| 247 | |
| 248 | item_t &item = items[tombstone == (unsigned) -1 ? i : tombstone]; |
| 249 | |
| 250 | if (item.is_used ()) |
| 251 | { |
| 252 | occupancy--; |
| 253 | population -= item.is_real (); |
| 254 | } |
| 255 | |
| 256 | item.key = std::forward<KK> (key); |
| 257 | item.value = std::forward<VV> (value); |
| 258 | item.hash = hash; |
| 259 | item.set_used (true); |
| 260 | item.set_real (true); |
| 261 | |
| 262 | occupancy++; |
| 263 | population++; |
| 264 | |
| 265 | if (unlikely (length > max_chain_length) && occupancy * 8 > mask) |
| 266 | alloc (mask - 8); // This ensures we jump to next larger size |
| 267 | |
| 268 | return true; |
| 269 | } |
| 270 | |
| 271 | template <typename VV> |
| 272 | bool set (const K &key, VV&& value, bool overwrite = true) { return set_with_hash (key, hb_hash (key), std::forward<VV> (value), overwrite); } |
| 273 | template <typename VV> |
| 274 | bool set (K &&key, VV&& value, bool overwrite = true) |
| 275 | { |
| 276 | uint32_t hash = hb_hash (key); |
| 277 | return set_with_hash (std::move (key), hash, std::forward<VV> (value), overwrite); |
| 278 | } |
| 279 | |
| 280 | const V& get_with_hash (const K &key, uint32_t hash) const |
| 281 | { |
| 282 | if (!items) return item_t::default_value (); |
| 283 | auto *item = fetch_item (key, hb_hash (key)); |
| 284 | if (item) |
| 285 | return item->value; |
| 286 | return item_t::default_value (); |
| 287 | } |
| 288 | const V& get (const K &key) const |
| 289 | { |
| 290 | if (!items) return item_t::default_value (); |
| 291 | return get_with_hash (key, hb_hash (key)); |
| 292 | } |
| 293 | |
| 294 | void del (const K &key) |
| 295 | { |
| 296 | if (!items) return; |
| 297 | auto *item = fetch_item (key, hb_hash (key)); |
| 298 | if (item) |
| 299 | { |
| 300 | item->set_real (false); |
| 301 | population--; |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | /* Has interface. */ |
| 306 | const V& operator [] (K k) const { return get (k); } |
| 307 | template <typename VV=V> |
| 308 | bool has (const K &key, VV **vp = nullptr) const |
| 309 | { |
| 310 | if (!items) return false; |
| 311 | auto *item = fetch_item (key, hb_hash (key)); |
| 312 | if (item) |
| 313 | { |
| 314 | if (vp) *vp = std::addressof (item->value); |
| 315 | return true; |
| 316 | } |
| 317 | return false; |
| 318 | } |
| 319 | item_t *fetch_item (const K &key, uint32_t hash) const |
| 320 | { |
| 321 | hash &= 0x3FFFFFFF; // We only store lower 30bit of hash |
| 322 | unsigned int i = hash % prime; |
| 323 | unsigned step = 0; |
| 324 | while (items[i].is_used ()) |
| 325 | { |
| 326 | if ((std::is_integral<K>::value || items[i].hash == hash) && |
| 327 | items[i] == key) |
| 328 | { |
| 329 | if (items[i].is_real ()) |
| 330 | return &items[i]; |
| 331 | else |
| 332 | return nullptr; |
| 333 | } |
| 334 | i = (i + ++step) & mask; |
| 335 | } |
| 336 | return nullptr; |
| 337 | } |
| 338 | /* Projection. */ |
| 339 | const V& operator () (K k) const { return get (k); } |
| 340 | |
| 341 | unsigned size () const { return mask ? mask + 1 : 0; } |
| 342 | |
| 343 | void clear () |
| 344 | { |
| 345 | if (unlikely (!successful)) return; |
| 346 | |
| 347 | for (auto &_ : hb_iter (items, size ())) |
| 348 | { |
| 349 | /* Reconstruct items. */ |
| 350 | _.~item_t (); |
| 351 | new (&_) item_t (); |
| 352 | } |
| 353 | |
| 354 | population = occupancy = 0; |
| 355 | } |
| 356 | |
| 357 | bool is_empty () const { return population == 0; } |
| 358 | explicit operator bool () const { return !is_empty (); } |
| 359 | |
| 360 | uint32_t hash () const |
| 361 | { |
| 362 | return |
| 363 | + iter_items () |
| 364 | | hb_reduce ([] (uint32_t h, const item_t &_) { return h ^ _.total_hash (); }, (uint32_t) 0u) |
| 365 | ; |
| 366 | } |
| 367 | |
| 368 | bool is_equal (const hb_hashmap_t &other) const |
| 369 | { |
| 370 | if (population != other.population) return false; |
| 371 | |
| 372 | for (auto pair : iter ()) |
| 373 | if (other.get (pair.first) != pair.second) |
| 374 | return false; |
| 375 | |
| 376 | return true; |
| 377 | } |
| 378 | bool operator == (const hb_hashmap_t &other) const { return is_equal (other); } |
| 379 | bool operator != (const hb_hashmap_t &other) const { return !is_equal (other); } |
| 380 | |
| 381 | unsigned int get_population () const { return population; } |
| 382 | |
| 383 | void update (const hb_hashmap_t &other) |
| 384 | { |
| 385 | if (unlikely (!successful)) return; |
| 386 | |
| 387 | hb_copy (other, *this); |
| 388 | } |
| 389 | |
| 390 | /* |
| 391 | * Iterator |
| 392 | */ |
| 393 | |
| 394 | auto iter_items () const HB_AUTO_RETURN |
| 395 | ( |
| 396 | + hb_iter (items, size ()) |
| 397 | | hb_filter (&item_t::is_real) |
| 398 | ) |
| 399 | auto iter_ref () const HB_AUTO_RETURN |
| 400 | ( |
| 401 | + iter_items () |
| 402 | | hb_map (&item_t::get_pair_ref) |
| 403 | ) |
| 404 | auto iter () const HB_AUTO_RETURN |
| 405 | ( |
| 406 | + iter_items () |
| 407 | | hb_map (&item_t::get_pair) |
| 408 | ) |
| 409 | auto keys_ref () const HB_AUTO_RETURN |
| 410 | ( |
| 411 | + iter_items () |
| 412 | | hb_map (&item_t::get_key) |
| 413 | ) |
| 414 | auto keys () const HB_AUTO_RETURN |
| 415 | ( |
| 416 | + keys_ref () |
| 417 | | hb_map (hb_ridentity) |
| 418 | ) |
| 419 | auto values_ref () const HB_AUTO_RETURN |
| 420 | ( |
| 421 | + iter_items () |
| 422 | | hb_map (&item_t::get_value) |
| 423 | ) |
| 424 | auto values () const HB_AUTO_RETURN |
| 425 | ( |
| 426 | + values_ref () |
| 427 | | hb_map (hb_ridentity) |
| 428 | ) |
| 429 | |
| 430 | /* C iterator. */ |
| 431 | bool next (int *idx, |
| 432 | K *key, |
| 433 | V *value) const |
| 434 | { |
| 435 | unsigned i = (unsigned) (*idx + 1); |
| 436 | |
| 437 | unsigned count = size (); |
| 438 | while (i < count && !items[i].is_real ()) |
| 439 | i++; |
| 440 | |
| 441 | if (i >= count) |
| 442 | { |
| 443 | *idx = -1; |
| 444 | return false; |
| 445 | } |
| 446 | |
| 447 | *key = items[i].key; |
| 448 | *value = items[i].value; |
| 449 | |
| 450 | *idx = (signed) i; |
| 451 | return true; |
| 452 | } |
| 453 | |
| 454 | /* Sink interface. */ |
| 455 | hb_hashmap_t& operator << (const hb_pair_t<K, V>& v) |
| 456 | { set (v.first, v.second); return *this; } |
| 457 | hb_hashmap_t& operator << (const hb_pair_t<K, V&&>& v) |
| 458 | { set (v.first, std::move (v.second)); return *this; } |
| 459 | hb_hashmap_t& operator << (const hb_pair_t<K&&, V>& v) |
| 460 | { set (std::move (v.first), v.second); return *this; } |
| 461 | hb_hashmap_t& operator << (const hb_pair_t<K&&, V&&>& v) |
| 462 | { set (std::move (v.first), std::move (v.second)); return *this; } |
| 463 | |
| 464 | static unsigned int prime_for (unsigned int shift) |
| 465 | { |
| 466 | /* Following comment and table copied from glib. */ |
| 467 | /* Each table size has an associated prime modulo (the first prime |
| 468 | * lower than the table size) used to find the initial bucket. Probing |
| 469 | * then works modulo 2^n. The prime modulo is necessary to get a |
| 470 | * good distribution with poor hash functions. |
| 471 | */ |
| 472 | /* Not declaring static to make all kinds of compilers happy... */ |
| 473 | /*static*/ const unsigned int prime_mod [32] = |
| 474 | { |
| 475 | 1, /* For 1 << 0 */ |
| 476 | 2, |
| 477 | 3, |
| 478 | 7, |
| 479 | 13, |
| 480 | 31, |
| 481 | 61, |
| 482 | 127, |
| 483 | 251, |
| 484 | 509, |
| 485 | 1021, |
| 486 | 2039, |
| 487 | 4093, |
| 488 | 8191, |
| 489 | 16381, |
| 490 | 32749, |
| 491 | 65521, /* For 1 << 16 */ |
| 492 | 131071, |
| 493 | 262139, |
| 494 | 524287, |
| 495 | 1048573, |
| 496 | 2097143, |
| 497 | 4194301, |
| 498 | 8388593, |
| 499 | 16777213, |
| 500 | 33554393, |
| 501 | 67108859, |
| 502 | 134217689, |
| 503 | 268435399, |
| 504 | 536870909, |
| 505 | 1073741789, |
| 506 | 2147483647 /* For 1 << 31 */ |
| 507 | }; |
| 508 | |
| 509 | if (unlikely (shift >= ARRAY_LENGTH (prime_mod))) |
| 510 | return prime_mod[ARRAY_LENGTH (prime_mod) - 1]; |
| 511 | |
| 512 | return prime_mod[shift]; |
| 513 | } |
| 514 | }; |
| 515 | |
| 516 | /* |
| 517 | * hb_map_t |
| 518 | */ |
| 519 | |
| 520 | struct hb_map_t : hb_hashmap_t<hb_codepoint_t, |
| 521 | hb_codepoint_t, |
| 522 | true> |
| 523 | { |
| 524 | using hashmap = hb_hashmap_t<hb_codepoint_t, |
| 525 | hb_codepoint_t, |
| 526 | true>; |
| 527 | |
| 528 | ~hb_map_t () = default; |
| 529 | hb_map_t () : hashmap () {} |
| 530 | hb_map_t (const hb_map_t &o) : hashmap ((hashmap &) o) {} |
| 531 | hb_map_t (hb_map_t &&o) : hashmap (std::move ((hashmap &) o)) {} |
| 532 | hb_map_t& operator= (const hb_map_t&) = default; |
| 533 | hb_map_t& operator= (hb_map_t&&) = default; |
| 534 | hb_map_t (std::initializer_list<hb_codepoint_pair_t> lst) : hashmap (lst) {} |
| 535 | template <typename Iterable, |
| 536 | hb_requires (hb_is_iterable (Iterable))> |
| 537 | hb_map_t (const Iterable &o) : hashmap (o) {} |
| 538 | }; |
| 539 | |
| 540 | |
| 541 | #endif /* HB_MAP_HH */ |
| 542 | |