| 1 | // Copyright (c) 2005, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | // --- |
| 31 | // |
| 32 | // A dense hashtable is a particular implementation of |
| 33 | // a hashtable: one that is meant to minimize memory allocation. |
| 34 | // It does this by using an array to store all the data. We |
| 35 | // steal a value from the key space to indicate "empty" array |
| 36 | // elements (ie indices where no item lives) and another to indicate |
| 37 | // "deleted" elements. |
| 38 | // |
| 39 | // (Note it is possible to change the value of the delete key |
| 40 | // on the fly; you can even remove it, though after that point |
| 41 | // the hashtable is insert_only until you set it again. The empty |
| 42 | // value however can't be changed.) |
| 43 | // |
| 44 | // To minimize allocation and pointer overhead, we use internal |
| 45 | // probing, in which the hashtable is a single table, and collisions |
| 46 | // are resolved by trying to insert again in another bucket. The |
| 47 | // most cache-efficient internal probing schemes are linear probing |
| 48 | // (which suffers, alas, from clumping) and quadratic probing, which |
| 49 | // is what we implement by default. |
| 50 | // |
| 51 | // Type requirements: value_type is required to be Copy Constructible |
| 52 | // and Default Constructible. It is not required to be (and commonly |
| 53 | // isn't) Assignable. |
| 54 | // |
| 55 | // You probably shouldn't use this code directly. Use dense_hash_map<> |
| 56 | // or dense_hash_set<> instead. |
| 57 | |
| 58 | // You can change the following below: |
| 59 | // HT_OCCUPANCY_PCT -- how full before we double size |
| 60 | // HT_EMPTY_PCT -- how empty before we halve size |
| 61 | // HT_MIN_BUCKETS -- default smallest bucket size |
| 62 | // |
| 63 | // You can also change enlarge_factor (which defaults to |
| 64 | // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to |
| 65 | // HT_EMPTY_PCT) with set_resizing_parameters(). |
| 66 | // |
| 67 | // How to decide what values to use? |
| 68 | // shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good. |
| 69 | // HT_MIN_BUCKETS is probably unnecessary since you can specify |
| 70 | // (indirectly) the starting number of buckets at construct-time. |
| 71 | // For enlarge_factor, you can use this chart to try to trade-off |
| 72 | // expected lookup time to the space taken up. By default, this |
| 73 | // code uses quadratic probing, though you can change it to linear |
| 74 | // via JUMP_ below if you really want to. |
| 75 | // |
| 76 | // From |
| 77 | // http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html |
| 78 | // NUMBER OF PROBES / LOOKUP Successful Unsuccessful |
| 79 | // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) |
// Linear collision resolution   [1+1/(1-L)]/2         [1+1/(1-L)^2]/2
| 81 | // |
| 82 | // -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 |
| 83 | // QUADRATIC COLLISION RES. |
| 84 | // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 |
| 85 | // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 |
| 86 | // LINEAR COLLISION RES. |
| 87 | // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 |
| 88 | // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 |
| 89 | |
| 90 | #pragma once |
| 91 | |
| 92 | #include <assert.h> |
| 93 | #include <stdio.h> // for FILE, fwrite, fread |
| 94 | #include <algorithm> // For swap(), eg |
| 95 | #include <iterator> // For iterator tags |
| 96 | #include <limits> // for numeric_limits |
| 97 | #include <memory> // For uninitialized_fill |
| 98 | #include <utility> // for pair |
| 99 | #include <stdexcept> // For length_error |
| 100 | #include <type_traits> |
| 101 | #include <sparsehash/internal/hashtable-common.h> |
| 102 | #include <sparsehash/internal/libc_allocator_with_realloc.h> |
| 103 | |
| 104 | namespace google { |
| 105 | |
| 106 | // The probing method |
| 107 | // Linear probing |
| 108 | // #define JUMP_(key, num_probes) ( 1 ) |
| 109 | // Quadratic probing |
| 110 | #define JUMP_(key, num_probes) (num_probes) |
| 111 | |
| 112 | // Hashtable class, used to implement the hashed associative containers |
| 113 | // hash_set and hash_map. |
| 114 | |
| 115 | // Value: what is stored in the table (each bucket is a Value). |
| 116 | // Key: something in a 1-to-1 correspondence to a Value, that can be used |
| 117 | // to search for a Value in the table (find() takes a Key). |
| 118 | // HashFcn: Takes a Key and returns an integer, the more unique the better. |
| 119 | // ExtractKey: given a Value, returns the unique Key associated with it. |
//             Must provide a result_type typedef indicating the
//             return type of operator().
| 122 | // SetKey: given a Value* and a Key, modifies the value such that |
| 123 | // ExtractKey(value) == key. We guarantee this is only called |
| 124 | // with key == deleted_key or key == empty_key. |
| 125 | // EqualKey: Given two Keys, says whether they are the same (that is, |
| 126 | // if they are both associated with the same Value). |
| 127 | // Alloc: STL allocator to use to allocate memory. |
| 128 | |
| 129 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
| 130 | class EqualKey, class Alloc> |
| 131 | class dense_hashtable; |
| 132 | |
| 133 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 134 | struct dense_hashtable_iterator; |
| 135 | |
| 136 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 137 | struct dense_hashtable_const_iterator; |
| 138 | |
| 139 | // We're just an array, but we need to skip over empty and deleted elements |
| 140 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 141 | struct dense_hashtable_iterator { |
| 142 | private: |
| 143 | using value_alloc_type = |
| 144 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
| 145 | |
| 146 | public: |
| 147 | typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
| 148 | typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
| 149 | const_iterator; |
| 150 | |
| 151 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
| 152 | typedef V value_type; |
| 153 | typedef typename value_alloc_type::difference_type difference_type; |
| 154 | typedef typename value_alloc_type::size_type size_type; |
| 155 | typedef typename value_alloc_type::reference reference; |
| 156 | typedef typename value_alloc_type::pointer pointer; |
| 157 | |
| 158 | // "Real" constructor and default constructor |
| 159 | dense_hashtable_iterator( |
| 160 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it, |
| 161 | pointer it_end, bool advance) |
| 162 | : ht(h), pos(it), end(it_end) { |
| 163 | if (advance) advance_past_empty_and_deleted(); |
| 164 | } |
| 165 | dense_hashtable_iterator() {} |
| 166 | // The default destructor is fine; we don't define one |
| 167 | // The default operator= is fine; we don't define one |
| 168 | |
| 169 | // Happy dereferencer |
| 170 | reference operator*() const { return *pos; } |
| 171 | pointer operator->() const { return &(operator*()); } |
| 172 | |
| 173 | // Arithmetic. The only hard part is making sure that |
| 174 | // we're not on an empty or marked-deleted array element |
| 175 | void advance_past_empty_and_deleted() { |
| 176 | while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) |
| 177 | ++pos; |
| 178 | } |
| 179 | iterator& operator++() { |
| 180 | assert(pos != end); |
| 181 | ++pos; |
| 182 | advance_past_empty_and_deleted(); |
| 183 | return *this; |
| 184 | } |
| 185 | iterator operator++(int) { |
| 186 | iterator tmp(*this); |
| 187 | ++*this; |
| 188 | return tmp; |
| 189 | } |
| 190 | |
| 191 | // Comparison. |
| 192 | bool operator==(const iterator& it) const { return pos == it.pos; } |
| 193 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
| 194 | |
| 195 | // The actual data |
| 196 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
| 197 | pointer pos, end; |
| 198 | }; |
| 199 | |
| 200 | // Now do it all again, but with const-ness! |
| 201 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 202 | struct dense_hashtable_const_iterator { |
| 203 | private: |
| 204 | using value_alloc_type = |
| 205 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
| 206 | |
| 207 | public: |
| 208 | typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
| 209 | typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
| 210 | const_iterator; |
| 211 | |
| 212 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
| 213 | typedef V value_type; |
| 214 | typedef typename value_alloc_type::difference_type difference_type; |
| 215 | typedef typename value_alloc_type::size_type size_type; |
| 216 | typedef typename value_alloc_type::const_reference reference; |
| 217 | typedef typename value_alloc_type::const_pointer pointer; |
| 218 | |
| 219 | // "Real" constructor and default constructor |
| 220 | dense_hashtable_const_iterator( |
| 221 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it, |
| 222 | pointer it_end, bool advance) |
| 223 | : ht(h), pos(it), end(it_end) { |
| 224 | if (advance) advance_past_empty_and_deleted(); |
| 225 | } |
| 226 | dense_hashtable_const_iterator() : ht(NULL), pos(pointer()), end(pointer()) {} |
| 227 | // This lets us convert regular iterators to const iterators |
| 228 | dense_hashtable_const_iterator(const iterator& it) |
| 229 | : ht(it.ht), pos(it.pos), end(it.end) {} |
| 230 | // The default destructor is fine; we don't define one |
| 231 | // The default operator= is fine; we don't define one |
| 232 | |
| 233 | // Happy dereferencer |
| 234 | reference operator*() const { return *pos; } |
| 235 | pointer operator->() const { return &(operator*()); } |
| 236 | |
| 237 | // Arithmetic. The only hard part is making sure that |
| 238 | // we're not on an empty or marked-deleted array element |
| 239 | void advance_past_empty_and_deleted() { |
| 240 | while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) |
| 241 | ++pos; |
| 242 | } |
| 243 | const_iterator& operator++() { |
| 244 | assert(pos != end); |
| 245 | ++pos; |
| 246 | advance_past_empty_and_deleted(); |
| 247 | return *this; |
| 248 | } |
| 249 | const_iterator operator++(int) { |
| 250 | const_iterator tmp(*this); |
| 251 | ++*this; |
| 252 | return tmp; |
| 253 | } |
| 254 | |
| 255 | // Comparison. |
| 256 | bool operator==(const const_iterator& it) const { return pos == it.pos; } |
| 257 | bool operator!=(const const_iterator& it) const { return pos != it.pos; } |
| 258 | |
| 259 | // The actual data |
| 260 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
| 261 | pointer pos, end; |
| 262 | }; |
| 263 | |
template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
          class EqualKey, class Alloc>
class dense_hashtable {
 private:
  // Everything is stored as Value; rebind the user's allocator accordingly.
  using value_alloc_type =
      typename std::allocator_traits<Alloc>::template rebind_alloc<Value>;

 public:
  // Standard hashed-associative-container typedefs.
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef dense_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
                                   EqualKey, Alloc> iterator;

  typedef dense_hashtable_const_iterator<
      Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;

  // These come from tr1. For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT;  // defined at the bottom of this file

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT;  // defined at the bottom of this file

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size. Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
| 314 | |
| 315 | // ITERATOR FUNCTIONS |
| 316 | iterator begin() { return iterator(this, table, table + num_buckets, true); } |
| 317 | iterator end() { |
| 318 | return iterator(this, table + num_buckets, table + num_buckets, true); |
| 319 | } |
| 320 | const_iterator begin() const { |
| 321 | return const_iterator(this, table, table + num_buckets, true); |
| 322 | } |
| 323 | const_iterator end() const { |
| 324 | return const_iterator(this, table + num_buckets, table + num_buckets, true); |
| 325 | } |
| 326 | |
| 327 | // These come from tr1 unordered_map. They iterate over 'bucket' n. |
| 328 | // We'll just consider bucket n to be the n-th element of the table. |
| 329 | local_iterator begin(size_type i) { |
| 330 | return local_iterator(this, table + i, table + i + 1, false); |
| 331 | } |
| 332 | local_iterator end(size_type i) { |
| 333 | local_iterator it = begin(i); |
| 334 | if (!test_empty(i) && !test_deleted(i)) ++it; |
| 335 | return it; |
| 336 | } |
| 337 | const_local_iterator begin(size_type i) const { |
| 338 | return const_local_iterator(this, table + i, table + i + 1, false); |
| 339 | } |
| 340 | const_local_iterator end(size_type i) const { |
| 341 | const_local_iterator it = begin(i); |
| 342 | if (!test_empty(i) && !test_deleted(i)) ++it; |
| 343 | return it; |
| 344 | } |
| 345 | |
  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  // NOTE(review): these return by value. 'settings' and 'key_info'
  // presumably derive from hasher / key_equal respectively, so the
  // returns below slice off just the functor part -- confirm against
  // the Settings/KeyInfo declarations.
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const { return allocator_type(val_info); }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }
| 353 | |
| 354 | private: |
| 355 | // Annoyingly, we can't copy values around, because they might have |
| 356 | // const components (they're probably pair<const X, Y>). We use |
| 357 | // explicit destructor invocation and placement new to get around |
| 358 | // this. Arg. |
| 359 | template <typename... Args> |
| 360 | void set_value(pointer dst, Args&&... args) { |
| 361 | dst->~value_type(); // delete the old value, if any |
| 362 | new (dst) value_type(std::forward<Args>(args)...); |
| 363 | } |
| 364 | |
| 365 | void destroy_buckets(size_type first, size_type last) { |
| 366 | for (; first != last; ++first) table[first].~value_type(); |
| 367 | } |
| 368 | |
| 369 | // DELETE HELPER FUNCTIONS |
| 370 | // This lets the user describe a key that will indicate deleted |
| 371 | // table entries. This key should be an "impossible" entry -- |
| 372 | // if you try to insert it for real, you won't be able to retrieve it! |
| 373 | // (NB: while you pass in an entire value, only the key part is looked |
| 374 | // at. This is just because I don't know how to assign just a key.) |
| 375 | private: |
| 376 | void squash_deleted() { // gets rid of any deleted entries we have |
| 377 | if (num_deleted) { // get rid of deleted before writing |
| 378 | size_type resize_to = settings.min_buckets( |
| 379 | num_elements, bucket_count()); |
| 380 | dense_hashtable tmp(std::move(*this), resize_to); // copying will get rid of deleted |
| 381 | swap(tmp); // now we are tmp |
| 382 | } |
| 383 | assert(num_deleted == 0); |
| 384 | } |
| 385 | |
| 386 | // Test if the given key is the deleted indicator. Requires |
| 387 | // num_deleted > 0, for correctness of read(), and because that |
| 388 | // guarantees that key_info.delkey is valid. |
| 389 | bool test_deleted_key(const key_type& key) const { |
| 390 | assert(num_deleted > 0); |
| 391 | return equals(key_info.delkey, key); |
| 392 | } |
| 393 | |
| 394 | public: |
| 395 | void set_deleted_key(const key_type& key) { |
| 396 | // the empty indicator (if specified) and the deleted indicator |
| 397 | // must be different |
| 398 | assert( |
| 399 | (!settings.use_empty() || !equals(key, key_info.empty_key)) && |
| 400 | "Passed the empty-key to set_deleted_key" ); |
| 401 | // It's only safe to change what "deleted" means if we purge deleted guys |
| 402 | squash_deleted(); |
| 403 | settings.set_use_deleted(true); |
| 404 | key_info.delkey = key; |
| 405 | } |
| 406 | void clear_deleted_key() { |
| 407 | squash_deleted(); |
| 408 | settings.set_use_deleted(false); |
| 409 | } |
| 410 | key_type deleted_key() const { |
| 411 | assert(settings.use_deleted() && |
| 412 | "Must set deleted key before calling deleted_key" ); |
| 413 | return key_info.delkey; |
| 414 | } |
| 415 | |
| 416 | // These are public so the iterators can use them |
| 417 | // True if the item at position bucknum is "deleted" marker |
| 418 | bool test_deleted(size_type bucknum) const { |
| 419 | // Invariant: !use_deleted() implies num_deleted is 0. |
| 420 | assert(settings.use_deleted() || num_deleted == 0); |
| 421 | return num_deleted > 0 && test_deleted_key(get_key(table[bucknum])); |
| 422 | } |
| 423 | bool test_deleted(const iterator& it) const { |
| 424 | // Invariant: !use_deleted() implies num_deleted is 0. |
| 425 | assert(settings.use_deleted() || num_deleted == 0); |
| 426 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
| 427 | } |
| 428 | bool test_deleted(const const_iterator& it) const { |
| 429 | // Invariant: !use_deleted() implies num_deleted is 0. |
| 430 | assert(settings.use_deleted() || num_deleted == 0); |
| 431 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
| 432 | } |
| 433 | |
| 434 | private: |
| 435 | void check_use_deleted(const char* caller) { |
| 436 | (void)caller; // could log it if the assert failed |
| 437 | assert(settings.use_deleted()); |
| 438 | } |
| 439 | |
| 440 | // Set it so test_deleted is true. true if object didn't used to be deleted. |
| 441 | bool set_deleted(iterator& it) { |
| 442 | check_use_deleted("set_deleted()" ); |
| 443 | bool retval = !test_deleted(it); |
| 444 | // &* converts from iterator to value-type. |
| 445 | set_key(&(*it), key_info.delkey); |
| 446 | return retval; |
| 447 | } |
| 448 | // Set it so test_deleted is false. true if object used to be deleted. |
| 449 | bool clear_deleted(iterator& it) { |
| 450 | check_use_deleted("clear_deleted()" ); |
| 451 | // Happens automatically when we assign something else in its place. |
| 452 | return test_deleted(it); |
| 453 | } |
| 454 | |
| 455 | // We also allow to set/clear the deleted bit on a const iterator. |
| 456 | // We allow a const_iterator for the same reason you can delete a |
| 457 | // const pointer: it's convenient, and semantically you can't use |
| 458 | // 'it' after it's been deleted anyway, so its const-ness doesn't |
| 459 | // really matter. |
| 460 | bool set_deleted(const_iterator& it) { |
| 461 | check_use_deleted("set_deleted()" ); |
| 462 | bool retval = !test_deleted(it); |
| 463 | set_key(const_cast<pointer>(&(*it)), key_info.delkey); |
| 464 | return retval; |
| 465 | } |
| 466 | // Set it so test_deleted is false. true if object used to be deleted. |
| 467 | bool clear_deleted(const_iterator& it) { |
| 468 | check_use_deleted("clear_deleted()" ); |
| 469 | return test_deleted(it); |
| 470 | } |
| 471 | |
| 472 | // EMPTY HELPER FUNCTIONS |
| 473 | // This lets the user describe a key that will indicate empty (unused) |
| 474 | // table entries. This key should be an "impossible" entry -- |
| 475 | // if you try to insert it for real, you won't be able to retrieve it! |
| 476 | // (NB: while you pass in an entire value, only the key part is looked |
| 477 | // at. This is just because I don't know how to assign just a key.) |
| 478 | public: |
| 479 | // These are public so the iterators can use them |
| 480 | // True if the item at position bucknum is "empty" marker |
| 481 | bool test_empty(size_type bucknum) const { |
| 482 | assert(settings.use_empty()); // we always need to know what's empty! |
| 483 | return equals(key_info.empty_key, get_key(table[bucknum])); |
| 484 | } |
| 485 | bool test_empty(const iterator& it) const { |
| 486 | assert(settings.use_empty()); // we always need to know what's empty! |
| 487 | return equals(key_info.empty_key, get_key(*it)); |
| 488 | } |
| 489 | bool test_empty(const const_iterator& it) const { |
| 490 | assert(settings.use_empty()); // we always need to know what's empty! |
| 491 | return equals(key_info.empty_key, get_key(*it)); |
| 492 | } |
| 493 | |
| 494 | private: |
| 495 | void fill_range_with_empty(pointer table_start, size_type count) { |
| 496 | for (size_type i = 0; i < count; ++i) |
| 497 | { |
| 498 | construct_key(&table_start[i], key_info.empty_key); |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | public: |
| 503 | void set_empty_key(const key_type& key) { |
| 504 | // Once you set the empty key, you can't change it |
| 505 | assert(!settings.use_empty() && "Calling set_empty_key multiple times" ); |
| 506 | // The deleted indicator (if specified) and the empty indicator |
| 507 | // must be different. |
| 508 | assert( |
| 509 | (!settings.use_deleted() || !equals(key, key_info.delkey)) && |
| 510 | "Setting the empty key the same as the deleted key" ); |
| 511 | settings.set_use_empty(true); |
| 512 | key_info.empty_key = key; |
| 513 | |
| 514 | assert(!table); // must set before first use |
| 515 | // num_buckets was set in constructor even though table was NULL |
| 516 | table = val_info.allocate(num_buckets); |
| 517 | assert(table); |
| 518 | fill_range_with_empty(table, num_buckets); |
| 519 | } |
| 520 | key_type empty_key() const { |
| 521 | assert(settings.use_empty()); |
| 522 | return key_info.empty_key; |
| 523 | } |
| 524 | |
| 525 | // FUNCTIONS CONCERNING SIZE |
| 526 | public: |
| 527 | size_type size() const { return num_elements - num_deleted; } |
| 528 | size_type max_size() const { return val_info.max_size(); } |
| 529 | bool empty() const { return size() == 0; } |
| 530 | size_type bucket_count() const { return num_buckets; } |
| 531 | size_type max_bucket_count() const { return max_size(); } |
| 532 | size_type nonempty_bucket_count() const { return num_elements; } |
| 533 | // These are tr1 methods. Their idea of 'bucket' doesn't map well to |
| 534 | // what we do. We just say every bucket has 0 or 1 items in it. |
| 535 | size_type bucket_size(size_type i) const { |
| 536 | return begin(i) == end(i) ? 0 : 1; |
| 537 | } |
| 538 | |
 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes. Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking. Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = num_elements - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;  // find how much we should shrink
      // Keep halving while we stay above the default floor and the live
      // elements still fall below the shrink fraction of the new size.
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < sz * shrink_factor) {
        sz /= 2;  // stay a power of 2
      }
      // Move-rehash into a table of sz buckets, then adopt it.
      dense_hashtable tmp(std::move(*this), sz);  // Do the actual resizing
      swap(tmp);                                  // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);  // because we just considered it
    return retval;
  }
| 574 | |
  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if (settings.consider_shrink()) {  // see if lots of deletes happened
      if (maybe_shrink()) did_resize = true;
    }
    // Guard against size_type overflow in num_elements + delta below.
    if (num_elements >= (std::numeric_limits<size_type>::max)() - delta) {
      throw std::length_error("resize overflow");
    }
    if (bucket_count() >= HT_MIN_BUCKETS &&
        (num_elements + delta) <= settings.enlarge_threshold())
      return did_resize;  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable. So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room). But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    size_type needed_size = settings.min_buckets(num_elements + delta, 0);
    if (needed_size <= bucket_count())  // we have enough buckets
      return did_resize;

    size_type resize_to = settings.min_buckets(
        num_elements - num_deleted + delta, bucket_count());

    // When num_deleted is large, we may still grow but we do not want to
    // over expand. So we reduce needed_size by a portion of num_deleted
    // (the exact portion does not matter). This is especially helpful
    // when min_load_factor is zero (no shrink at all) to avoid doubling
    // the bucket count to infinity. See also test ResizeWithoutShrink.
    needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0);

    if (resize_to < needed_size &&  // may double resize_to
        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow. But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert. Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to * 2));
      if (num_elements - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }
    // Move-rehash into a table of resize_to buckets (this also purges
    // all deleted entries), then adopt it.
    dense_hashtable tmp(std::move(*this), resize_to);
    swap(tmp);  // now we are tmp
    return true;
  }
| 630 | |
  // We require table be not-NULL and empty before calling this.
  // Tag-dispatched on whether the allocator supports realloc-style
  // resizing (see the call sites for how the tag is computed).
  void resize_table(size_type /*old_size*/, size_type new_size,
                    std::true_type) {
    // realloc path: resize the existing block in place if possible.
    table = val_info.realloc_or_die(table, new_size);
  }

  void resize_table(size_type old_size, size_type new_size, std::false_type) {
    // No realloc support: release the old block and allocate a fresh one.
    val_info.deallocate(table, old_size);
    table = val_info.allocate(new_size);
  }
| 641 | |
  // Used to actually do the rehashing when we grow/shrink a hashtable.
  // Re-inserts every live element of ht into this table, sized to hold at
  // least min_buckets_wanted.  Moves elements when ht is an rvalue,
  // copies when it is an lvalue.
  template <typename Hashtable>
  void copy_or_move_from(Hashtable&& ht, size_type min_buckets_wanted) {
    clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    for (auto&& value : ht) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe (quadratic by default; see JUMP_) for the first empty
      // bucket.  NOTE: the 'key' argument to JUMP_ is never expanded --
      // both macro definitions ignore it -- so the undeclared name is
      // harmless.
      for (bucknum = hash(get_key(value)) & bucket_count_minus_one;
           !test_empty(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>" );
      }

      // Forward as an rvalue only when the source table itself was an
      // rvalue; otherwise copy.
      using will_move = std::is_rvalue_reference<Hashtable&&>;
      using value_t = typename std::conditional<will_move::value, value_type&&, const_reference>::type;

      set_value(&table[bucknum], std::forward<value_t>(value));
      num_elements++;
    }
    settings.inc_num_ht_copies();
  }
| 672 | |
  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements. As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {  // resize to this or larger
    // Shrink first if an earlier erase flagged it (or caller asked via
    // req_elements == 0), then grow to fit the requested element count.
    if (settings.consider_shrink() || req_elements == 0) maybe_shrink();
    if (req_elements > num_elements) resize_delta(req_elements - num_elements);
  }
| 682 | |
  // Get and change the value of shrink_factor and enlarge_factor. The
  // description at the beginning of this file explains how to choose
  // the values. Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    // The cached thresholds are derived from the factors, so they must
    // be recomputed whenever the factors change.
    settings.reset_thresholds(bucket_count());
  }
| 695 | |
  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor. We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  // Note: bucket-array allocation is deferred; `table` stays NULL
  // until the empty key is set (see the comment in the body).
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey(),
                           const SetKey& set = SetKey(),
                           const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        num_elements(0),
        num_buckets(expected_max_items_in_table == 0
                        ? HT_DEFAULT_STARTING_BUCKETS
                        : settings.min_buckets(expected_max_items_in_table, 0)),
        val_info(alloc_impl<value_alloc_type>(alloc)),
        table(NULL) {
    // table is NULL until emptyval is set. However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    settings.reset_thresholds(bucket_count());
  }
| 719 | |
  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // The allocator and functors are copied from ht.
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(ht.val_info),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_from will crash, so we do our own copying.
      // ht must be empty in that case, so only sizes need to carry over.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(ht, min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
| 741 | |
  // Move constructor: default-construct an empty table, then steal
  // ht's storage via swap.  ht is left as a valid, default-sized table.
  dense_hashtable(dense_hashtable&& ht)
      : dense_hashtable() {
    swap(ht);
  }
| 746 | |
  // Move-and-resize constructor (used by the resizing machinery):
  // like the copy counterpart above, but elements are moved out of ht.
  // Note the allocator (val_info) is moved, not copied.
  dense_hashtable(dense_hashtable&& ht,
                  size_type min_buckets_wanted)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(std::move(ht.val_info)),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_or_move_from will crash, so we do our own copying.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(std::move(ht), min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
| 766 | |
  // Copy assignment.  Deliberately does NOT copy the allocator (it may
  // not be copyable); everything else is brought over from ht.
  dense_hashtable& operator=(const dense_hashtable& ht) {
    if (&ht == this) return *this;  // don't copy onto ourselves
    if (!ht.settings.use_empty()) {
      // Source has no empty key set; it must be empty, so an
      // empty table carrying ht's thresholds is the correct result.
      assert(ht.empty());
      dense_hashtable empty_table(ht);  // empty table with ht's thresholds
      this->swap(empty_table);
      return *this;
    }
    settings = ht.settings;
    key_info = ht.key_info;
    // copy_or_move_from() calls clear and sets num_deleted to 0 too
    copy_or_move_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }
| 782 | |
| 783 | dense_hashtable& operator=(dense_hashtable&& ht) { |
| 784 | assert(&ht != this); // this should not happen |
| 785 | swap(ht); |
| 786 | return *this; |
| 787 | } |
| 788 | |
  // Destroy every bucket's value, then return the bucket array to the
  // allocator.  `table` may still be NULL if no empty key was ever set.
  ~dense_hashtable() {
    if (table) {
      destroy_buckets(0, num_buckets);
      val_info.deallocate(table, num_buckets);
    }
  }
| 795 | |
  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    std::swap(settings, ht.settings);
    std::swap(key_info, ht.key_info);
    std::swap(num_deleted, ht.num_deleted);
    std::swap(num_elements, ht.num_elements);
    std::swap(num_buckets, ht.num_buckets);
    std::swap(table, ht.table);
    // Both tables changed size, so both threshold caches are stale.
    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }
| 808 | |
 private:
  // Destroy all elements and re-establish an all-empty table of
  // new_num_buckets buckets, allocating (or reallocating) as needed.
  // Resets num_elements/num_deleted and the resize thresholds.
  void clear_to_size(size_type new_num_buckets) {
    if (!table) {
      table = val_info.allocate(new_num_buckets);
    } else {
      destroy_buckets(0, num_buckets);
      if (new_num_buckets != num_buckets) {  // resize, if necessary
        // realloc is only usable with the default
        // libc_allocator_with_realloc; dispatch at compile time.
        typedef std::integral_constant<
            bool, std::is_same<value_alloc_type,
                               libc_allocator_with_realloc<value_type>>::value>
            realloc_ok;
        resize_table(num_buckets, new_num_buckets, realloc_ok());
      }
    }
    assert(table);
    fill_range_with_empty(table, new_num_buckets);
    num_elements = 0;
    num_deleted = 0;
    num_buckets = new_num_buckets;  // our new size
    settings.reset_thresholds(bucket_count());
  }
| 830 | |
| 831 | public: |
| 832 | // It's always nice to be able to clear a table without deallocating it |
| 833 | void clear() { |
| 834 | // If the table is already empty, and the number of buckets is |
| 835 | // already as we desire, there's nothing to do. |
| 836 | const size_type new_num_buckets = settings.min_buckets(0, 0); |
| 837 | if (num_elements == 0 && new_num_buckets == num_buckets) { |
| 838 | return; |
| 839 | } |
| 840 | clear_to_size(new_num_buckets); |
| 841 | } |
| 842 | |
  // Clear the table without resizing it.
  // Mimicks the stl_hashtable's behaviour when clear()-ing in that it
  // does not modify the bucket count
  void clear_no_resize() {
    if (num_elements > 0) {
      assert(table);
      destroy_buckets(0, num_buckets);
      fill_range_with_empty(table, num_buckets);
    }
    // don't consider to shrink before another erase()
    settings.reset_thresholds(bucket_count());
    num_elements = 0;
    num_deleted = 0;
  }
| 857 | |
  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  template <typename K>
  std::pair<size_type, size_type> find_position(const K& key) const {
    size_type num_probes = 0;  // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    // Masking works because bucket_count() is always a power of two.
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    while (1) {  // probe until something happens
      if (test_empty(bucknum)) {  // bucket is empty
        // An empty bucket terminates the probe chain: the key cannot
        // be anywhere further along it.
        if (insert_pos == ILLEGAL_BUCKET)  // found no prior place to insert
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if (test_deleted(bucknum)) {  // keep searching, but mark to insert
        if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum;

      } else if (equals(key, get_key(table[bucknum]))) {
        return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;  // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count() &&
             "Hashtable is full: an error in key_equal<> or hash<>" );
    }
  }
| 890 | |
| 891 | public: |
| 892 | template <typename K> |
| 893 | iterator find(const K& key) { |
| 894 | if (size() == 0) return end(); |
| 895 | std::pair<size_type, size_type> pos = find_position(key); |
| 896 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
| 897 | return end(); |
| 898 | else |
| 899 | return iterator(this, table + pos.first, table + num_buckets, false); |
| 900 | } |
| 901 | |
| 902 | template <typename K> |
| 903 | const_iterator find(const K& key) const { |
| 904 | if (size() == 0) return end(); |
| 905 | std::pair<size_type, size_type> pos = find_position(key); |
| 906 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
| 907 | return end(); |
| 908 | else |
| 909 | return const_iterator(this, table + pos.first, table + num_buckets, |
| 910 | false); |
| 911 | } |
| 912 | |
| 913 | // This is a tr1 method: the bucket a given key is in, or what bucket |
| 914 | // it would be put in, if it were to be inserted. Shrug. |
| 915 | size_type bucket(const key_type& key) const { |
| 916 | std::pair<size_type, size_type> pos = find_position(key); |
| 917 | return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; |
| 918 | } |
| 919 | |
| 920 | // Counts how many elements have key key. For maps, it's either 0 or 1. |
| 921 | template <typename K> |
| 922 | size_type count(const K& key) const { |
| 923 | std::pair<size_type, size_type> pos = find_position(key); |
| 924 | return pos.first == ILLEGAL_BUCKET ? 0 : 1; |
| 925 | } |
| 926 | |
| 927 | // Likewise, equal_range doesn't really make sense for us. Oh well. |
| 928 | template <typename K> |
| 929 | std::pair<iterator, iterator> equal_range(const K& key) { |
| 930 | iterator pos = find(key); // either an iterator or end |
| 931 | if (pos == end()) { |
| 932 | return std::pair<iterator, iterator>(pos, pos); |
| 933 | } else { |
| 934 | const iterator startpos = pos++; |
| 935 | return std::pair<iterator, iterator>(startpos, pos); |
| 936 | } |
| 937 | } |
| 938 | template <typename K> |
| 939 | std::pair<const_iterator, const_iterator> equal_range( |
| 940 | const K& key) const { |
| 941 | const_iterator pos = find(key); // either an iterator or end |
| 942 | if (pos == end()) { |
| 943 | return std::pair<const_iterator, const_iterator>(pos, pos); |
| 944 | } else { |
| 945 | const const_iterator startpos = pos++; |
| 946 | return std::pair<const_iterator, const_iterator>(startpos, pos); |
| 947 | } |
| 948 | } |
| 949 | |
  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  // Constructs a value from args... directly into bucket `pos`, which
  // must be empty or deleted (as reported by find_position).
  template <typename... Args>
  iterator insert_at(size_type pos, Args&&... args) {
    if (size() >= max_size()) {
      throw std::length_error("insert overflow" );
    }
    if (test_deleted(pos)) {  // just replace if it's been del.
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert(num_deleted > 0);
      --num_deleted;  // used to be, now it isn't
    } else {
      ++num_elements;  // replacing an empty bucket
    }
    set_value(&table[pos], std::forward<Args>(args)...);
    return iterator(this, table + pos, table + num_buckets, false);
  }
| 970 | |
| 971 | // If you know *this is big enough to hold obj, use this routine |
| 972 | template <typename K, typename... Args> |
| 973 | std::pair<iterator, bool> insert_noresize(K&& key, Args&&... args) { |
| 974 | // First, double-check we're not inserting delkey or emptyval |
| 975 | assert(settings.use_empty() && "Inserting without empty key" ); |
| 976 | assert(!equals(std::forward<K>(key), key_info.empty_key) && "Inserting the empty key" ); |
| 977 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && "Inserting the deleted key" ); |
| 978 | |
| 979 | const std::pair<size_type, size_type> pos = find_position(key); |
| 980 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
| 981 | return std::pair<iterator, bool>( |
| 982 | iterator(this, table + pos.first, table + num_buckets, false), |
| 983 | false); // false: we didn't insert |
| 984 | } else { // pos.second says where to put it |
| 985 | return std::pair<iterator, bool>(insert_at(pos.second, std::forward<Args>(args)...), true); |
| 986 | } |
| 987 | } |
| 988 | |
  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
    // Multi-pass iterators let us count the range up front and grow
    // the table once instead of per element.
    size_t dist = std::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)()) {
      throw std::length_error("insert-range overflow" );
    }
    resize_delta(static_cast<size_type>(dist));
    for (; dist > 0; --dist, ++f) {
      insert_noresize(get_key(*f), *f);
    }
  }
| 1002 | |
| 1003 | // (2) Arbitrary iterator, can't tell how much to resize |
| 1004 | template <class InputIterator> |
| 1005 | void insert(InputIterator f, InputIterator l, std::input_iterator_tag) { |
| 1006 | for (; f != l; ++f) insert(*f); |
| 1007 | } |
| 1008 | |
| 1009 | public: |
| 1010 | // This is the normal insert routine, used by the outside world |
| 1011 | template <typename Arg> |
| 1012 | std::pair<iterator, bool> insert(Arg&& obj) { |
| 1013 | resize_delta(1); // adding an object, grow if need be |
| 1014 | return insert_noresize(get_key(std::forward<Arg>(obj)), std::forward<Arg>(obj)); |
| 1015 | } |
| 1016 | |
  // Emplace: `key` is used both for lookup and as the first constructor
  // argument of the stored value.
  template <typename K, typename... Args>
  std::pair<iterator, bool> emplace(K&& key, Args&&... args) {
    resize_delta(1);
    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
    // NOTE(review): both forwards are mere casts at this call site, and
    // insert_noresize reads the key parameter before constructing the
    // value, so the same object being bound twice appears safe -- but
    // it relies on that ordering; confirm before reordering callees.
    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
  }
| 1023 | |
| 1024 | template <typename K, typename... Args> |
| 1025 | std::pair<iterator, bool> emplace_hint(const_iterator hint, K&& key, Args&&... args) { |
| 1026 | resize_delta(1); |
| 1027 | |
| 1028 | if (equals(key, hint->first)) { |
| 1029 | return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end), false), false}; |
| 1030 | } |
| 1031 | |
| 1032 | // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself |
| 1033 | return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...); |
| 1034 | } |
| 1035 | |
| 1036 | // When inserting a lot at a time, we specialize on the type of iterator |
| 1037 | template <class InputIterator> |
| 1038 | void insert(InputIterator f, InputIterator l) { |
| 1039 | // specializes on iterator type |
| 1040 | insert(f, l, |
| 1041 | typename std::iterator_traits<InputIterator>::iterator_category()); |
| 1042 | } |
| 1043 | |
  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  // NOTE(review): despite the comment above, T here is constructed via
  // T() and passed as a constructor argument -- presumably the
  // default-value type; confirm against callers.
  template <class T, class K>
  value_type& find_or_insert(K&& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert(
        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
        "Inserting the empty key" );
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Inserting the deleted key" );
    const std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {  // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(std::forward<K>(key), std::forward<K>(key), T()).first;
    } else {  // no need to rehash, insert right here
      return *insert_at(pos.second, std::forward<K>(key), T());
    }
  }
| 1064 | |
| 1065 | // DELETION ROUTINES |
| 1066 | size_type erase(const key_type& key) { |
| 1067 | // First, double-check we're not trying to erase delkey or emptyval. |
| 1068 | assert( |
| 1069 | (!settings.use_empty() || !equals(key, key_info.empty_key)) && |
| 1070 | "Erasing the empty key" ); |
| 1071 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && |
| 1072 | "Erasing the deleted key" ); |
| 1073 | const_iterator pos = find(key); // shrug: shouldn't need to be const |
| 1074 | if (pos != end()) { |
| 1075 | assert(!test_deleted(pos)); // or find() shouldn't have returned it |
| 1076 | set_deleted(pos); |
| 1077 | ++num_deleted; |
| 1078 | settings.set_consider_shrink( |
| 1079 | true); // will think about shrink after next insert |
| 1080 | return 1; // because we deleted one thing |
| 1081 | } else { |
| 1082 | return 0; // because we deleted nothing |
| 1083 | } |
| 1084 | } |
| 1085 | |
  // We return the iterator past the deleted item.
  iterator erase(const_iterator pos) {
    if (pos == end()) return end();  // sanity check
    if (set_deleted(pos)) {  // true if object has been newly deleted
      ++num_deleted;
      settings.set_consider_shrink(
          true);  // will think about shrink after next insert
    }
    // Last arg `true`: the returned iterator starts at pos and is
    // advanced past deleted buckets, i.e. to the next live element.
    return iterator(this, const_cast<pointer>(pos.pos), const_cast<pointer>(pos.end), true);
  }
| 1096 | |
  // Erase the half-open range [f, l); returns an iterator at l's
  // position.  Deletion is lazy (buckets are only marked).
  iterator erase(const_iterator f, const_iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    settings.set_consider_shrink(
        true);  // will think about shrink after next insert
    // f == l here; last arg `false` means no advance past deleted.
    return iterator(this, const_cast<pointer>(f.pos), const_cast<pointer>(f.end), false);
  }
| 1106 | |
| 1107 | // COMPARISON |
| 1108 | bool operator==(const dense_hashtable& ht) const { |
| 1109 | if (size() != ht.size()) { |
| 1110 | return false; |
| 1111 | } else if (this == &ht) { |
| 1112 | return true; |
| 1113 | } else { |
| 1114 | // Iterate through the elements in "this" and see if the |
| 1115 | // corresponding element is in ht |
| 1116 | for (const_iterator it = begin(); it != end(); ++it) { |
| 1117 | const_iterator it2 = ht.find(get_key(*it)); |
| 1118 | if ((it2 == ht.end()) || (*it != *it2)) { |
| 1119 | return false; |
| 1120 | } |
| 1121 | } |
| 1122 | return true; |
| 1123 | } |
| 1124 | } |
| 1125 | bool operator!=(const dense_hashtable& ht) const { return !(*this == ht); } |
| 1126 | |
  // I/O
  // We support reading and writing hashtables to disk. Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same. We compact before writing.
 private:
  // Every time the disk format changes, this should probably change too
  // (only the low 4 bytes are written -- see serialize()).
  typedef unsigned long MagicNumberType;
  static const MagicNumberType MAGIC_NUMBER = 0x13578642;
| 1135 | |
 public:
  // I/O -- this is an add-on for writing hash table to disk
  //
  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
  // (istream, ostream, etc) *or* a class providing
  // Read(void*, size_t) and Write(const void*, size_t)
  // (respectively), which writes a buffer into a stream
  // (which the INPUT/OUTPUT instance presumably owns).

  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;

  // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    // Header: 4-byte magic number, then 8-byte bucket count and 8-byte
    // element count, all written big-endian.
    if (!sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_buckets, 8))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_elements, 8))
      return false;
    // Now write a bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits = 0;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && !test_empty(i + bit)) bits |= (1 << bit);
      }
      if (!sparsehash_internal::write_data(fp, &bits, sizeof(bits)))
        return false;
      // Each set bit is immediately followed by that bucket's value.
      for (int bit = 0; bit < 8; ++bit) {
        if (bits & (1 << bit)) {
          if (!serializer(fp, table[i + bit])) return false;
        }
      }
    }
    return true;
  }
| 1173 | |
  // INPUT: anything we've written an overload of read_data() for.
  // ValueSerializer: a functor. operator()(INPUT*, value_type*)
  // Reads the format produced by serialize() above; returns false on
  // any read error or on a magic-number mismatch.
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    assert(settings.use_empty() && "empty_key not set for read" );

    clear();  // just to be consistent
    MagicNumberType magic_read;
    if (!sparsehash_internal::read_bigendian_number(fp, &magic_read, 4))
      return false;
    if (magic_read != MAGIC_NUMBER) {
      return false;
    }
    size_type new_num_buckets;
    if (!sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8))
      return false;
    // Re-establish an all-empty table of the serialized size before
    // reading elements into it.
    clear_to_size(new_num_buckets);
    if (!sparsehash_internal::read_bigendian_number(fp, &num_elements, 8))
      return false;

    // Read the bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits;
      if (!sparsehash_internal::read_data(fp, &bits, sizeof(bits)))
        return false;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && (bits & (1 << bit))) {  // not empty
          if (!serializer(fp, &table[i + bit])) return false;
        }
      }
    }
    return true;
  }
| 1207 | |
 private:
  // Wraps an arbitrary allocator and adds a realloc_or_die() hook.
  // This generic version cannot realloc, so it aborts if called; the
  // specialization below provides the real implementation.
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) {}

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr,
              "realloc_or_die is only supported for "
              "libc_allocator_with_realloc\n" );
      exit(1);
      return NULL;  // unreachable; silences compilers that want a return
    }
  };
| 1228 | |
  // A template specialization of alloc_impl for
  // libc_allocator_with_realloc that can handle realloc_or_die.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A>>
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) {}

    // Grow/shrink `ptr` to hold n elements, aborting the process when
    // the underlying realloc fails (out of memory is unrecoverable here).
    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        fprintf(stderr,
                "sparsehash: FATAL ERROR: failed to reallocate "
                "%lu elements for ptr %p" ,
                static_cast<unsigned long>(n), static_cast<void*>(ptr));
        exit(1);
      }
      return retval;
    }
  };
| 1253 | |
  // Package allocator with emptyval to eliminate memory needed for
  // the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  class ValInfo : public alloc_impl<value_alloc_type> {
   public:
    typedef typename alloc_impl<value_alloc_type>::value_type value_type;

    ValInfo(const alloc_impl<value_alloc_type>& a)
        : alloc_impl<value_alloc_type>(a) {}
  };
| 1265 | |
  // Package functors with another class to eliminate memory needed for
  // zero-size functors. Since ExtractKey and hasher's operator() might
  // have the same function signature, they must be packaged in
  // different classes.
  struct Settings
      : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                   HT_MIN_BUCKETS> {
    // Seeds the shared settings with the default occupancy/empty
    // percentages converted to [0,1] factors.
    explicit Settings(const hasher& hf)
        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
                                                     size_type, HT_MIN_BUCKETS>(
              hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };
| 1278 | |
  // Packages ExtractKey and SetKey functors.
  // Inherits from all three functors (empty-base optimization) and also
  // stores the sentinel keys used to mark deleted and empty buckets.
  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
        : ExtractKey(ek), SetKey(sk), EqualKey(eq) {}

    // We want to return the exact same type as ExtractKey: Key or const Key&
    template <typename V>
    typename ExtractKey::result_type get_key(V&& v) const {
      return ExtractKey::operator()(std::forward<V>(v));
    }
    // Overwrite the key portion of *v with k.
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    // Like set_key, but the extra `true` flag selects construction
    // rather than assignment in the SetKey functor.
    void construct_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k, true);
    }
    template <typename K1, typename K2>
    bool equals(const K1& a, const K2& b) const {
      return EqualKey::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename std::remove_const<key_type>::type delkey;
    // Which key marks never-used (empty) buckets.
    typename std::remove_const<key_type>::type empty_key;
  };
| 1306 | |
  // Utility functions to access the templated operators
  // (thin forwards to the packaged functors above).
  template <typename K>
  size_type hash(const K& v) const { return settings.hash(v); }
  template <typename K1, typename K2>
  bool equals(const K1& a, const K2& b) const {
    return key_info.equals(a, b);
  }
  template <typename V>
  typename ExtractKey::result_type get_key(V&& v) const {
    return key_info.get_key(std::forward<V>(v));
  }
  void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); }
  void construct_key(pointer v, const key_type& k) const { key_info.construct_key(v, k); }
| 1320 | |
 private:
  // Actual data
  Settings settings;  // hasher plus cached resize thresholds/factors
  KeyInfo key_info;   // key functors plus empty/deleted sentinel keys

  size_type num_deleted;  // how many occupied buckets are marked deleted
  size_type num_elements; // live elements (includes deleted-marked ones until purged)
  size_type num_buckets;  // size of the `table` array; always a power of two
  ValInfo val_info;  // holds emptyval, and also the allocator
  pointer table;     // the bucket array; NULL until first allocation
| 1331 | }; |
| 1332 | |
// We need a global swap as well
// (forwards to the member swap so ADL-based generic code finds it)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& x,
                 dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) {
  x.swap(y);
}
| 1339 | |
#undef JUMP_

// Out-of-line definition of the class-scope constant (required when it
// is odr-used, pre-C++17).
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type
    dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET;
| 1345 | |
// How full we let the table get before we resize. Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// However, we go with .5, getting better performance at the cost of
// more space (a trade-off densehashtable explicitly chooses to make).
// Feel free to play around with different values, though, via
// max_load_factor() and/or set_resizing_parameters().
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 50;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
// (0.4 * 50 = 20%, which satisfies that constraint.)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
    static_cast<int>(
        0.4 * dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
| 1361 | |
| 1362 | } // namespace google |
| 1363 | |