// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
//
// A sparse hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory use.
// It does this by using a *sparse table* (cf. sparsetable.h),
// which uses between 1 and 2 bits to store empty buckets
// (we may need another bit for hashtables that support deletion).
//
// When empty buckets are so cheap, an appealing hashtable
// implementation is internal probing, in which the hashtable
// is a single table, and collisions are resolved by trying
// to insert again in another bucket. The most cache-efficient
// internal probing schemes are linear probing (which suffers,
// alas, from clumping) and quadratic probing, which is what
// we implement by default.
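//
// For a concrete picture of the quadratic scheme (a sketch; the actual
// probe step is the JUMP_ macro below): with 8 buckets and a key whose
// hash lands on bucket 5, successive probes add 1, 2, 3, ... (mod 8),
// visiting buckets 5, 6, 0, 3, 7, 4, 2, 1. Because the table size is a
// power of two, this triangular-number sequence eventually reaches
// every bucket.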
//
// Deleted buckets are a bit of a pain. We have to somehow mark
// deleted buckets (the probing must distinguish them from empty
// buckets). The most principled way is to have another bitmap,
// but that's annoying and takes up space. Instead we let the
// user specify an "impossible" key. We set deleted buckets
// to have the impossible key.
//
// Note it is possible to change the value of the deleted key
// on the fly; you can even remove it, though after that point
// the hashtable is insert-only until you set it again.
//
// You probably shouldn't use this code directly. Use
// sparse_hash_map<> or sparse_hash_set<> instead.
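//
// For instance, the usual flow through the public wrappers looks like
// this (a sketch; any key value works as the deleted key as long as it
// is never inserted for real):
//
//   google::sparse_hash_map<std::string, int> m;
//   m.set_deleted_key("");   // "" must never be used as a real key
//   m["foo"] = 1;
//   m.erase("foo");          // marks the bucket with the deleted key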
//
// You can modify the following, below:
// HT_OCCUPANCY_PCT            -- how full before we double size
// HT_EMPTY_PCT                -- how empty before we halve size
// HT_MIN_BUCKETS              -- smallest bucket size
// HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time
//
// You can also change enlarge_factor (which defaults to
// HT_OCCUPANCY_PCT / 100.0) and shrink_factor (which defaults to
// HT_EMPTY_PCT / 100.0) with set_resizing_parameters().
//
// How to decide what values to use?
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For enlarge_factor, you can use this chart to trade off expected
// lookup time against the space taken up. By default, this code
// uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From
// http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP        Successful           Unsuccessful
// Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
// Linear collision resolution      [1+1/(1-L)]/2        [1+1/(1-L)^2]/2
//
// -- enlarge_factor --            0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup     1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup   1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup     1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup   1.12  2.5   3.6   8.5   13.0  50.0  5000.0
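//
// As a worked example of reading the chart: at enlarge_factor 0.80, an
// unsuccessful lookup under linear probing costs [1 + 1/(1-0.8)^2]/2 =
// [1 + 25]/2 = 13.0 probes on average, the value in the 0.80 column.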
//
// The value type is required to be copy constructible and default
// constructible, but it need not be (and commonly isn't) assignable.

#pragma once

#include <assert.h>
#include <stdint.h>     // for uint16_t (DEFAULT_GROUP_SIZE below)
#include <algorithm>    // For swap(), e.g.
#include <iterator>     // for iterator tags
#include <limits>       // for numeric_limits
#include <utility>      // for pair
#include <type_traits>  // for remove_const
#include <sparsehash/internal/hashtable-common.h>
#include <sparsehash/sparsetable>  // IWYU pragma: export
#include <stdexcept>    // For length_error

namespace google {

#ifndef SPARSEHASH_STAT_UPDATE
#define SPARSEHASH_STAT_UPDATE(x) ((void)0)
#endif

// The probing method
// Linear probing
// #define JUMP_(key, num_probes) ( 1 )
// Quadratic probing
#define JUMP_(key, num_probes) (num_probes)

// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups. Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
static const uint16_t DEFAULT_GROUP_SIZE = 48;  // fits in 1.5 words
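// (One reading of the 32*x + 16 rule: 48 bits of bitmap is exactly
// 6 bytes, which together with a 2-byte short packs into one 8-byte
// word with no padding; 48 = 32*1 + 16.)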

// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.
//
// Value: what is stored in the table (each bucket is a Value).
// Key: something in a 1-to-1 correspondence to a Value, that can be used
//      to search for a Value in the table (find() takes a Key).
// HashFcn: Takes a Key and returns an integer, the more unique the better.
// ExtractKey: given a Value, returns the unique Key associated with it.
//             Must inherit from unary_function, or at least have a
//             result_type typedef indicating the return type of operator().
// SetKey: given a Value* and a Key, modifies the value such that
//         ExtractKey(value) == key. We guarantee this is only called
//         with key == deleted_key.
// EqualKey: Given two Keys, says whether they are the same (that is,
//           if they are both associated with the same Value).
// Alloc: STL allocator to use to allocate memory.
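//
// For a map-like instantiation, ExtractKey and SetKey might look roughly
// like the following (a sketch; the real functors live in sparse_hash_map
// and operate on std::pair<const Key, T>):
//
//   struct SelectKey {
//     typedef const Key& result_type;
//     const Key& operator()(const std::pair<const Key, T>& p) const {
//       return p.first;
//     }
//   };
//   struct SetKey {
//     void operator()(std::pair<const Key, T>* value, const Key& k) const {
//       *const_cast<Key*>(&value->first) = k;  // only ever called with delkey
//     }
//   };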

template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
          class EqualKey, class Alloc>
class sparse_hashtable;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator;

// As far as iterating, we're basically just a sparsetable
// that skips over deleted elements.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator {
 private:
  using value_alloc_type =
      typename std::allocator_traits<A>::template rebind_alloc<V>;

 public:
  typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
      const_iterator;
  typedef typename sparsetable<V, DEFAULT_GROUP_SIZE,
                               value_alloc_type>::nonempty_iterator st_iterator;

  typedef std::forward_iterator_tag iterator_category;  // very little defined!
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_iterator(
      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
      st_iterator it_end)
      : ht(h), pos(it), end(it_end) {
    advance_past_deleted();
  }
  sparse_hashtable_iterator() {}  // not ever used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic. The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while (pos != end && ht->test_deleted(*this)) ++pos;
  }
  iterator& operator++() {
    assert(pos != end);
    ++pos;
    advance_past_deleted();
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }

  // The actual data
  const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
  st_iterator pos, end;
};

// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator {
 private:
  using value_alloc_type =
      typename std::allocator_traits<A>::template rebind_alloc<V>;

 public:
  typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
      const_iterator;
  typedef typename sparsetable<V, DEFAULT_GROUP_SIZE,
                               value_alloc_type>::const_nonempty_iterator
      st_iterator;

  typedef std::forward_iterator_tag iterator_category;  // very little defined!
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::const_reference reference;
  typedef typename value_alloc_type::const_pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_const_iterator(
      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
      st_iterator it_end)
      : ht(h), pos(it), end(it_end) {
    advance_past_deleted();
  }
  // This lets us convert regular iterators to const iterators
  sparse_hashtable_const_iterator() {}  // never used internally
  sparse_hashtable_const_iterator(const iterator& it)
      : ht(it.ht), pos(it.pos), end(it.end) {}
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic. The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while (pos != end && ht->test_deleted(*this)) ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end);
    ++pos;
    advance_past_deleted();
    return *this;
  }
  const_iterator operator++(int) {
    const_iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Comparison.
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }

  // The actual data
  const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
  st_iterator pos, end;
};

// And once again, but this time freeing up memory as we iterate
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_destructive_iterator {
 private:
  using value_alloc_type =
      typename std::allocator_traits<A>::template rebind_alloc<V>;

 public:
  typedef sparse_hashtable_destructive_iterator<V, K, HF, ExK, SetK, EqK, A>
      iterator;
  typedef
      typename sparsetable<V, DEFAULT_GROUP_SIZE,
                           value_alloc_type>::destructive_iterator st_iterator;

  typedef std::forward_iterator_tag iterator_category;  // very little defined!
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_destructive_iterator(
      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
      st_iterator it_end)
      : ht(h), pos(it), end(it_end) {
    advance_past_deleted();
  }
  sparse_hashtable_destructive_iterator() {}  // never used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic. The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while (pos != end && ht->test_deleted(*this)) ++pos;
  }
  iterator& operator++() {
    assert(pos != end);
    ++pos;
    advance_past_deleted();
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }

  // The actual data
  const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
  st_iterator pos, end;
};

template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
          class EqualKey, class Alloc>
class sparse_hashtable {
 private:
  using value_alloc_type =
      typename std::allocator_traits<Alloc>::template rebind_alloc<Value>;

 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
                                    EqualKey, Alloc> iterator;

  typedef sparse_hashtable_const_iterator<
      Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;

  typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey,
                                                SetKey, EqualKey,
                                                Alloc> destructive_iterator;

  // These come from tr1. For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT;  // = 80 (out of 100);

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT;  // = 0.4 * HT_OCCUPANCY_PCT;

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size. Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
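
  // Roughly, min_buckets() (in the Settings class below) picks the
  // smallest power of two that keeps occupancy at or below
  // HT_OCCUPANCY_PCT: e.g. room for 100 items at 80% occupancy needs
  // at least 100 / 0.80 = 125 buckets, which rounds up to 128.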

  // ITERATOR FUNCTIONS
  iterator begin() {
    return iterator(this, table.nonempty_begin(), table.nonempty_end());
  }
  iterator end() {
    return iterator(this, table.nonempty_end(), table.nonempty_end());
  }
  const_iterator begin() const {
    return const_iterator(this, table.nonempty_begin(), table.nonempty_end());
  }
  const_iterator end() const {
    return const_iterator(this, table.nonempty_end(), table.nonempty_end());
  }

  // These come from tr1 unordered_map. They iterate over 'bucket' n.
  // For sparsehashtable, we could consider each 'group' to be a bucket,
  // I guess, but I don't really see the point. We'll just consider
  // bucket n to be the n-th element of the sparsetable, if it's occupied,
  // or some empty element, otherwise.
  local_iterator begin(size_type i) {
    if (table.test(i))
      return local_iterator(this, table.get_iter(i), table.nonempty_end());
    else
      return local_iterator(this, table.nonempty_end(), table.nonempty_end());
  }
  local_iterator end(size_type i) {
    local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i)) ++it;
    return it;
  }
  const_local_iterator begin(size_type i) const {
    if (table.test(i))
      return const_local_iterator(this, table.get_iter(i),
                                  table.nonempty_end());
    else
      return const_local_iterator(this, table.nonempty_end(),
                                  table.nonempty_end());
  }
  const_local_iterator end(size_type i) const {
    const_local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i)) ++it;
    return it;
  }

  // This is used when resizing
  destructive_iterator destructive_begin() {
    return destructive_iterator(this, table.destructive_begin(),
                                table.destructive_end());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(this, table.destructive_end(),
                                table.destructive_end());
  }

  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const { return table.get_allocator(); }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }

 private:
  // We need to copy values when we set the special marker for deleted
  // elements, but, annoyingly, we can't just use the copy assignment
  // operator because value_type might not be assignable (it's often
  // pair<const X, Y>). We use explicit destructor invocation and
  // placement new to get around this. Arg.
  void set_value(pointer dst, const_reference src) {
    dst->~value_type();  // delete the old value, if any
    new (dst) value_type(src);
  }

  // This is used as a tag for the copy constructor, saying to destroy its
  // arg. We have two ways of destructively copying: with potentially
  // growing the hashtable as we copy, and without. To make sure the
  // outside world can't do a destructive copy, we make this type private.
  enum MoveDontCopyT { MoveDontCopy, MoveDontGrow };

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries. This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at. This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {  // gets rid of any deleted entries we have
    if (num_deleted) {     // get rid of deleted before writing
      sparse_hashtable tmp(MoveDontGrow, *this);
      swap(tmp);  // now we are tmp
    }
    assert(num_deleted == 0);
  }

  // Test if the given key is the deleted indicator. Requires
  // num_deleted > 0, for correctness of read(), and because that
  // guarantees that key_info.delkey is valid.
  bool test_deleted_key(const key_type& key) const {
    assert(num_deleted > 0);
    return equals(key_info.delkey, key);
  }

 public:
  void set_deleted_key(const key_type& key) {
    // It's only safe to change what "deleted" means if we purge deleted
    // guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  key_type deleted_key() const {
    assert(settings.use_deleted() &&
           "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is the "deleted" marker
  bool test_deleted(size_type bucknum) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && table.test(bucknum) &&
           test_deleted_key(get_key(table.unsafe_get(bucknum)));
  }
  bool test_deleted(const iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }
  bool test_deleted(const const_iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }
  bool test_deleted(const destructive_iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }

 private:
  void check_use_deleted(const char* caller) {
    (void)caller;  // could log it if the assert failed
    assert(settings.use_deleted());
  }

  // Set it so test_deleted is true. Returns true if the object was not
  // previously deleted.
  // TODO(csilvers): make these private (also in densehashtable.h)
  bool set_deleted(iterator& it) {
    check_use_deleted("set_deleted()");
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false. Returns true if the object was
  // previously deleted.
  bool clear_deleted(iterator& it) {
    check_use_deleted("clear_deleted()");
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow you to set/clear the deleted bit on a const iterator.
  // We allow a const_iterator for the same reason you can delete a
  // const pointer: it's convenient, and semantically you can't use
  // 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator& it) {
    check_use_deleted("set_deleted()");
    bool retval = !test_deleted(it);
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false. Returns true if the object was
  // previously deleted.
  bool clear_deleted(const_iterator& it) {
    check_use_deleted("clear_deleted()");
    return test_deleted(it);
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const { return table.num_nonempty() - num_deleted; }
  size_type max_size() const { return table.max_size(); }
  bool empty() const { return size() == 0; }
  size_type bucket_count() const { return table.size(); }
  size_type max_bucket_count() const { return max_size(); }
  // These are tr1 methods. Their idea of 'bucket' doesn't map well to
  // what we do. We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes. Returns true if we actually shrank.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking. Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(table.num_nonempty() >= num_deleted);
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = table.num_nonempty() - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
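      // For example: with 1024 buckets, the default shrink_factor of .32
      // (= .4 * .80), and 50 elements remaining, the loop below halves
      // 512 -> 256 -> 128 and stops there, since 50 >= 128 * .32.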
      size_type sz = bucket_count() / 2;  // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < static_cast<size_type>(sz * shrink_factor)) {
        sz /= 2;  // stay a power of 2
      }
      sparse_hashtable tmp(MoveDontCopy, *this, sz);
      swap(tmp);  // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);  // because we just considered it
    return retval;
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more
  // elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if (settings.consider_shrink()) {  // see if lots of deletes happened
      if (maybe_shrink()) did_resize = true;
    }
    if (table.num_nonempty() >=
        (std::numeric_limits<size_type>::max)() - delta) {
      throw std::length_error("resize overflow");
    }
    if (bucket_count() >= HT_MIN_BUCKETS &&
        (table.num_nonempty() + delta) <= settings.enlarge_threshold())
      return did_resize;  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable. So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room). But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size =
        settings.min_buckets(table.num_nonempty() + delta, 0);
    if (needed_size <= bucket_count())  // we have enough buckets
      return did_resize;

    size_type resize_to = settings.min_buckets(
        table.num_nonempty() - num_deleted + delta, bucket_count());
    if (resize_to < needed_size &&  // may double resize_to
        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow. But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert. Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to * 2));
      if (table.num_nonempty() - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we
        // double.
        resize_to *= 2;
      }
    }

    sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
    swap(tmp);  // now we are tmp
    return true;
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const sparse_hashtable& ht, size_type min_buckets_wanted) {
    clear();  // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to =
        settings.min_buckets(ht.size(), min_buckets_wanted);
    if (resize_to > bucket_count()) {  // we don't have enough buckets
      table.resize(resize_to);         // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    for (const_iterator it = ht.begin(); it != ht.end(); ++it) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           table.test(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);  // copies the value to here
    }
    settings.inc_num_ht_copies();
  }

  // Implementation is like copy_from, but it destroys the table of the
  // "from" guy by freeing sparsetable memory as we iterate. This is
  // useful in resizing, since we're throwing away the "from" guy anyway.
  void move_from(MoveDontCopyT mover, sparse_hashtable& ht,
                 size_type min_buckets_wanted) {
    clear();  // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    size_type resize_to;
    if (mover == MoveDontGrow)
      resize_to = ht.bucket_count();  // keep same size as old ht
    else  // MoveDontCopy
      resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
    if (resize_to > bucket_count()) {  // we don't have enough buckets
      table.resize(resize_to);         // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a destructive iterator to get non-deleted buckets from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
    for (destructive_iterator it = ht.destructive_begin();
         it != ht.destructive_end(); ++it) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      for (bucknum = hash(get_key(*it)) & (bucket_count() - 1);  // h % buck_cnt
           table.test(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & (bucket_count() - 1)) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);  // copies the value to here
    }
    settings.inc_num_ht_copies();
  }

  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements. As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {  // resize to this or larger
    if (settings.consider_shrink() || req_elements == 0) maybe_shrink();
    if (req_elements > table.num_nonempty())  // we only grow
      resize_delta(req_elements - table.num_nonempty());
  }
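
  // For instance (a usage sketch): calling ht.resize(1000000) before a
  // bulk insert reserves room up front and avoids repeated rehashing,
  // while ht.resize(0) after heavy erasing gives memory back if possible.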

  // Get and change the value of shrink_factor and enlarge_factor. The
  // description at the beginning of this file explains how to choose
  // the values. Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }
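
  // e.g. (a sketch) ht.set_resizing_parameters(0.0f, 0.8f) keeps the
  // default 80% grow trigger but guarantees the table never shrinks.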

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor. We also define a copy constructor and =.
  // DESTRUCTOR -- the default is fine, surprisingly.
  explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
                            const HashFcn& hf = HashFcn(),
                            const EqualKey& eql = EqualKey(),
                            const ExtractKey& ext = ExtractKey(),
                            const SetKey& set = SetKey(),
                            const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        table((expected_max_items_in_table == 0
                   ? HT_DEFAULT_STARTING_BUCKETS
                   : settings.min_buckets(expected_max_items_in_table, 0)),
              alloc) {
    settings.reset_thresholds(bucket_count());
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // We also provide a mechanism of saying you want to "move" the ht argument
  // into us instead of copying.
  sparse_hashtable(const sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    copy_from(ht, min_buckets_wanted);  // copy_from() ignores deleted entries
  }
  sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
  }

  sparse_hashtable& operator=(const sparse_hashtable& ht) {
    if (&ht == this) return *this;  // don't copy onto ourselves
    settings = ht.settings;
    key_info = ht.key_info;
    num_deleted = ht.num_deleted;
    // copy_from() calls clear and sets num_deleted to 0 too
    copy_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparse_hashtable& ht) {
    std::swap(settings, ht.settings);
    std::swap(key_info, ht.key_info);
    std::swap(num_deleted, ht.num_deleted);
    table.swap(ht.table);
    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    if (!empty() || (num_deleted != 0)) {
      table.clear();
    }
    settings.reset_thresholds(bucket_count());
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
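  // For example: if the probe sequence for a key passes deleted buckets
  // 5 and 6 and then hits empty bucket 0 without finding the key, we
  // return (ILLEGAL_BUCKET, 5) -- reusing the first deleted slot keeps
  // the probe chain for future lookups as short as possible.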
  template <typename K>
  std::pair<size_type, size_type> find_position(const K& key) const {
    size_type num_probes = 0;  // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    SPARSEHASH_STAT_UPDATE(total_lookups += 1);
    while (1) {                    // probe until something happens
      if (!table.test(bucknum)) {  // bucket is empty
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        if (insert_pos == ILLEGAL_BUCKET)  // found no prior place to insert
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos);
      } else if (test_deleted(bucknum)) {  // keep searching, but mark to insert
        if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum;
      } else if (equals(key, get_key(table.unsafe_get(bucknum)))) {
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;  // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count() &&
             "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }

 public:
  template <typename K>
  iterator find(const K& key) {
    if (size() == 0) return end();
    std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first == ILLEGAL_BUCKET)  // alas, not there
      return end();
    else
      return iterator(this, table.get_iter(pos.first), table.nonempty_end());
  }

  template <typename K>
  const_iterator find(const K& key) const {
    if (size() == 0) return end();
    std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first == ILLEGAL_BUCKET)  // alas, not there
      return end();
    else
      return const_iterator(this, table.get_iter(pos.first),
                            table.nonempty_end());
  }

  // This is a tr1 method: the bucket a given key is in, or what bucket
  // it would be put in, if it were to be inserted. Shrug.
  size_type bucket(const key_type& key) const {
    std::pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
  }

  // Counts how many elements have the given key. For maps, it's either
  // 0 or 1.
  template <typename K>
  size_type count(const K& key) const {
    std::pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us. Oh well.
  template <typename K>
  std::pair<iterator, iterator> equal_range(const K& key) {
    iterator pos = find(key);  // either an iterator or end
    if (pos == end()) {
      return std::pair<iterator, iterator>(pos, pos);
    } else {
      const iterator startpos = pos++;
      return std::pair<iterator, iterator>(startpos, pos);
    }
  }
  template <typename K>
  std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
    const_iterator pos = find(key);  // either an iterator or end
    if (pos == end()) {
      return std::pair<const_iterator, const_iterator>(pos, pos);
    } else {
      const const_iterator startpos = pos++;
      return std::pair<const_iterator, const_iterator>(startpos, pos);
    }
  }

  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size()) {
      throw std::length_error("insert overflow");
    }
    if (test_deleted(pos)) {  // just replace if it's been deleted
      // The set() below will undelete this object. We just worry about
      // stats
      assert(num_deleted > 0);
      --num_deleted;  // used to be, now it isn't
    }
    table.set(pos, obj);
    return iterator(this, table.get_iter(pos), table.nonempty_end());
  }

  // If you know *this is big enough to hold obj, use this routine
  std::pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey
    assert(
        (!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) &&
        "Inserting the deleted key");
    const std::pair<size_type, size_type> pos = find_position(get_key(obj));
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return std::pair<iterator, bool>(
          iterator(this, table.get_iter(pos.first), table.nonempty_end()),
          false);  // false: we didn't insert
    } else {       // pos.second says where to put it
      return std::pair<iterator, bool>(insert_at(obj, pos.second), true);
    }
  }

  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
    size_t dist = std::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)()) {
      throw std::length_error("insert-range overflow");
    }
    resize_delta(static_cast<size_type>(dist));
    for (; dist > 0; --dist, ++f) {
      insert_noresize(*f);
    }
  }

  // (2) Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l, std::input_iterator_tag) {
    for (; f != l; ++f) insert(*f);
  }

 public:
  // This is the normal insert routine, used by the outside world
  std::pair<iterator, bool> insert(const_reference obj) {
    resize_delta(1);  // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l,
           typename std::iterator_traits<InputIterator>::iterator_category());
  }

  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting delkey
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Inserting the deleted key");
    const std::pair<size_type, size_type> pos = find_position(key);
    DefaultValue default_value;
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return *table.get_iter(pos.first);
    } else if (resize_delta(1)) {  // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to
      // insert.
      return *insert_noresize(default_value(key)).first;
    } else {  // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }
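
  // A DefaultValue functor for a map-like table might look roughly like
  // this (a sketch; the real one lives in sparse_hash_map):
  //
  //   struct DefaultValue {
  //     std::pair<const Key, T> operator()(const Key& k) const {
  //       return std::make_pair(k, T());
  //     }
  //   };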

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    // First, double-check we're not erasing delkey.
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Erasing the deleted key");
    const_iterator pos = find(key);  // shrug: shouldn't need to be const
    if (pos != end()) {
      assert(!test_deleted(pos));  // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
      return 1;  // because we deleted one thing
    } else {
      return 0;  // because we deleted nothing
    }
  }

  // Erasing via an iterator just marks the entry deleted; the space is
  // reclaimed on the next resize or compaction.
  void erase(iterator pos) {
    if (pos == end()) return;  // sanity check
    if (set_deleted(pos)) {    // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }

  void erase(iterator f, iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }

  // We allow you to erase a const_iterator just like we allow you to
  // erase an iterator. This is in parallel to 'delete': you can delete
  // a const pointer just like a non-const pointer. The logic is that
  // you can't use the object after it's erased anyway, so it doesn't matter
  // if it's const or not.
  void erase(const_iterator pos) {
    if (pos == end()) return;  // sanity check
    if (set_deleted(pos)) {    // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }
  void erase(const_iterator f, const_iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }

  // COMPARISON
  bool operator==(const sparse_hashtable& ht) const {
    if (size() != ht.size()) {
      return false;
    } else if (this == &ht) {
      return true;
    } else {
      // Iterate through the elements in "this" and see if the
      // corresponding element is in ht
      for (const_iterator it = begin(); it != end(); ++it) {
        const_iterator it2 = ht.find(get_key(*it));
        if ((it2 == ht.end()) || (*it != *it2)) {
          return false;
        }
      }
      return true;
    }
  }
  bool operator!=(const sparse_hashtable& ht) const { return !(*this == ht); }

  // I/O
  // We support reading and writing hashtables to disk. NOTE that
  // this only stores the hashtable metadata, not the stuff you've
  // actually put in the hashtable! Alas, since I don't know how to
  // write a hasher or key_equal, you have to make sure everything
  // but the table is the same. We compact before writing.
  //
  // The OUTPUT type needs to support a Write() operation. File and
  // OutputBuffer are appropriate types to pass in.
  //
  // The INPUT type needs to support a Read() operation. File and
  // InputBuffer are appropriate types to pass in.
  template <typename OUTPUT>
  bool write_metadata(OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    return table.write_metadata(fp);
  }

  template <typename INPUT>
  bool read_metadata(INPUT* fp) {
    num_deleted = 0;  // since we got rid before writing
    const bool result = table.read_metadata(fp);
    settings.reset_thresholds(bucket_count());
    return result;
  }

  // Only meaningful if value_type is a POD.
  template <typename OUTPUT>
  bool write_nopointer_data(OUTPUT* fp) {
    return table.write_nopointer_data(fp);
  }

  // Only meaningful if value_type is a POD.
  template <typename INPUT>
  bool read_nopointer_data(INPUT* fp) {
    return table.read_nopointer_data(fp);
  }

  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
  // (istream, ostream, etc) *or* a class providing
  // Read(void*, size_t) and Write(const void*, size_t)
  // (respectively), which writes a buffer into a stream
  // (which the INPUT/OUTPUT instance presumably owns).

  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;

  // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    return table.serialize(serializer, fp);
  }

  // ValueSerializer: a functor. operator()(INPUT*, value_type*)
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    num_deleted = 0;  // since we got rid before writing
    const bool result = table.unserialize(serializer, fp);
    settings.reset_thresholds(bucket_count());
    return result;
  }
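
  // Typical round-trip for a POD value_type (a sketch; error handling
  // omitted, and the reading table must already use the same hasher and
  // key_equal as the one that was written):
  //
  //   FILE* fp = fopen("hashtable.data", "wb");
  //   ht.serialize(NopointerSerializer(), fp);
  //   fclose(fp);
  //   ...
  //   FILE* fp2 = fopen("hashtable.data", "rb");
  //   ht2.unserialize(NopointerSerializer(), fp2);
  //   fclose(fp2);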

 private:
  // Table is the main storage class.
  typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;

  // Package templated functors with the other types to eliminate memory
  // needed for storing these zero-size operators. Since ExtractKey and
  // hasher's operator() might have the same function signature, they
  // must be packaged in different classes.
  struct Settings
      : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                   HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
                                                     size_type, HT_MIN_BUCKETS>(
              hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };

  // KeyInfo stores the deleted key and packages the zero-size functors:
  // ExtractKey and SetKey.
  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
        : ExtractKey(ek), SetKey(sk), EqualKey(eq) {}
    // We want to return the exact same type as ExtractKey: Key or const
    // Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    template <typename K1, typename K2>
    bool equals(const K1& a, const K2& b) const {
      return EqualKey::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted
    // (benchmark!)
    typename std::remove_const<key_type>::type delkey;
  };

  // Utility functions to access the templated operators
  template <typename K>
  size_type hash(const K& v) const { return settings.hash(v); }
  template <typename K1, typename K2>
  bool equals(const K1& a, const K2& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); }

 private:
  // Actual data
  Settings settings;
  KeyInfo key_info;
  size_type num_deleted;  // how many occupied buckets are marked deleted
  Table table;            // holds num_buckets and num_elements too
};

// We need a global swap as well
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& x,
                 sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type
    sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize. Knuth says .8 is
// good -- higher causes us to probe too much, though it saves memory
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 80;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
    static_cast<int>(
        0.4 * sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);

}  // namespace google