| 1 | // Copyright (c) 2005, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | // --- |
| 31 | // |
// This is just a very thin wrapper over densehashtable.h, just
// like sgi stl's stl_hash_set is a very thin wrapper over
// stl_hashtable.  (Unlike dense_hash_map, we define no operator[]:
// a set stores only keys, so there is no separate data_type to
// return a reference to.)
| 37 | // |
| 38 | // This is more different from dense_hash_map than you might think, |
| 39 | // because all iterators for sets are const (you obviously can't |
| 40 | // change the key, and for sets there is no value). |
| 41 | // |
| 42 | // NOTE: this is exactly like sparse_hash_set.h, with the word |
| 43 | // "sparse" replaced by "dense", except for the addition of |
| 44 | // set_empty_key(). |
| 45 | // |
| 46 | // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. |
| 47 | // |
| 48 | // Otherwise your program will die in mysterious ways. (Note if you |
| 49 | // use the constructor that takes an InputIterator range, you pass in |
| 50 | // the empty key in the constructor, rather than after. As a result, |
| 51 | // this constructor differs from the standard STL version.) |
| 52 | // |
// In other respects, we adhere mostly to the STL semantics for
// hash-set. One important exception is that insert() may invalidate
| 55 | // iterators entirely -- STL semantics are that insert() may reorder |
| 56 | // iterators, but they all still refer to something valid in the |
| 57 | // hashtable. Not so for us. Likewise, insert() may invalidate |
| 58 | // pointers into the hashtable. (Whether insert invalidates iterators |
| 59 | // and pointers depends on whether it results in a hashtable resize). |
// On the plus side, erase() doesn't invalidate iterators or pointers
// at all, or even change the ordering of elements.
| 62 | // |
| 63 | // Here are a few "power user" tips: |
| 64 | // |
| 65 | // 1) set_deleted_key(): |
| 66 | // If you want to use erase() you must call set_deleted_key(), |
| 67 | // in addition to set_empty_key(), after construction. |
| 68 | // The deleted and empty keys must differ. |
| 69 | // |
| 70 | // 2) resize(0): |
| 71 | // When an item is deleted, its memory isn't freed right |
| 72 | // away. This allows you to iterate over a hashtable, |
| 73 | // and call erase(), without invalidating the iterator. |
| 74 | // To force the memory to be freed, call resize(0). |
| 75 | // For tr1 compatibility, this can also be called as rehash(0). |
| 76 | // |
| 77 | // 3) min_load_factor(0.0) |
| 78 | // Setting the minimum load factor to 0.0 guarantees that |
| 79 | // the hash table will never shrink. |
| 80 | // |
| 81 | // Roughly speaking: |
| 82 | // (1) dense_hash_set: fastest, uses the most memory unless entries are small |
| 83 | // (2) sparse_hash_set: slowest, uses the least memory |
| 84 | // (3) hash_set / unordered_set (STL): in the middle |
| 85 | // |
| 86 | // Typically I use sparse_hash_set when I care about space and/or when |
| 87 | // I need to save the hashtable on disk. I use hash_set otherwise. I |
| 88 | // don't personally use dense_hash_set ever; some people use it for |
| 89 | // small sets with lots of lookups. |
| 90 | // |
| 91 | // - dense_hash_set has, typically, about 78% memory overhead (if your |
| 92 | // data takes up X bytes, the hash_set uses .78X more bytes in overhead). |
| 93 | // - sparse_hash_set has about 4 bits overhead per entry. |
| 94 | // - sparse_hash_set can be 3-7 times slower than the others for lookup and, |
| 95 | // especially, inserts. See time_hash_map.cc for details. |
| 96 | // |
| 97 | // See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html |
| 98 | // for information about how to use this class. |
| 99 | |
| 100 | #pragma once |
| 101 | |
#include <algorithm>         // needed by stl_alloc
#include <functional>        // for equal_to<>, select1st<>, etc
#include <initializer_list>  // for initializer_list
#include <memory>            // for alloc
#include <type_traits>       // for enable_if<>, is_same<>
#include <utility>           // for pair<>
#include <sparsehash/internal/densehashtable.h>  // IWYU pragma: export
#include <sparsehash/internal/libc_allocator_with_realloc.h>
| 109 | |
| 110 | namespace google { |
| 111 | |
| 112 | template <class Value, class HashFcn = std::hash<Value>, |
| 113 | class EqualKey = std::equal_to<Value>, |
| 114 | class Alloc = libc_allocator_with_realloc<Value>> |
| 115 | class dense_hash_set { |
| 116 | private: |
| 117 | // Apparently identity is not stl-standard, so we define our own |
| 118 | struct Identity { |
| 119 | typedef const Value& result_type; |
| 120 | template <typename V> |
| 121 | const Value& operator()(V&& v) const { return v; } |
| 122 | }; |
| 123 | struct SetKey { |
| 124 | void operator()(Value* value, const Value& new_key) const { |
| 125 | *value = new_key; |
| 126 | } |
| 127 | void operator()(Value* value, const Value& new_key, bool) const { |
| 128 | new(value) Value(new_key); |
| 129 | } |
| 130 | }; |
| 131 | |
| 132 | // The actual data |
| 133 | typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen; |
| 134 | typedef dense_hashtable<Value, Value, HashFcn, Identity, SetKey, EqualKeyChosen, |
| 135 | Alloc> ht; |
| 136 | ht rep; |
| 137 | |
| 138 | static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value |
| 139 | || std::is_same<EqualKey, std::equal_to<Value>>::value |
| 140 | || std::is_same<EqualKey, EqualKeyChosen>::value, |
| 141 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash" ); |
| 142 | |
| 143 | public: |
| 144 | typedef typename ht::key_type key_type; |
| 145 | typedef typename ht::value_type value_type; |
| 146 | typedef typename ht::hasher hasher; |
| 147 | typedef typename ht::key_equal key_equal; |
| 148 | typedef Alloc allocator_type; |
| 149 | |
| 150 | typedef typename ht::size_type size_type; |
| 151 | typedef typename ht::difference_type difference_type; |
| 152 | typedef typename ht::const_pointer pointer; |
| 153 | typedef typename ht::const_pointer const_pointer; |
| 154 | typedef typename ht::const_reference reference; |
| 155 | typedef typename ht::const_reference const_reference; |
| 156 | |
| 157 | typedef typename ht::const_iterator iterator; |
| 158 | typedef typename ht::const_iterator const_iterator; |
| 159 | typedef typename ht::const_local_iterator local_iterator; |
| 160 | typedef typename ht::const_local_iterator const_local_iterator; |
| 161 | |
| 162 | // Iterator functions -- recall all iterators are const |
| 163 | iterator begin() const { return rep.begin(); } |
| 164 | iterator end() const { return rep.end(); } |
| 165 | const_iterator cbegin() const { return rep.begin(); } |
| 166 | const_iterator cend() const { return rep.end(); } |
| 167 | |
| 168 | // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. |
| 169 | local_iterator begin(size_type i) const { return rep.begin(i); } |
| 170 | local_iterator end(size_type i) const { return rep.end(i); } |
| 171 | local_iterator cbegin(size_type i) const { return rep.begin(i); } |
| 172 | local_iterator cend(size_type i) const { return rep.end(i); } |
| 173 | |
| 174 | // Accessor functions |
| 175 | allocator_type get_allocator() const { return rep.get_allocator(); } |
| 176 | hasher hash_funct() const { return rep.hash_funct(); } |
| 177 | hasher hash_function() const { return hash_funct(); } // tr1 name |
| 178 | key_equal key_eq() const { return rep.key_eq(); } |
| 179 | |
| 180 | // Constructors |
| 181 | explicit dense_hash_set(size_type expected_max_items_in_table = 0, |
| 182 | const hasher& hf = hasher(), |
| 183 | const key_equal& eql = key_equal(), |
| 184 | const allocator_type& alloc = allocator_type()) |
| 185 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { |
| 186 | } |
| 187 | |
| 188 | template <class InputIterator> |
| 189 | dense_hash_set(InputIterator f, InputIterator l, |
| 190 | const key_type& empty_key_val, |
| 191 | size_type expected_max_items_in_table = 0, |
| 192 | const hasher& hf = hasher(), |
| 193 | const key_equal& eql = key_equal(), |
| 194 | const allocator_type& alloc = allocator_type()) |
| 195 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { |
| 196 | set_empty_key(empty_key_val); |
| 197 | rep.insert(f, l); |
| 198 | } |
| 199 | // We use the default copy constructor |
| 200 | // We use the default operator=() |
| 201 | // We use the default destructor |
| 202 | |
| 203 | void clear() { rep.clear(); } |
| 204 | // This clears the hash set without resizing it down to the minimum |
| 205 | // bucket count, but rather keeps the number of buckets constant |
| 206 | void clear_no_resize() { rep.clear_no_resize(); } |
| 207 | void swap(dense_hash_set& hs) { rep.swap(hs.rep); } |
| 208 | |
| 209 | // Functions concerning size |
| 210 | size_type size() const { return rep.size(); } |
| 211 | size_type max_size() const { return rep.max_size(); } |
| 212 | bool empty() const { return rep.empty(); } |
| 213 | size_type bucket_count() const { return rep.bucket_count(); } |
| 214 | size_type max_bucket_count() const { return rep.max_bucket_count(); } |
| 215 | |
| 216 | // These are tr1 methods. bucket() is the bucket the key is or would be in. |
| 217 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } |
| 218 | size_type bucket(const key_type& key) const { return rep.bucket(key); } |
| 219 | float load_factor() const { return size() * 1.0f / bucket_count(); } |
| 220 | float max_load_factor() const { |
| 221 | float shrink, grow; |
| 222 | rep.get_resizing_parameters(&shrink, &grow); |
| 223 | return grow; |
| 224 | } |
| 225 | void max_load_factor(float new_grow) { |
| 226 | float shrink, grow; |
| 227 | rep.get_resizing_parameters(&shrink, &grow); |
| 228 | rep.set_resizing_parameters(shrink, new_grow); |
| 229 | } |
| 230 | // These aren't tr1 methods but perhaps ought to be. |
| 231 | float min_load_factor() const { |
| 232 | float shrink, grow; |
| 233 | rep.get_resizing_parameters(&shrink, &grow); |
| 234 | return shrink; |
| 235 | } |
| 236 | void min_load_factor(float new_shrink) { |
| 237 | float shrink, grow; |
| 238 | rep.get_resizing_parameters(&shrink, &grow); |
| 239 | rep.set_resizing_parameters(new_shrink, grow); |
| 240 | } |
| 241 | // Deprecated; use min_load_factor() or max_load_factor() instead. |
| 242 | void set_resizing_parameters(float shrink, float grow) { |
| 243 | rep.set_resizing_parameters(shrink, grow); |
| 244 | } |
| 245 | |
| 246 | void resize(size_type hint) { rep.resize(hint); } |
| 247 | void rehash(size_type hint) { resize(hint); } // the tr1 name |
| 248 | |
| 249 | // Lookup routines |
| 250 | iterator find(const key_type& key) const { return rep.find(key); } |
| 251 | |
| 252 | template <typename K> |
| 253 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type |
| 254 | find(const K& key) const { return rep.find(key); } |
| 255 | |
| 256 | size_type count(const key_type& key) const { return rep.count(key); } |
| 257 | |
| 258 | template <typename K> |
| 259 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type |
| 260 | count(const K& key) const { return rep.count(key); } |
| 261 | |
| 262 | std::pair<iterator, iterator> equal_range(const key_type& key) const { |
| 263 | return rep.equal_range(key); |
| 264 | } |
| 265 | |
| 266 | template<typename K> |
| 267 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type |
| 268 | equal_range(const K& key) const { |
| 269 | return rep.equal_range(key); |
| 270 | } |
| 271 | |
| 272 | // Insertion routines |
| 273 | std::pair<iterator, bool> insert(const value_type& obj) { |
| 274 | std::pair<typename ht::iterator, bool> p = rep.insert(obj); |
| 275 | return std::pair<iterator, bool>(p.first, p.second); // const to non-const |
| 276 | } |
| 277 | |
| 278 | std::pair<iterator, bool> insert(value_type&& obj) { |
| 279 | std::pair<typename ht::iterator, bool> p = rep.insert(std::move(obj)); |
| 280 | return std::pair<iterator, bool>(p.first, p.second); // const to non-const |
| 281 | } |
| 282 | |
| 283 | template <typename... Args> |
| 284 | std::pair<iterator, bool> emplace(Args&&... args) { |
| 285 | return rep.emplace(std::forward<Args>(args)...); |
| 286 | } |
| 287 | |
| 288 | template <typename... Args> |
| 289 | std::pair<iterator, bool> emplace_hint(const_iterator hint, Args&&... args) { |
| 290 | return rep.emplace_hint(hint, std::forward<Args>(args)...); |
| 291 | } |
| 292 | |
| 293 | template <class InputIterator> |
| 294 | void insert(InputIterator f, InputIterator l) { |
| 295 | rep.insert(f, l); |
| 296 | } |
| 297 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } |
| 298 | void insert(std::initializer_list<value_type> ilist) { rep.insert(ilist.begin(), ilist.end()); } |
| 299 | // Required for std::insert_iterator; the passed-in iterator is ignored. |
| 300 | iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; } |
| 301 | iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; } |
| 302 | |
| 303 | // Deletion and empty routines |
| 304 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key |
| 305 | // value to identify deleted and empty buckets. You can change the |
| 306 | // deleted key as time goes on, or get rid of it entirely to be insert-only. |
| 307 | void set_empty_key(const key_type& key) { rep.set_empty_key(key); } |
| 308 | key_type empty_key() const { return rep.empty_key(); } |
| 309 | |
| 310 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } |
| 311 | void clear_deleted_key() { rep.clear_deleted_key(); } |
| 312 | key_type deleted_key() const { return rep.deleted_key(); } |
| 313 | |
| 314 | // These are standard |
| 315 | size_type erase(const key_type& key) { return rep.erase(key); } |
| 316 | iterator erase(const_iterator it) { return rep.erase(it); } |
| 317 | iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); } |
| 318 | |
| 319 | // Comparison |
| 320 | bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; } |
| 321 | bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; } |
| 322 | |
| 323 | // I/O -- this is an add-on for writing metainformation to disk |
| 324 | // |
| 325 | // For maximum flexibility, this does not assume a particular |
| 326 | // file type (though it will probably be a FILE *). We just pass |
| 327 | // the fp through to rep. |
| 328 | |
| 329 | // If your keys and values are simple enough, you can pass this |
| 330 | // serializer to serialize()/unserialize(). "Simple enough" means |
| 331 | // value_type is a POD type that contains no pointers. Note, |
| 332 | // however, we don't try to normalize endianness. |
| 333 | typedef typename ht::NopointerSerializer NopointerSerializer; |
| 334 | |
| 335 | // serializer: a class providing operator()(OUTPUT*, const value_type&) |
| 336 | // (writing value_type to OUTPUT). You can specify a |
| 337 | // NopointerSerializer object if appropriate (see above). |
| 338 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a |
| 339 | // pointer to a class providing size_t Write(const void*, size_t), |
| 340 | // which writes a buffer into a stream (which fp presumably |
| 341 | // owns) and returns the number of bytes successfully written. |
| 342 | // Note basic_ostream<not_char> is not currently supported. |
| 343 | template <typename ValueSerializer, typename OUTPUT> |
| 344 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { |
| 345 | return rep.serialize(serializer, fp); |
| 346 | } |
| 347 | |
| 348 | // serializer: a functor providing operator()(INPUT*, value_type*) |
| 349 | // (reading from INPUT and into value_type). You can specify a |
| 350 | // NopointerSerializer object if appropriate (see above). |
| 351 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a |
| 352 | // pointer to a class providing size_t Read(void*, size_t), |
| 353 | // which reads into a buffer from a stream (which fp presumably |
| 354 | // owns) and returns the number of bytes successfully read. |
| 355 | // Note basic_istream<not_char> is not currently supported. |
| 356 | template <typename ValueSerializer, typename INPUT> |
| 357 | bool unserialize(ValueSerializer serializer, INPUT* fp) { |
| 358 | return rep.unserialize(serializer, fp); |
| 359 | } |
| 360 | }; |
| 361 | |
| 362 | template <class Val, class HashFcn, class EqualKey, class Alloc> |
| 363 | inline void swap(dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1, |
| 364 | dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) { |
| 365 | hs1.swap(hs2); |
| 366 | } |
| 367 | |
| 368 | } // namespace google |
| 369 | |