1 | // Copyright (c) 2005, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | // --- |
31 | // |
32 | // This is just a very thin wrapper over densehashtable.h, just |
33 | // like sgi stl's stl_hash_set is a very thin wrapper over |
34 | // stl_hashtable. The major thing we define is operator[], because |
35 | // we have a concept of a data_type which stl_hashtable doesn't |
36 | // (it only has a key and a value). |
37 | // |
38 | // This is more different from dense_hash_map than you might think, |
39 | // because all iterators for sets are const (you obviously can't |
40 | // change the key, and for sets there is no value). |
41 | // |
42 | // NOTE: this is exactly like sparse_hash_set.h, with the word |
43 | // "sparse" replaced by "dense", except for the addition of |
44 | // set_empty_key(). |
45 | // |
46 | // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. |
47 | // |
48 | // Otherwise your program will die in mysterious ways. (Note if you |
49 | // use the constructor that takes an InputIterator range, you pass in |
50 | // the empty key in the constructor, rather than after. As a result, |
51 | // this constructor differs from the standard STL version.) |
52 | // |
53 | // In other respects, we adhere mostly to the STL semantics for |
54 | // hash-map. One important exception is that insert() may invalidate |
55 | // iterators entirely -- STL semantics are that insert() may reorder |
56 | // iterators, but they all still refer to something valid in the |
57 | // hashtable. Not so for us. Likewise, insert() may invalidate |
58 | // pointers into the hashtable. (Whether insert invalidates iterators |
59 | // and pointers depends on whether it results in a hashtable resize). |
60 | // On the plus side, delete() doesn't invalidate iterators or pointers |
61 | // at all, or even change the ordering of elements. |
62 | // |
63 | // Here are a few "power user" tips: |
64 | // |
65 | // 1) set_deleted_key(): |
66 | // If you want to use erase() you must call set_deleted_key(), |
67 | // in addition to set_empty_key(), after construction. |
68 | // The deleted and empty keys must differ. |
69 | // |
70 | // 2) resize(0): |
71 | // When an item is deleted, its memory isn't freed right |
72 | // away. This allows you to iterate over a hashtable, |
73 | // and call erase(), without invalidating the iterator. |
74 | // To force the memory to be freed, call resize(0). |
75 | // For tr1 compatibility, this can also be called as rehash(0). |
76 | // |
77 | // 3) min_load_factor(0.0) |
78 | // Setting the minimum load factor to 0.0 guarantees that |
79 | // the hash table will never shrink. |
80 | // |
81 | // Roughly speaking: |
82 | // (1) dense_hash_set: fastest, uses the most memory unless entries are small |
83 | // (2) sparse_hash_set: slowest, uses the least memory |
84 | // (3) hash_set / unordered_set (STL): in the middle |
85 | // |
86 | // Typically I use sparse_hash_set when I care about space and/or when |
87 | // I need to save the hashtable on disk. I use hash_set otherwise. I |
88 | // don't personally use dense_hash_set ever; some people use it for |
89 | // small sets with lots of lookups. |
90 | // |
91 | // - dense_hash_set has, typically, about 78% memory overhead (if your |
92 | // data takes up X bytes, the hash_set uses .78X more bytes in overhead). |
93 | // - sparse_hash_set has about 4 bits overhead per entry. |
94 | // - sparse_hash_set can be 3-7 times slower than the others for lookup and, |
95 | // especially, inserts. See time_hash_map.cc for details. |
96 | // |
97 | // See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html |
98 | // for information about how to use this class. |
99 | |
100 | #pragma once |
101 | |
102 | #include <algorithm> // needed by stl_alloc |
103 | #include <functional> // for equal_to<>, select1st<>, etc |
104 | #include <initializer_list> // for initializer_list |
105 | #include <memory> // for alloc |
106 | #include <utility> // for pair<> |
107 | #include <sparsehash/internal/densehashtable.h> // IWYU pragma: export |
108 | #include <sparsehash/internal/libc_allocator_with_realloc.h> |
109 | |
110 | namespace google { |
111 | |
112 | template <class Value, class HashFcn = std::hash<Value>, |
113 | class EqualKey = std::equal_to<Value>, |
114 | class Alloc = libc_allocator_with_realloc<Value>> |
115 | class dense_hash_set { |
116 | private: |
117 | // Apparently identity is not stl-standard, so we define our own |
118 | struct Identity { |
119 | typedef const Value& result_type; |
120 | template <typename V> |
121 | const Value& operator()(V&& v) const { return v; } |
122 | }; |
123 | struct SetKey { |
124 | void operator()(Value* value, const Value& new_key) const { |
125 | *value = new_key; |
126 | } |
127 | void operator()(Value* value, const Value& new_key, bool) const { |
128 | new(value) Value(new_key); |
129 | } |
130 | }; |
131 | |
132 | // The actual data |
133 | typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen; |
134 | typedef dense_hashtable<Value, Value, HashFcn, Identity, SetKey, EqualKeyChosen, |
135 | Alloc> ht; |
136 | ht rep; |
137 | |
138 | static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value |
139 | || std::is_same<EqualKey, std::equal_to<Value>>::value |
140 | || std::is_same<EqualKey, EqualKeyChosen>::value, |
141 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash" ); |
142 | |
143 | public: |
144 | typedef typename ht::key_type key_type; |
145 | typedef typename ht::value_type value_type; |
146 | typedef typename ht::hasher hasher; |
147 | typedef typename ht::key_equal key_equal; |
148 | typedef Alloc allocator_type; |
149 | |
150 | typedef typename ht::size_type size_type; |
151 | typedef typename ht::difference_type difference_type; |
152 | typedef typename ht::const_pointer pointer; |
153 | typedef typename ht::const_pointer const_pointer; |
154 | typedef typename ht::const_reference reference; |
155 | typedef typename ht::const_reference const_reference; |
156 | |
157 | typedef typename ht::const_iterator iterator; |
158 | typedef typename ht::const_iterator const_iterator; |
159 | typedef typename ht::const_local_iterator local_iterator; |
160 | typedef typename ht::const_local_iterator const_local_iterator; |
161 | |
162 | // Iterator functions -- recall all iterators are const |
163 | iterator begin() const { return rep.begin(); } |
164 | iterator end() const { return rep.end(); } |
165 | const_iterator cbegin() const { return rep.begin(); } |
166 | const_iterator cend() const { return rep.end(); } |
167 | |
168 | // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. |
169 | local_iterator begin(size_type i) const { return rep.begin(i); } |
170 | local_iterator end(size_type i) const { return rep.end(i); } |
171 | local_iterator cbegin(size_type i) const { return rep.begin(i); } |
172 | local_iterator cend(size_type i) const { return rep.end(i); } |
173 | |
174 | // Accessor functions |
175 | allocator_type get_allocator() const { return rep.get_allocator(); } |
176 | hasher hash_funct() const { return rep.hash_funct(); } |
177 | hasher hash_function() const { return hash_funct(); } // tr1 name |
178 | key_equal key_eq() const { return rep.key_eq(); } |
179 | |
180 | // Constructors |
181 | explicit dense_hash_set(size_type expected_max_items_in_table = 0, |
182 | const hasher& hf = hasher(), |
183 | const key_equal& eql = key_equal(), |
184 | const allocator_type& alloc = allocator_type()) |
185 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { |
186 | } |
187 | |
188 | template <class InputIterator> |
189 | dense_hash_set(InputIterator f, InputIterator l, |
190 | const key_type& empty_key_val, |
191 | size_type expected_max_items_in_table = 0, |
192 | const hasher& hf = hasher(), |
193 | const key_equal& eql = key_equal(), |
194 | const allocator_type& alloc = allocator_type()) |
195 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { |
196 | set_empty_key(empty_key_val); |
197 | rep.insert(f, l); |
198 | } |
199 | // We use the default copy constructor |
200 | // We use the default operator=() |
201 | // We use the default destructor |
202 | |
203 | void clear() { rep.clear(); } |
204 | // This clears the hash set without resizing it down to the minimum |
205 | // bucket count, but rather keeps the number of buckets constant |
206 | void clear_no_resize() { rep.clear_no_resize(); } |
207 | void swap(dense_hash_set& hs) { rep.swap(hs.rep); } |
208 | |
209 | // Functions concerning size |
210 | size_type size() const { return rep.size(); } |
211 | size_type max_size() const { return rep.max_size(); } |
212 | bool empty() const { return rep.empty(); } |
213 | size_type bucket_count() const { return rep.bucket_count(); } |
214 | size_type max_bucket_count() const { return rep.max_bucket_count(); } |
215 | |
216 | // These are tr1 methods. bucket() is the bucket the key is or would be in. |
217 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } |
218 | size_type bucket(const key_type& key) const { return rep.bucket(key); } |
219 | float load_factor() const { return size() * 1.0f / bucket_count(); } |
220 | float max_load_factor() const { |
221 | float shrink, grow; |
222 | rep.get_resizing_parameters(&shrink, &grow); |
223 | return grow; |
224 | } |
225 | void max_load_factor(float new_grow) { |
226 | float shrink, grow; |
227 | rep.get_resizing_parameters(&shrink, &grow); |
228 | rep.set_resizing_parameters(shrink, new_grow); |
229 | } |
230 | // These aren't tr1 methods but perhaps ought to be. |
231 | float min_load_factor() const { |
232 | float shrink, grow; |
233 | rep.get_resizing_parameters(&shrink, &grow); |
234 | return shrink; |
235 | } |
236 | void min_load_factor(float new_shrink) { |
237 | float shrink, grow; |
238 | rep.get_resizing_parameters(&shrink, &grow); |
239 | rep.set_resizing_parameters(new_shrink, grow); |
240 | } |
241 | // Deprecated; use min_load_factor() or max_load_factor() instead. |
242 | void set_resizing_parameters(float shrink, float grow) { |
243 | rep.set_resizing_parameters(shrink, grow); |
244 | } |
245 | |
246 | void resize(size_type hint) { rep.resize(hint); } |
247 | void rehash(size_type hint) { resize(hint); } // the tr1 name |
248 | |
249 | // Lookup routines |
250 | iterator find(const key_type& key) const { return rep.find(key); } |
251 | |
252 | template <typename K> |
253 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type |
254 | find(const K& key) const { return rep.find(key); } |
255 | |
256 | size_type count(const key_type& key) const { return rep.count(key); } |
257 | |
258 | template <typename K> |
259 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type |
260 | count(const K& key) const { return rep.count(key); } |
261 | |
262 | std::pair<iterator, iterator> equal_range(const key_type& key) const { |
263 | return rep.equal_range(key); |
264 | } |
265 | |
266 | template<typename K> |
267 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type |
268 | equal_range(const K& key) const { |
269 | return rep.equal_range(key); |
270 | } |
271 | |
272 | // Insertion routines |
273 | std::pair<iterator, bool> insert(const value_type& obj) { |
274 | std::pair<typename ht::iterator, bool> p = rep.insert(obj); |
275 | return std::pair<iterator, bool>(p.first, p.second); // const to non-const |
276 | } |
277 | |
278 | std::pair<iterator, bool> insert(value_type&& obj) { |
279 | std::pair<typename ht::iterator, bool> p = rep.insert(std::move(obj)); |
280 | return std::pair<iterator, bool>(p.first, p.second); // const to non-const |
281 | } |
282 | |
283 | template <typename... Args> |
284 | std::pair<iterator, bool> emplace(Args&&... args) { |
285 | return rep.emplace(std::forward<Args>(args)...); |
286 | } |
287 | |
288 | template <typename... Args> |
289 | std::pair<iterator, bool> emplace_hint(const_iterator hint, Args&&... args) { |
290 | return rep.emplace_hint(hint, std::forward<Args>(args)...); |
291 | } |
292 | |
293 | template <class InputIterator> |
294 | void insert(InputIterator f, InputIterator l) { |
295 | rep.insert(f, l); |
296 | } |
297 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } |
298 | void insert(std::initializer_list<value_type> ilist) { rep.insert(ilist.begin(), ilist.end()); } |
299 | // Required for std::insert_iterator; the passed-in iterator is ignored. |
300 | iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; } |
301 | iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; } |
302 | |
303 | // Deletion and empty routines |
304 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key |
305 | // value to identify deleted and empty buckets. You can change the |
306 | // deleted key as time goes on, or get rid of it entirely to be insert-only. |
307 | void set_empty_key(const key_type& key) { rep.set_empty_key(key); } |
308 | key_type empty_key() const { return rep.empty_key(); } |
309 | |
310 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } |
311 | void clear_deleted_key() { rep.clear_deleted_key(); } |
312 | key_type deleted_key() const { return rep.deleted_key(); } |
313 | |
314 | // These are standard |
315 | size_type erase(const key_type& key) { return rep.erase(key); } |
316 | iterator erase(const_iterator it) { return rep.erase(it); } |
317 | iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); } |
318 | |
319 | // Comparison |
320 | bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; } |
321 | bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; } |
322 | |
323 | // I/O -- this is an add-on for writing metainformation to disk |
324 | // |
325 | // For maximum flexibility, this does not assume a particular |
326 | // file type (though it will probably be a FILE *). We just pass |
327 | // the fp through to rep. |
328 | |
329 | // If your keys and values are simple enough, you can pass this |
330 | // serializer to serialize()/unserialize(). "Simple enough" means |
331 | // value_type is a POD type that contains no pointers. Note, |
332 | // however, we don't try to normalize endianness. |
333 | typedef typename ht::NopointerSerializer NopointerSerializer; |
334 | |
335 | // serializer: a class providing operator()(OUTPUT*, const value_type&) |
336 | // (writing value_type to OUTPUT). You can specify a |
337 | // NopointerSerializer object if appropriate (see above). |
338 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a |
339 | // pointer to a class providing size_t Write(const void*, size_t), |
340 | // which writes a buffer into a stream (which fp presumably |
341 | // owns) and returns the number of bytes successfully written. |
342 | // Note basic_ostream<not_char> is not currently supported. |
343 | template <typename ValueSerializer, typename OUTPUT> |
344 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { |
345 | return rep.serialize(serializer, fp); |
346 | } |
347 | |
348 | // serializer: a functor providing operator()(INPUT*, value_type*) |
349 | // (reading from INPUT and into value_type). You can specify a |
350 | // NopointerSerializer object if appropriate (see above). |
351 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a |
352 | // pointer to a class providing size_t Read(void*, size_t), |
353 | // which reads into a buffer from a stream (which fp presumably |
354 | // owns) and returns the number of bytes successfully read. |
355 | // Note basic_istream<not_char> is not currently supported. |
356 | template <typename ValueSerializer, typename INPUT> |
357 | bool unserialize(ValueSerializer serializer, INPUT* fp) { |
358 | return rep.unserialize(serializer, fp); |
359 | } |
360 | }; |
361 | |
362 | template <class Val, class HashFcn, class EqualKey, class Alloc> |
363 | inline void swap(dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1, |
364 | dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) { |
365 | hs1.swap(hs2); |
366 | } |
367 | |
368 | } // namespace google |
369 | |