1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31//
32// This is just a very thin wrapper over densehashtable.h, just
33// like sgi stl's stl_hash_set is a very thin wrapper over
34// stl_hashtable. The major thing we define is operator[], because
35// we have a concept of a data_type which stl_hashtable doesn't
36// (it only has a key and a value).
37//
38// This is more different from dense_hash_map than you might think,
39// because all iterators for sets are const (you obviously can't
40// change the key, and for sets there is no value).
41//
42// NOTE: this is exactly like sparse_hash_set.h, with the word
43// "sparse" replaced by "dense", except for the addition of
44// set_empty_key().
45//
46// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
47//
48// Otherwise your program will die in mysterious ways. (Note if you
49// use the constructor that takes an InputIterator range, you pass in
50// the empty key in the constructor, rather than after. As a result,
51// this constructor differs from the standard STL version.)
52//
53// In other respects, we adhere mostly to the STL semantics for
54// hash-map. One important exception is that insert() may invalidate
55// iterators entirely -- STL semantics are that insert() may reorder
56// iterators, but they all still refer to something valid in the
57// hashtable. Not so for us. Likewise, insert() may invalidate
58// pointers into the hashtable. (Whether insert invalidates iterators
59// and pointers depends on whether it results in a hashtable resize).
60// On the plus side, delete() doesn't invalidate iterators or pointers
61// at all, or even change the ordering of elements.
62//
63// Here are a few "power user" tips:
64//
65// 1) set_deleted_key():
66// If you want to use erase() you must call set_deleted_key(),
67// in addition to set_empty_key(), after construction.
68// The deleted and empty keys must differ.
69//
70// 2) resize(0):
71// When an item is deleted, its memory isn't freed right
72// away. This allows you to iterate over a hashtable,
73// and call erase(), without invalidating the iterator.
74// To force the memory to be freed, call resize(0).
75// For tr1 compatibility, this can also be called as rehash(0).
76//
77// 3) min_load_factor(0.0)
78// Setting the minimum load factor to 0.0 guarantees that
79// the hash table will never shrink.
80//
81// Roughly speaking:
82// (1) dense_hash_set: fastest, uses the most memory unless entries are small
83// (2) sparse_hash_set: slowest, uses the least memory
84// (3) hash_set / unordered_set (STL): in the middle
85//
86// Typically I use sparse_hash_set when I care about space and/or when
87// I need to save the hashtable on disk. I use hash_set otherwise. I
88// don't personally use dense_hash_set ever; some people use it for
89// small sets with lots of lookups.
90//
91// - dense_hash_set has, typically, about 78% memory overhead (if your
92// data takes up X bytes, the hash_set uses .78X more bytes in overhead).
93// - sparse_hash_set has about 4 bits overhead per entry.
94// - sparse_hash_set can be 3-7 times slower than the others for lookup and,
95// especially, inserts. See time_hash_map.cc for details.
96//
97// See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html
98// for information about how to use this class.
99
100#pragma once
101
102#include <algorithm> // needed by stl_alloc
103#include <functional> // for equal_to<>, select1st<>, etc
104#include <initializer_list> // for initializer_list
105#include <memory> // for alloc
106#include <utility> // for pair<>
107#include <sparsehash/internal/densehashtable.h> // IWYU pragma: export
108#include <sparsehash/internal/libc_allocator_with_realloc.h>
109
110namespace google {
111
112template <class Value, class HashFcn = std::hash<Value>,
113 class EqualKey = std::equal_to<Value>,
114 class Alloc = libc_allocator_with_realloc<Value>>
115class dense_hash_set {
116 private:
117 // Apparently identity is not stl-standard, so we define our own
118 struct Identity {
119 typedef const Value& result_type;
120 template <typename V>
121 const Value& operator()(V&& v) const { return v; }
122 };
123 struct SetKey {
124 void operator()(Value* value, const Value& new_key) const {
125 *value = new_key;
126 }
127 void operator()(Value* value, const Value& new_key, bool) const {
128 new(value) Value(new_key);
129 }
130 };
131
132 // The actual data
133 typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen;
134 typedef dense_hashtable<Value, Value, HashFcn, Identity, SetKey, EqualKeyChosen,
135 Alloc> ht;
136 ht rep;
137
138 static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value
139 || std::is_same<EqualKey, std::equal_to<Value>>::value
140 || std::is_same<EqualKey, EqualKeyChosen>::value,
141 "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash");
142
143 public:
144 typedef typename ht::key_type key_type;
145 typedef typename ht::value_type value_type;
146 typedef typename ht::hasher hasher;
147 typedef typename ht::key_equal key_equal;
148 typedef Alloc allocator_type;
149
150 typedef typename ht::size_type size_type;
151 typedef typename ht::difference_type difference_type;
152 typedef typename ht::const_pointer pointer;
153 typedef typename ht::const_pointer const_pointer;
154 typedef typename ht::const_reference reference;
155 typedef typename ht::const_reference const_reference;
156
157 typedef typename ht::const_iterator iterator;
158 typedef typename ht::const_iterator const_iterator;
159 typedef typename ht::const_local_iterator local_iterator;
160 typedef typename ht::const_local_iterator const_local_iterator;
161
162 // Iterator functions -- recall all iterators are const
163 iterator begin() const { return rep.begin(); }
164 iterator end() const { return rep.end(); }
165 const_iterator cbegin() const { return rep.begin(); }
166 const_iterator cend() const { return rep.end(); }
167
168 // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements.
169 local_iterator begin(size_type i) const { return rep.begin(i); }
170 local_iterator end(size_type i) const { return rep.end(i); }
171 local_iterator cbegin(size_type i) const { return rep.begin(i); }
172 local_iterator cend(size_type i) const { return rep.end(i); }
173
174 // Accessor functions
175 allocator_type get_allocator() const { return rep.get_allocator(); }
176 hasher hash_funct() const { return rep.hash_funct(); }
177 hasher hash_function() const { return hash_funct(); } // tr1 name
178 key_equal key_eq() const { return rep.key_eq(); }
179
180 // Constructors
181 explicit dense_hash_set(size_type expected_max_items_in_table = 0,
182 const hasher& hf = hasher(),
183 const key_equal& eql = key_equal(),
184 const allocator_type& alloc = allocator_type())
185 : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
186 }
187
188 template <class InputIterator>
189 dense_hash_set(InputIterator f, InputIterator l,
190 const key_type& empty_key_val,
191 size_type expected_max_items_in_table = 0,
192 const hasher& hf = hasher(),
193 const key_equal& eql = key_equal(),
194 const allocator_type& alloc = allocator_type())
195 : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
196 set_empty_key(empty_key_val);
197 rep.insert(f, l);
198 }
199 // We use the default copy constructor
200 // We use the default operator=()
201 // We use the default destructor
202
203 void clear() { rep.clear(); }
204 // This clears the hash set without resizing it down to the minimum
205 // bucket count, but rather keeps the number of buckets constant
206 void clear_no_resize() { rep.clear_no_resize(); }
207 void swap(dense_hash_set& hs) { rep.swap(hs.rep); }
208
209 // Functions concerning size
210 size_type size() const { return rep.size(); }
211 size_type max_size() const { return rep.max_size(); }
212 bool empty() const { return rep.empty(); }
213 size_type bucket_count() const { return rep.bucket_count(); }
214 size_type max_bucket_count() const { return rep.max_bucket_count(); }
215
216 // These are tr1 methods. bucket() is the bucket the key is or would be in.
217 size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
218 size_type bucket(const key_type& key) const { return rep.bucket(key); }
219 float load_factor() const { return size() * 1.0f / bucket_count(); }
220 float max_load_factor() const {
221 float shrink, grow;
222 rep.get_resizing_parameters(&shrink, &grow);
223 return grow;
224 }
225 void max_load_factor(float new_grow) {
226 float shrink, grow;
227 rep.get_resizing_parameters(&shrink, &grow);
228 rep.set_resizing_parameters(shrink, new_grow);
229 }
230 // These aren't tr1 methods but perhaps ought to be.
231 float min_load_factor() const {
232 float shrink, grow;
233 rep.get_resizing_parameters(&shrink, &grow);
234 return shrink;
235 }
236 void min_load_factor(float new_shrink) {
237 float shrink, grow;
238 rep.get_resizing_parameters(&shrink, &grow);
239 rep.set_resizing_parameters(new_shrink, grow);
240 }
241 // Deprecated; use min_load_factor() or max_load_factor() instead.
242 void set_resizing_parameters(float shrink, float grow) {
243 rep.set_resizing_parameters(shrink, grow);
244 }
245
246 void resize(size_type hint) { rep.resize(hint); }
247 void rehash(size_type hint) { resize(hint); } // the tr1 name
248
249 // Lookup routines
250 iterator find(const key_type& key) const { return rep.find(key); }
251
252 template <typename K>
253 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type
254 find(const K& key) const { return rep.find(key); }
255
256 size_type count(const key_type& key) const { return rep.count(key); }
257
258 template <typename K>
259 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type
260 count(const K& key) const { return rep.count(key); }
261
262 std::pair<iterator, iterator> equal_range(const key_type& key) const {
263 return rep.equal_range(key);
264 }
265
266 template<typename K>
267 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type
268 equal_range(const K& key) const {
269 return rep.equal_range(key);
270 }
271
272 // Insertion routines
273 std::pair<iterator, bool> insert(const value_type& obj) {
274 std::pair<typename ht::iterator, bool> p = rep.insert(obj);
275 return std::pair<iterator, bool>(p.first, p.second); // const to non-const
276 }
277
278 std::pair<iterator, bool> insert(value_type&& obj) {
279 std::pair<typename ht::iterator, bool> p = rep.insert(std::move(obj));
280 return std::pair<iterator, bool>(p.first, p.second); // const to non-const
281 }
282
283 template <typename... Args>
284 std::pair<iterator, bool> emplace(Args&&... args) {
285 return rep.emplace(std::forward<Args>(args)...);
286 }
287
288 template <typename... Args>
289 std::pair<iterator, bool> emplace_hint(const_iterator hint, Args&&... args) {
290 return rep.emplace_hint(hint, std::forward<Args>(args)...);
291 }
292
293 template <class InputIterator>
294 void insert(InputIterator f, InputIterator l) {
295 rep.insert(f, l);
296 }
297 void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
298 void insert(std::initializer_list<value_type> ilist) { rep.insert(ilist.begin(), ilist.end()); }
299 // Required for std::insert_iterator; the passed-in iterator is ignored.
300 iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; }
301 iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; }
302
303 // Deletion and empty routines
304 // THESE ARE NON-STANDARD! I make you specify an "impossible" key
305 // value to identify deleted and empty buckets. You can change the
306 // deleted key as time goes on, or get rid of it entirely to be insert-only.
307 void set_empty_key(const key_type& key) { rep.set_empty_key(key); }
308 key_type empty_key() const { return rep.empty_key(); }
309
310 void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
311 void clear_deleted_key() { rep.clear_deleted_key(); }
312 key_type deleted_key() const { return rep.deleted_key(); }
313
314 // These are standard
315 size_type erase(const key_type& key) { return rep.erase(key); }
316 iterator erase(const_iterator it) { return rep.erase(it); }
317 iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); }
318
319 // Comparison
320 bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; }
321 bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; }
322
323 // I/O -- this is an add-on for writing metainformation to disk
324 //
325 // For maximum flexibility, this does not assume a particular
326 // file type (though it will probably be a FILE *). We just pass
327 // the fp through to rep.
328
329 // If your keys and values are simple enough, you can pass this
330 // serializer to serialize()/unserialize(). "Simple enough" means
331 // value_type is a POD type that contains no pointers. Note,
332 // however, we don't try to normalize endianness.
333 typedef typename ht::NopointerSerializer NopointerSerializer;
334
335 // serializer: a class providing operator()(OUTPUT*, const value_type&)
336 // (writing value_type to OUTPUT). You can specify a
337 // NopointerSerializer object if appropriate (see above).
338 // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
339 // pointer to a class providing size_t Write(const void*, size_t),
340 // which writes a buffer into a stream (which fp presumably
341 // owns) and returns the number of bytes successfully written.
342 // Note basic_ostream<not_char> is not currently supported.
343 template <typename ValueSerializer, typename OUTPUT>
344 bool serialize(ValueSerializer serializer, OUTPUT* fp) {
345 return rep.serialize(serializer, fp);
346 }
347
348 // serializer: a functor providing operator()(INPUT*, value_type*)
349 // (reading from INPUT and into value_type). You can specify a
350 // NopointerSerializer object if appropriate (see above).
351 // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
352 // pointer to a class providing size_t Read(void*, size_t),
353 // which reads into a buffer from a stream (which fp presumably
354 // owns) and returns the number of bytes successfully read.
355 // Note basic_istream<not_char> is not currently supported.
356 template <typename ValueSerializer, typename INPUT>
357 bool unserialize(ValueSerializer serializer, INPUT* fp) {
358 return rep.unserialize(serializer, fp);
359 }
360};
361
362template <class Val, class HashFcn, class EqualKey, class Alloc>
363inline void swap(dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
364 dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
365 hs1.swap(hs2);
366}
367
368} // namespace google
369