1 | // Copyright (c) 2005, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | // ---- |
31 | // |
32 | // This is just a very thin wrapper over densehashtable.h, just |
33 | // like sgi stl's stl_hash_map is a very thin wrapper over |
34 | // stl_hashtable. The major thing we define is operator[], because |
35 | // we have a concept of a data_type which stl_hashtable doesn't |
36 | // (it only has a key and a value). |
37 | // |
38 | // NOTE: this is exactly like sparse_hash_map.h, with the word |
39 | // "sparse" replaced by "dense", except for the addition of |
40 | // set_empty_key(). |
41 | // |
42 | // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. |
43 | // |
44 | // Otherwise your program will die in mysterious ways. (Note if you |
45 | // use the constructor that takes an InputIterator range, you pass in |
46 | // the empty key in the constructor, rather than after. As a result, |
47 | // this constructor differs from the standard STL version.) |
48 | // |
49 | // In other respects, we adhere mostly to the STL semantics for |
50 | // hash-map. One important exception is that insert() may invalidate |
51 | // iterators entirely -- STL semantics are that insert() may reorder |
52 | // iterators, but they all still refer to something valid in the |
53 | // hashtable. Not so for us. Likewise, insert() may invalidate |
54 | // pointers into the hashtable. (Whether insert invalidates iterators |
55 | // and pointers depends on whether it results in a hashtable resize). |
56 | // On the plus side, delete() doesn't invalidate iterators or pointers |
57 | // at all, or even change the ordering of elements. |
58 | // |
59 | // Here are a few "power user" tips: |
60 | // |
61 | // 1) set_deleted_key(): |
62 | // If you want to use erase() you *must* call set_deleted_key(), |
63 | // in addition to set_empty_key(), after construction. |
64 | // The deleted and empty keys must differ. |
65 | // |
66 | // 2) resize(0): |
67 | // When an item is deleted, its memory isn't freed right |
68 | // away. This allows you to iterate over a hashtable, |
69 | // and call erase(), without invalidating the iterator. |
70 | // To force the memory to be freed, call resize(0). |
71 | // For tr1 compatibility, this can also be called as rehash(0). |
72 | // |
73 | // 3) min_load_factor(0.0) |
74 | // Setting the minimum load factor to 0.0 guarantees that |
75 | // the hash table will never shrink. |
76 | // |
77 | // Roughly speaking: |
78 | // (1) dense_hash_map: fastest, uses the most memory unless entries are small |
79 | // (2) sparse_hash_map: slowest, uses the least memory |
80 | // (3) hash_map / unordered_map (STL): in the middle |
81 | // |
82 | // Typically I use sparse_hash_map when I care about space and/or when |
83 | // I need to save the hashtable on disk. I use hash_map otherwise. I |
84 | // don't personally use dense_hash_set ever; some people use it for |
85 | // small sets with lots of lookups. |
86 | // |
87 | // - dense_hash_map has, typically, about 78% memory overhead (if your |
88 | // data takes up X bytes, the hash_map uses .78X more bytes in overhead). |
89 | // - sparse_hash_map has about 4 bits overhead per entry. |
90 | // - sparse_hash_map can be 3-7 times slower than the others for lookup and, |
91 | // especially, inserts. See time_hash_map.cc for details. |
92 | // |
93 | // See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html |
94 | // for information about how to use this class. |
95 | |
96 | #pragma once |
97 | |
98 | #include <algorithm> // needed by stl_alloc |
99 | #include <functional> // for equal_to<>, select1st<>, etc |
100 | #include <initializer_list> // for initializer_list |
101 | #include <memory> // for alloc |
102 | #include <utility> // for pair<> |
103 | #include <tuple> // forward_as_tuple |
104 | #include <type_traits> // for enable_if, is_constructible, etc |
105 | #include <sparsehash/internal/densehashtable.h> // IWYU pragma: export |
106 | #include <sparsehash/internal/libc_allocator_with_realloc.h> |
107 | |
108 | namespace google { |
109 | |
110 | template <class Key, class T, class HashFcn = std::hash<Key>, |
111 | class EqualKey = std::equal_to<Key>, |
112 | class Alloc = libc_allocator_with_realloc<std::pair<const Key, T>>> |
113 | class dense_hash_map { |
114 | private: |
115 | // Apparently select1st is not stl-standard, so we define our own |
116 | struct SelectKey { |
117 | typedef const Key& result_type; |
118 | template <typename Pair> |
119 | const Key& operator()(Pair&& p) const { |
120 | return p.first; |
121 | } |
122 | }; |
123 | struct SetKey { |
124 | void operator()(std::pair<const Key, T>* value, const Key& new_key) const { |
125 | using NCKey = typename std::remove_cv<Key>::type; |
126 | *const_cast<NCKey*>(&value->first) = new_key; |
127 | |
128 | // It would be nice to clear the rest of value here as well, in |
129 | // case it's taking up a lot of memory. We do this by clearing |
130 | // the value. This assumes T has a zero-arg constructor! |
131 | value->second = T(); |
132 | } |
133 | void operator()(std::pair<const Key, T>* value, const Key& new_key, bool) const { |
134 | new(value) std::pair<const Key, T>(std::piecewise_construct, std::forward_as_tuple(new_key), std::forward_as_tuple()); |
135 | } |
136 | }; |
137 | |
138 | // The actual data |
139 | typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen; |
140 | typedef dense_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey, |
141 | SetKey, EqualKeyChosen, Alloc> ht; |
142 | ht rep; |
143 | |
144 | static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value |
145 | || std::is_same<EqualKey, std::equal_to<Key>>::value |
146 | || std::is_same<EqualKey, EqualKeyChosen>::value, |
147 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash" ); |
148 | |
149 | public: |
150 | typedef typename ht::key_type key_type; |
151 | typedef T data_type; |
152 | typedef T mapped_type; |
153 | typedef typename ht::value_type value_type; |
154 | typedef typename ht::hasher hasher; |
155 | typedef typename ht::key_equal key_equal; |
156 | typedef Alloc allocator_type; |
157 | |
158 | typedef typename ht::size_type size_type; |
159 | typedef typename ht::difference_type difference_type; |
160 | typedef typename ht::pointer pointer; |
161 | typedef typename ht::const_pointer const_pointer; |
162 | typedef typename ht::reference reference; |
163 | typedef typename ht::const_reference const_reference; |
164 | |
165 | typedef typename ht::iterator iterator; |
166 | typedef typename ht::const_iterator const_iterator; |
167 | typedef typename ht::local_iterator local_iterator; |
168 | typedef typename ht::const_local_iterator const_local_iterator; |
169 | |
170 | // Iterator functions |
171 | iterator begin() { return rep.begin(); } |
172 | iterator end() { return rep.end(); } |
173 | const_iterator begin() const { return rep.begin(); } |
174 | const_iterator end() const { return rep.end(); } |
175 | const_iterator cbegin() const { return rep.begin(); } |
176 | const_iterator cend() const { return rep.end(); } |
177 | |
178 | // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. |
179 | local_iterator begin(size_type i) { return rep.begin(i); } |
180 | local_iterator end(size_type i) { return rep.end(i); } |
181 | const_local_iterator begin(size_type i) const { return rep.begin(i); } |
182 | const_local_iterator end(size_type i) const { return rep.end(i); } |
183 | const_local_iterator cbegin(size_type i) const { return rep.begin(i); } |
184 | const_local_iterator cend(size_type i) const { return rep.end(i); } |
185 | |
186 | // Accessor functions |
187 | allocator_type get_allocator() const { return rep.get_allocator(); } |
188 | hasher hash_funct() const { return rep.hash_funct(); } |
189 | hasher hash_function() const { return hash_funct(); } |
190 | key_equal key_eq() const { return rep.key_eq(); } |
191 | |
192 | // Constructors |
193 | explicit dense_hash_map(size_type expected_max_items_in_table = 0, |
194 | const hasher& hf = hasher(), |
195 | const key_equal& eql = key_equal(), |
196 | const allocator_type& alloc = allocator_type()) |
197 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), |
198 | alloc) {} |
199 | |
200 | template <class InputIterator> |
201 | dense_hash_map(InputIterator f, InputIterator l, |
202 | const key_type& empty_key_val, |
203 | size_type expected_max_items_in_table = 0, |
204 | const hasher& hf = hasher(), |
205 | const key_equal& eql = key_equal(), |
206 | const allocator_type& alloc = allocator_type()) |
207 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), |
208 | alloc) { |
209 | set_empty_key(empty_key_val); |
210 | rep.insert(f, l); |
211 | } |
212 | // We use the default copy constructor |
213 | // We use the default operator=() |
214 | // We use the default destructor |
215 | |
216 | void clear() { rep.clear(); } |
217 | // This clears the hash map without resizing it down to the minimum |
218 | // bucket count, but rather keeps the number of buckets constant |
219 | void clear_no_resize() { rep.clear_no_resize(); } |
220 | void swap(dense_hash_map& hs) { rep.swap(hs.rep); } |
221 | |
222 | // Functions concerning size |
223 | size_type size() const { return rep.size(); } |
224 | size_type max_size() const { return rep.max_size(); } |
225 | bool empty() const { return rep.empty(); } |
226 | size_type bucket_count() const { return rep.bucket_count(); } |
227 | size_type max_bucket_count() const { return rep.max_bucket_count(); } |
228 | |
229 | // These are tr1 methods. bucket() is the bucket the key is or would be in. |
230 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } |
231 | size_type bucket(const key_type& key) const { return rep.bucket(key); } |
232 | float load_factor() const { return size() * 1.0f / bucket_count(); } |
233 | float max_load_factor() const { |
234 | float shrink, grow; |
235 | rep.get_resizing_parameters(&shrink, &grow); |
236 | return grow; |
237 | } |
238 | void max_load_factor(float new_grow) { |
239 | float shrink, grow; |
240 | rep.get_resizing_parameters(&shrink, &grow); |
241 | rep.set_resizing_parameters(shrink, new_grow); |
242 | } |
243 | // These aren't tr1 methods but perhaps ought to be. |
244 | float min_load_factor() const { |
245 | float shrink, grow; |
246 | rep.get_resizing_parameters(&shrink, &grow); |
247 | return shrink; |
248 | } |
249 | void min_load_factor(float new_shrink) { |
250 | float shrink, grow; |
251 | rep.get_resizing_parameters(&shrink, &grow); |
252 | rep.set_resizing_parameters(new_shrink, grow); |
253 | } |
254 | // Deprecated; use min_load_factor() or max_load_factor() instead. |
255 | void set_resizing_parameters(float shrink, float grow) { |
256 | rep.set_resizing_parameters(shrink, grow); |
257 | } |
258 | |
259 | void resize(size_type hint) { rep.resize(hint); } |
260 | void rehash(size_type hint) { resize(hint); } // the tr1 name |
261 | |
262 | // Lookup routines |
263 | iterator find(const key_type& key) { return rep.find(key); } |
264 | const_iterator find(const key_type& key) const { return rep.find(key); } |
265 | |
266 | template <typename K> |
267 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type |
268 | find(const K& key) { return rep.find(key); } |
269 | template <typename K> |
270 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, const_iterator>::type |
271 | find(const K& key) const { return rep.find(key); } |
272 | |
273 | data_type& operator[](const key_type& key) { // This is our value-add! |
274 | // If key is in the hashtable, returns find(key)->second, |
275 | // otherwise returns insert(value_type(key, T()).first->second. |
276 | // Note it does not create an empty T unless the find fails. |
277 | return rep.template find_or_insert<data_type>(key).second; |
278 | } |
279 | |
280 | data_type& operator[](key_type&& key) { |
281 | return rep.template find_or_insert<data_type>(std::move(key)).second; |
282 | } |
283 | |
284 | size_type count(const key_type& key) const { return rep.count(key); } |
285 | |
286 | template <typename K> |
287 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type |
288 | count(const K& key) const { return rep.count(key); } |
289 | |
290 | std::pair<iterator, iterator> equal_range(const key_type& key) { |
291 | return rep.equal_range(key); |
292 | } |
293 | std::pair<const_iterator, const_iterator> equal_range( |
294 | const key_type& key) const { |
295 | return rep.equal_range(key); |
296 | } |
297 | |
298 | template<typename K> |
299 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type |
300 | equal_range(const K& key) { |
301 | return rep.equal_range(key); |
302 | } |
303 | template<typename K> |
304 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<const_iterator, const_iterator>>::type |
305 | equal_range(const K& key) const { |
306 | return rep.equal_range(key); |
307 | } |
308 | |
309 | // Insertion routines |
310 | std::pair<iterator, bool> insert(const value_type& obj) { |
311 | return rep.insert(obj); |
312 | } |
313 | |
314 | template <typename Pair, typename = typename std::enable_if<std::is_constructible<value_type, Pair&&>::value>::type> |
315 | std::pair<iterator, bool> insert(Pair&& obj) { |
316 | return rep.insert(std::forward<Pair>(obj)); |
317 | } |
318 | |
319 | // overload to allow {} syntax: .insert( { {key}, {args} } ) |
320 | std::pair<iterator, bool> insert(value_type&& obj) { |
321 | return rep.insert(std::move(obj)); |
322 | } |
323 | |
324 | template <typename... Args> |
325 | std::pair<iterator, bool> emplace(Args&&... args) { |
326 | return rep.emplace(std::forward<Args>(args)...); |
327 | } |
328 | |
329 | template <typename... Args> |
330 | std::pair<iterator, bool> emplace_hint(const_iterator hint, Args&&... args) { |
331 | return rep.emplace_hint(hint, std::forward<Args>(args)...); |
332 | } |
333 | |
334 | |
335 | template <class InputIterator> |
336 | void insert(InputIterator f, InputIterator l) { |
337 | rep.insert(f, l); |
338 | } |
339 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } |
340 | void insert(std::initializer_list<value_type> ilist) { rep.insert(ilist.begin(), ilist.end()); } |
341 | // Required for std::insert_iterator; the passed-in iterator is ignored. |
342 | iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; } |
343 | iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; } |
344 | template <class P, class = typename std::enable_if< |
345 | std::is_constructible<value_type, P&&>::value && |
346 | !std::is_same<value_type, P>::value |
347 | >::type> |
348 | iterator insert(const_iterator, P&& obj) { return insert(std::forward<P>(obj)).first; } |
349 | |
350 | // Deletion and empty routines |
351 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key |
352 | // value to identify deleted and empty buckets. You can change the |
353 | // deleted key as time goes on, or get rid of it entirely to be insert-only. |
354 | // YOU MUST CALL THIS! |
355 | void set_empty_key(const key_type& key) { rep.set_empty_key(key); } |
356 | key_type empty_key() const { return rep.empty_key(); } |
357 | |
358 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } |
359 | void clear_deleted_key() { rep.clear_deleted_key(); } |
360 | key_type deleted_key() const { return rep.deleted_key(); } |
361 | |
362 | // These are standard |
363 | size_type erase(const key_type& key) { return rep.erase(key); } |
364 | iterator erase(const_iterator it) { return rep.erase(it); } |
365 | iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); } |
366 | |
367 | // Comparison |
368 | bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; } |
369 | bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; } |
370 | |
371 | // I/O -- this is an add-on for writing hash map to disk |
372 | // |
373 | // For maximum flexibility, this does not assume a particular |
374 | // file type (though it will probably be a FILE *). We just pass |
375 | // the fp through to rep. |
376 | |
377 | // If your keys and values are simple enough, you can pass this |
378 | // serializer to serialize()/unserialize(). "Simple enough" means |
379 | // value_type is a POD type that contains no pointers. Note, |
380 | // however, we don't try to normalize endianness. |
381 | typedef typename ht::NopointerSerializer NopointerSerializer; |
382 | |
383 | // serializer: a class providing operator()(OUTPUT*, const value_type&) |
384 | // (writing value_type to OUTPUT). You can specify a |
385 | // NopointerSerializer object if appropriate (see above). |
386 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a |
387 | // pointer to a class providing size_t Write(const void*, size_t), |
388 | // which writes a buffer into a stream (which fp presumably |
389 | // owns) and returns the number of bytes successfully written. |
390 | // Note basic_ostream<not_char> is not currently supported. |
391 | template <typename ValueSerializer, typename OUTPUT> |
392 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { |
393 | return rep.serialize(serializer, fp); |
394 | } |
395 | |
396 | // serializer: a functor providing operator()(INPUT*, value_type*) |
397 | // (reading from INPUT and into value_type). You can specify a |
398 | // NopointerSerializer object if appropriate (see above). |
399 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a |
400 | // pointer to a class providing size_t Read(void*, size_t), |
401 | // which reads into a buffer from a stream (which fp presumably |
402 | // owns) and returns the number of bytes successfully read. |
403 | // Note basic_istream<not_char> is not currently supported. |
404 | // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer |
405 | // may need to do a const cast in order to fill in the key. |
406 | template <typename ValueSerializer, typename INPUT> |
407 | bool unserialize(ValueSerializer serializer, INPUT* fp) { |
408 | return rep.unserialize(serializer, fp); |
409 | } |
410 | }; |
411 | |
412 | // We need a global swap as well |
413 | template <class Key, class T, class HashFcn, class EqualKey, class Alloc> |
414 | inline void swap(dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1, |
415 | dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) { |
416 | hm1.swap(hm2); |
417 | } |
418 | |
419 | } // namespace google |
420 | |