1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ----
31//
32// This is just a very thin wrapper over densehashtable.h, just
33// like sgi stl's stl_hash_map is a very thin wrapper over
34// stl_hashtable. The major thing we define is operator[], because
35// we have a concept of a data_type which stl_hashtable doesn't
36// (it only has a key and a value).
37//
38// NOTE: this is exactly like sparse_hash_map.h, with the word
39// "sparse" replaced by "dense", except for the addition of
40// set_empty_key().
41//
42// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
43//
44// Otherwise your program will die in mysterious ways. (Note if you
45// use the constructor that takes an InputIterator range, you pass in
46// the empty key in the constructor, rather than after. As a result,
47// this constructor differs from the standard STL version.)
48//
49// In other respects, we adhere mostly to the STL semantics for
50// hash-map. One important exception is that insert() may invalidate
51// iterators entirely -- STL semantics are that insert() may reorder
52// iterators, but they all still refer to something valid in the
53// hashtable. Not so for us. Likewise, insert() may invalidate
54// pointers into the hashtable. (Whether insert invalidates iterators
55// and pointers depends on whether it results in a hashtable resize).
56// On the plus side, delete() doesn't invalidate iterators or pointers
57// at all, or even change the ordering of elements.
58//
59// Here are a few "power user" tips:
60//
61// 1) set_deleted_key():
62// If you want to use erase() you *must* call set_deleted_key(),
63// in addition to set_empty_key(), after construction.
64// The deleted and empty keys must differ.
65//
66// 2) resize(0):
67// When an item is deleted, its memory isn't freed right
68// away. This allows you to iterate over a hashtable,
69// and call erase(), without invalidating the iterator.
70// To force the memory to be freed, call resize(0).
71// For tr1 compatibility, this can also be called as rehash(0).
72//
73// 3) min_load_factor(0.0)
74// Setting the minimum load factor to 0.0 guarantees that
75// the hash table will never shrink.
76//
77// Roughly speaking:
78// (1) dense_hash_map: fastest, uses the most memory unless entries are small
79// (2) sparse_hash_map: slowest, uses the least memory
80// (3) hash_map / unordered_map (STL): in the middle
81//
82// Typically I use sparse_hash_map when I care about space and/or when
83// I need to save the hashtable on disk. I use hash_map otherwise. I
84// don't personally use dense_hash_set ever; some people use it for
85// small sets with lots of lookups.
86//
87// - dense_hash_map has, typically, about 78% memory overhead (if your
88// data takes up X bytes, the hash_map uses .78X more bytes in overhead).
89// - sparse_hash_map has about 4 bits overhead per entry.
90// - sparse_hash_map can be 3-7 times slower than the others for lookup and,
91// especially, inserts. See time_hash_map.cc for details.
92//
93// See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html
94// for information about how to use this class.
95
96#pragma once
97
98#include <algorithm> // needed by stl_alloc
99#include <functional> // for equal_to<>, select1st<>, etc
100#include <initializer_list> // for initializer_list
101#include <memory> // for alloc
102#include <utility> // for pair<>
103#include <tuple> // forward_as_tuple
104#include <type_traits> // for enable_if, is_constructible, etc
105#include <sparsehash/internal/densehashtable.h> // IWYU pragma: export
106#include <sparsehash/internal/libc_allocator_with_realloc.h>
107
108namespace google {
109
110template <class Key, class T, class HashFcn = std::hash<Key>,
111 class EqualKey = std::equal_to<Key>,
112 class Alloc = libc_allocator_with_realloc<std::pair<const Key, T>>>
113class dense_hash_map {
114 private:
115 // Apparently select1st is not stl-standard, so we define our own
116 struct SelectKey {
117 typedef const Key& result_type;
118 template <typename Pair>
119 const Key& operator()(Pair&& p) const {
120 return p.first;
121 }
122 };
123 struct SetKey {
124 void operator()(std::pair<const Key, T>* value, const Key& new_key) const {
125 using NCKey = typename std::remove_cv<Key>::type;
126 *const_cast<NCKey*>(&value->first) = new_key;
127
128 // It would be nice to clear the rest of value here as well, in
129 // case it's taking up a lot of memory. We do this by clearing
130 // the value. This assumes T has a zero-arg constructor!
131 value->second = T();
132 }
133 void operator()(std::pair<const Key, T>* value, const Key& new_key, bool) const {
134 new(value) std::pair<const Key, T>(std::piecewise_construct, std::forward_as_tuple(new_key), std::forward_as_tuple());
135 }
136 };
137
138 // The actual data
139 typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen;
140 typedef dense_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey,
141 SetKey, EqualKeyChosen, Alloc> ht;
142 ht rep;
143
144 static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value
145 || std::is_same<EqualKey, std::equal_to<Key>>::value
146 || std::is_same<EqualKey, EqualKeyChosen>::value,
147 "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash");
148
149 public:
150 typedef typename ht::key_type key_type;
151 typedef T data_type;
152 typedef T mapped_type;
153 typedef typename ht::value_type value_type;
154 typedef typename ht::hasher hasher;
155 typedef typename ht::key_equal key_equal;
156 typedef Alloc allocator_type;
157
158 typedef typename ht::size_type size_type;
159 typedef typename ht::difference_type difference_type;
160 typedef typename ht::pointer pointer;
161 typedef typename ht::const_pointer const_pointer;
162 typedef typename ht::reference reference;
163 typedef typename ht::const_reference const_reference;
164
165 typedef typename ht::iterator iterator;
166 typedef typename ht::const_iterator const_iterator;
167 typedef typename ht::local_iterator local_iterator;
168 typedef typename ht::const_local_iterator const_local_iterator;
169
170 // Iterator functions
171 iterator begin() { return rep.begin(); }
172 iterator end() { return rep.end(); }
173 const_iterator begin() const { return rep.begin(); }
174 const_iterator end() const { return rep.end(); }
175 const_iterator cbegin() const { return rep.begin(); }
176 const_iterator cend() const { return rep.end(); }
177
178 // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
179 local_iterator begin(size_type i) { return rep.begin(i); }
180 local_iterator end(size_type i) { return rep.end(i); }
181 const_local_iterator begin(size_type i) const { return rep.begin(i); }
182 const_local_iterator end(size_type i) const { return rep.end(i); }
183 const_local_iterator cbegin(size_type i) const { return rep.begin(i); }
184 const_local_iterator cend(size_type i) const { return rep.end(i); }
185
186 // Accessor functions
187 allocator_type get_allocator() const { return rep.get_allocator(); }
188 hasher hash_funct() const { return rep.hash_funct(); }
189 hasher hash_function() const { return hash_funct(); }
190 key_equal key_eq() const { return rep.key_eq(); }
191
192 // Constructors
193 explicit dense_hash_map(size_type expected_max_items_in_table = 0,
194 const hasher& hf = hasher(),
195 const key_equal& eql = key_equal(),
196 const allocator_type& alloc = allocator_type())
197 : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(),
198 alloc) {}
199
200 template <class InputIterator>
201 dense_hash_map(InputIterator f, InputIterator l,
202 const key_type& empty_key_val,
203 size_type expected_max_items_in_table = 0,
204 const hasher& hf = hasher(),
205 const key_equal& eql = key_equal(),
206 const allocator_type& alloc = allocator_type())
207 : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(),
208 alloc) {
209 set_empty_key(empty_key_val);
210 rep.insert(f, l);
211 }
212 // We use the default copy constructor
213 // We use the default operator=()
214 // We use the default destructor
215
216 void clear() { rep.clear(); }
217 // This clears the hash map without resizing it down to the minimum
218 // bucket count, but rather keeps the number of buckets constant
219 void clear_no_resize() { rep.clear_no_resize(); }
220 void swap(dense_hash_map& hs) { rep.swap(hs.rep); }
221
222 // Functions concerning size
223 size_type size() const { return rep.size(); }
224 size_type max_size() const { return rep.max_size(); }
225 bool empty() const { return rep.empty(); }
226 size_type bucket_count() const { return rep.bucket_count(); }
227 size_type max_bucket_count() const { return rep.max_bucket_count(); }
228
229 // These are tr1 methods. bucket() is the bucket the key is or would be in.
230 size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
231 size_type bucket(const key_type& key) const { return rep.bucket(key); }
232 float load_factor() const { return size() * 1.0f / bucket_count(); }
233 float max_load_factor() const {
234 float shrink, grow;
235 rep.get_resizing_parameters(&shrink, &grow);
236 return grow;
237 }
238 void max_load_factor(float new_grow) {
239 float shrink, grow;
240 rep.get_resizing_parameters(&shrink, &grow);
241 rep.set_resizing_parameters(shrink, new_grow);
242 }
243 // These aren't tr1 methods but perhaps ought to be.
244 float min_load_factor() const {
245 float shrink, grow;
246 rep.get_resizing_parameters(&shrink, &grow);
247 return shrink;
248 }
249 void min_load_factor(float new_shrink) {
250 float shrink, grow;
251 rep.get_resizing_parameters(&shrink, &grow);
252 rep.set_resizing_parameters(new_shrink, grow);
253 }
254 // Deprecated; use min_load_factor() or max_load_factor() instead.
255 void set_resizing_parameters(float shrink, float grow) {
256 rep.set_resizing_parameters(shrink, grow);
257 }
258
259 void resize(size_type hint) { rep.resize(hint); }
260 void rehash(size_type hint) { resize(hint); } // the tr1 name
261
262 // Lookup routines
263 iterator find(const key_type& key) { return rep.find(key); }
264 const_iterator find(const key_type& key) const { return rep.find(key); }
265
266 template <typename K>
267 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type
268 find(const K& key) { return rep.find(key); }
269 template <typename K>
270 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, const_iterator>::type
271 find(const K& key) const { return rep.find(key); }
272
273 data_type& operator[](const key_type& key) { // This is our value-add!
274 // If key is in the hashtable, returns find(key)->second,
275 // otherwise returns insert(value_type(key, T()).first->second.
276 // Note it does not create an empty T unless the find fails.
277 return rep.template find_or_insert<data_type>(key).second;
278 }
279
280 data_type& operator[](key_type&& key) {
281 return rep.template find_or_insert<data_type>(std::move(key)).second;
282 }
283
284 size_type count(const key_type& key) const { return rep.count(key); }
285
286 template <typename K>
287 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type
288 count(const K& key) const { return rep.count(key); }
289
290 std::pair<iterator, iterator> equal_range(const key_type& key) {
291 return rep.equal_range(key);
292 }
293 std::pair<const_iterator, const_iterator> equal_range(
294 const key_type& key) const {
295 return rep.equal_range(key);
296 }
297
298 template<typename K>
299 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type
300 equal_range(const K& key) {
301 return rep.equal_range(key);
302 }
303 template<typename K>
304 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<const_iterator, const_iterator>>::type
305 equal_range(const K& key) const {
306 return rep.equal_range(key);
307 }
308
309 // Insertion routines
310 std::pair<iterator, bool> insert(const value_type& obj) {
311 return rep.insert(obj);
312 }
313
314 template <typename Pair, typename = typename std::enable_if<std::is_constructible<value_type, Pair&&>::value>::type>
315 std::pair<iterator, bool> insert(Pair&& obj) {
316 return rep.insert(std::forward<Pair>(obj));
317 }
318
319 // overload to allow {} syntax: .insert( { {key}, {args} } )
320 std::pair<iterator, bool> insert(value_type&& obj) {
321 return rep.insert(std::move(obj));
322 }
323
324 template <typename... Args>
325 std::pair<iterator, bool> emplace(Args&&... args) {
326 return rep.emplace(std::forward<Args>(args)...);
327 }
328
329 template <typename... Args>
330 std::pair<iterator, bool> emplace_hint(const_iterator hint, Args&&... args) {
331 return rep.emplace_hint(hint, std::forward<Args>(args)...);
332 }
333
334
335 template <class InputIterator>
336 void insert(InputIterator f, InputIterator l) {
337 rep.insert(f, l);
338 }
339 void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
340 void insert(std::initializer_list<value_type> ilist) { rep.insert(ilist.begin(), ilist.end()); }
341 // Required for std::insert_iterator; the passed-in iterator is ignored.
342 iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; }
343 iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; }
344 template <class P, class = typename std::enable_if<
345 std::is_constructible<value_type, P&&>::value &&
346 !std::is_same<value_type, P>::value
347 >::type>
348 iterator insert(const_iterator, P&& obj) { return insert(std::forward<P>(obj)).first; }
349
350 // Deletion and empty routines
351 // THESE ARE NON-STANDARD! I make you specify an "impossible" key
352 // value to identify deleted and empty buckets. You can change the
353 // deleted key as time goes on, or get rid of it entirely to be insert-only.
354 // YOU MUST CALL THIS!
355 void set_empty_key(const key_type& key) { rep.set_empty_key(key); }
356 key_type empty_key() const { return rep.empty_key(); }
357
358 void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
359 void clear_deleted_key() { rep.clear_deleted_key(); }
360 key_type deleted_key() const { return rep.deleted_key(); }
361
362 // These are standard
363 size_type erase(const key_type& key) { return rep.erase(key); }
364 iterator erase(const_iterator it) { return rep.erase(it); }
365 iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); }
366
367 // Comparison
368 bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; }
369 bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; }
370
371 // I/O -- this is an add-on for writing hash map to disk
372 //
373 // For maximum flexibility, this does not assume a particular
374 // file type (though it will probably be a FILE *). We just pass
375 // the fp through to rep.
376
377 // If your keys and values are simple enough, you can pass this
378 // serializer to serialize()/unserialize(). "Simple enough" means
379 // value_type is a POD type that contains no pointers. Note,
380 // however, we don't try to normalize endianness.
381 typedef typename ht::NopointerSerializer NopointerSerializer;
382
383 // serializer: a class providing operator()(OUTPUT*, const value_type&)
384 // (writing value_type to OUTPUT). You can specify a
385 // NopointerSerializer object if appropriate (see above).
386 // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
387 // pointer to a class providing size_t Write(const void*, size_t),
388 // which writes a buffer into a stream (which fp presumably
389 // owns) and returns the number of bytes successfully written.
390 // Note basic_ostream<not_char> is not currently supported.
391 template <typename ValueSerializer, typename OUTPUT>
392 bool serialize(ValueSerializer serializer, OUTPUT* fp) {
393 return rep.serialize(serializer, fp);
394 }
395
396 // serializer: a functor providing operator()(INPUT*, value_type*)
397 // (reading from INPUT and into value_type). You can specify a
398 // NopointerSerializer object if appropriate (see above).
399 // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
400 // pointer to a class providing size_t Read(void*, size_t),
401 // which reads into a buffer from a stream (which fp presumably
402 // owns) and returns the number of bytes successfully read.
403 // Note basic_istream<not_char> is not currently supported.
404 // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
405 // may need to do a const cast in order to fill in the key.
406 template <typename ValueSerializer, typename INPUT>
407 bool unserialize(ValueSerializer serializer, INPUT* fp) {
408 return rep.unserialize(serializer, fp);
409 }
410};
411
412// We need a global swap as well
413template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
414inline void swap(dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
415 dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) {
416 hm1.swap(hm2);
417}
418
419} // namespace google
420