1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31//
32// This is just a very thin wrapper over sparsehashtable.h, just
33// like sgi stl's stl_hash_map is a very thin wrapper over
34// stl_hashtable. The major thing we define is operator[], because
35// we have a concept of a data_type which stl_hashtable doesn't
36// (it only has a key and a value).
37//
38// We adhere mostly to the STL semantics for hash-map. One important
39// exception is that insert() may invalidate iterators entirely -- STL
40// semantics are that insert() may reorder iterators, but they all
41// still refer to something valid in the hashtable. Not so for us.
42// Likewise, insert() may invalidate pointers into the hashtable.
43// (Whether insert invalidates iterators and pointers depends on
44// whether it results in a hashtable resize). On the plus side,
// erase() doesn't invalidate iterators or pointers at all, or even
46// change the ordering of elements.
47//
48// Here are a few "power user" tips:
49//
50// 1) set_deleted_key():
51// Unlike STL's hash_map, if you want to use erase() you
52// *must* call set_deleted_key() after construction.
53//
54// 2) resize(0):
55// When an item is deleted, its memory isn't freed right
56// away. This is what allows you to iterate over a hashtable
57// and call erase() without invalidating the iterator.
58// To force the memory to be freed, call resize(0).
59// For tr1 compatibility, this can also be called as rehash(0).
60//
61// 3) min_load_factor(0.0)
62// Setting the minimum load factor to 0.0 guarantees that
63// the hash table will never shrink.
64//
65// Roughly speaking:
66// (1) dense_hash_map: fastest, uses the most memory unless entries are small
67// (2) sparse_hash_map: slowest, uses the least memory
68// (3) hash_map / unordered_map (STL): in the middle
69//
70// Typically I use sparse_hash_map when I care about space and/or when
71// I need to save the hashtable on disk. I use hash_map otherwise. I
72// don't personally use dense_hash_map ever; some people use it for
73// small maps with lots of lookups.
74//
75// - dense_hash_map has, typically, about 78% memory overhead (if your
76// data takes up X bytes, the hash_map uses .78X more bytes in overhead).
77// - sparse_hash_map has about 4 bits overhead per entry.
78// - sparse_hash_map can be 3-7 times slower than the others for lookup and,
79// especially, inserts. See time_hash_map.cc for details.
80//
81// See /usr/(local/)?doc/sparsehash-*/sparse_hash_map.html
82// for information about how to use this class.
83
84#pragma once
85
86#include <algorithm> // needed by stl_alloc
87#include <functional> // for equal_to<>, select1st<>, etc
88#include <memory> // for alloc
89#include <utility> // for pair<>
90#include <sparsehash/internal/libc_allocator_with_realloc.h>
91#include <sparsehash/internal/sparsehashtable.h> // IWYU pragma: export
92
93namespace google {
94
95template <class Key, class T, class HashFcn = std::hash<Key>,
96 class EqualKey = std::equal_to<Key>,
97 class Alloc = libc_allocator_with_realloc<std::pair<const Key, T>>>
98class sparse_hash_map {
99 private:
100 // Apparently select1st is not stl-standard, so we define our own
101 struct SelectKey {
102 typedef const Key& result_type;
103 const Key& operator()(const std::pair<const Key, T>& p) const {
104 return p.first;
105 }
106 };
107 struct SetKey {
108 void operator()(std::pair<const Key, T>* value, const Key& new_key) const {
109 *const_cast<Key*>(&value->first) = new_key;
110 // It would be nice to clear the rest of value here as well, in
111 // case it's taking up a lot of memory. We do this by clearing
112 // the value. This assumes T has a zero-arg constructor!
113 value->second = T();
114 }
115 };
116 // For operator[].
117 struct DefaultValue {
118 std::pair<const Key, T> operator()(const Key& key) {
119 return std::make_pair(key, T());
120 }
121 };
122
123 // The actual data
124 typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen;
125 typedef sparse_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey,
126 SetKey, EqualKeyChosen, Alloc> ht;
127 ht rep;
128
129 static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value
130 || std::is_same<EqualKey, std::equal_to<Key>>::value
131 || std::is_same<EqualKey, EqualKeyChosen>::value,
132 "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash");
133
134 public:
135 typedef typename ht::key_type key_type;
136 typedef T data_type;
137 typedef T mapped_type;
138 typedef typename ht::value_type value_type;
139 typedef typename ht::hasher hasher;
140 typedef typename ht::key_equal key_equal;
141 typedef Alloc allocator_type;
142
143 typedef typename ht::size_type size_type;
144 typedef typename ht::difference_type difference_type;
145 typedef typename ht::pointer pointer;
146 typedef typename ht::const_pointer const_pointer;
147 typedef typename ht::reference reference;
148 typedef typename ht::const_reference const_reference;
149
150 typedef typename ht::iterator iterator;
151 typedef typename ht::const_iterator const_iterator;
152 typedef typename ht::local_iterator local_iterator;
153 typedef typename ht::const_local_iterator const_local_iterator;
154
155 // Iterator functions
156 iterator begin() { return rep.begin(); }
157 iterator end() { return rep.end(); }
158 const_iterator begin() const { return rep.begin(); }
159 const_iterator end() const { return rep.end(); }
160
161 // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
162 local_iterator begin(size_type i) { return rep.begin(i); }
163 local_iterator end(size_type i) { return rep.end(i); }
164 const_local_iterator begin(size_type i) const { return rep.begin(i); }
165 const_local_iterator end(size_type i) const { return rep.end(i); }
166
167 // Accessor functions
168 allocator_type get_allocator() const { return rep.get_allocator(); }
169 hasher hash_funct() const { return rep.hash_funct(); }
170 hasher hash_function() const { return hash_funct(); }
171 key_equal key_eq() const { return rep.key_eq(); }
172
173 // Constructors
174 explicit sparse_hash_map(size_type expected_max_items_in_table = 0,
175 const hasher& hf = hasher(),
176 const key_equal& eql = key_equal(),
177 const allocator_type& alloc = allocator_type())
178 : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(),
179 alloc) {}
180
181 template <class InputIterator>
182 sparse_hash_map(InputIterator f, InputIterator l,
183 size_type expected_max_items_in_table = 0,
184 const hasher& hf = hasher(),
185 const key_equal& eql = key_equal(),
186 const allocator_type& alloc = allocator_type())
187 : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(),
188 alloc) {
189 rep.insert(f, l);
190 }
191 // We use the default copy constructor
192 // We use the default operator=()
193 // We use the default destructor
194
195 void clear() { rep.clear(); }
196 void swap(sparse_hash_map& hs) { rep.swap(hs.rep); }
197
198 // Functions concerning size
199 size_type size() const { return rep.size(); }
200 size_type max_size() const { return rep.max_size(); }
201 bool empty() const { return rep.empty(); }
202 size_type bucket_count() const { return rep.bucket_count(); }
203 size_type max_bucket_count() const { return rep.max_bucket_count(); }
204
205 // These are tr1 methods. bucket() is the bucket the key is or would be in.
206 size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
207 size_type bucket(const key_type& key) const { return rep.bucket(key); }
208 float load_factor() const { return size() * 1.0f / bucket_count(); }
209 float max_load_factor() const {
210 float shrink, grow;
211 rep.get_resizing_parameters(&shrink, &grow);
212 return grow;
213 }
214 void max_load_factor(float new_grow) {
215 float shrink, grow;
216 rep.get_resizing_parameters(&shrink, &grow);
217 rep.set_resizing_parameters(shrink, new_grow);
218 }
219 // These aren't tr1 methods but perhaps ought to be.
220 float min_load_factor() const {
221 float shrink, grow;
222 rep.get_resizing_parameters(&shrink, &grow);
223 return shrink;
224 }
225 void min_load_factor(float new_shrink) {
226 float shrink, grow;
227 rep.get_resizing_parameters(&shrink, &grow);
228 rep.set_resizing_parameters(new_shrink, grow);
229 }
230 // Deprecated; use min_load_factor() or max_load_factor() instead.
231 void set_resizing_parameters(float shrink, float grow) {
232 rep.set_resizing_parameters(shrink, grow);
233 }
234
235 void resize(size_type hint) { rep.resize(hint); }
236 void rehash(size_type hint) { resize(hint); } // the tr1 name
237
238 // Lookup routines
239 iterator find(const key_type& key) { return rep.find(key); }
240 const_iterator find(const key_type& key) const { return rep.find(key); }
241
242 template <typename K>
243 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type
244 find(const K& key) { return rep.find(key); }
245 template <typename K>
246 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, const_iterator>::type
247 find(const K& key) const { return rep.find(key); }
248
249 data_type& operator[](const key_type& key) { // This is our value-add!
250 // If key is in the hashtable, returns find(key)->second,
251 // otherwise returns insert(value_type(key, T()).first->second.
252 // Note it does not create an empty T unless the find fails.
253 return rep.template find_or_insert<DefaultValue>(key).second;
254 }
255
256 size_type count(const key_type& key) const { return rep.count(key); }
257
258 template <typename K>
259 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type
260 count(const K& key) const { return rep.count(key); }
261
262 std::pair<iterator, iterator> equal_range(const key_type& key) {
263 return rep.equal_range(key);
264 }
265 std::pair<const_iterator, const_iterator> equal_range(
266 const key_type& key) const {
267 return rep.equal_range(key);
268 }
269
270 template<typename K>
271 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type
272 equal_range(const K& key) {
273 return rep.equal_range(key);
274 }
275 template<typename K>
276 typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<const_iterator, const_iterator>>::type
277 equal_range(const K& key) const {
278 return rep.equal_range(key);
279 }
280
281 // Insertion routines
282 std::pair<iterator, bool> insert(const value_type& obj) {
283 return rep.insert(obj);
284 }
285 template <class InputIterator>
286 void insert(InputIterator f, InputIterator l) {
287 rep.insert(f, l);
288 }
289 void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
290 // Required for std::insert_iterator; the passed-in iterator is ignored.
291 iterator insert(iterator, const value_type& obj) { return insert(obj).first; }
292
293 // Deletion routines
294 // THESE ARE NON-STANDARD! I make you specify an "impossible" key
295 // value to identify deleted buckets. You can change the key as
296 // time goes on, or get rid of it entirely to be insert-only.
297 void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
298 void clear_deleted_key() { rep.clear_deleted_key(); }
299 key_type deleted_key() const { return rep.deleted_key(); }
300
301 // These are standard
302 size_type erase(const key_type& key) { return rep.erase(key); }
303 void erase(iterator it) { rep.erase(it); }
304 void erase(iterator f, iterator l) { rep.erase(f, l); }
305
306 // Comparison
307 bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; }
308 bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; }
309
310 // I/O -- this is an add-on for writing metainformation to disk
311 //
312 // For maximum flexibility, this does not assume a particular
313 // file type (though it will probably be a FILE *). We just pass
314 // the fp through to rep.
315
316 // If your keys and values are simple enough, you can pass this
317 // serializer to serialize()/unserialize(). "Simple enough" means
318 // value_type is a POD type that contains no pointers. Note,
319 // however, we don't try to normalize endianness.
320 typedef typename ht::NopointerSerializer NopointerSerializer;
321
322 // serializer: a class providing operator()(OUTPUT*, const value_type&)
323 // (writing value_type to OUTPUT). You can specify a
324 // NopointerSerializer object if appropriate (see above).
325 // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
326 // pointer to a class providing size_t Write(const void*, size_t),
327 // which writes a buffer into a stream (which fp presumably
328 // owns) and returns the number of bytes successfully written.
329 // Note basic_ostream<not_char> is not currently supported.
330 template <typename ValueSerializer, typename OUTPUT>
331 bool serialize(ValueSerializer serializer, OUTPUT* fp) {
332 return rep.serialize(serializer, fp);
333 }
334
335 // serializer: a functor providing operator()(INPUT*, value_type*)
336 // (reading from INPUT and into value_type). You can specify a
337 // NopointerSerializer object if appropriate (see above).
338 // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
339 // pointer to a class providing size_t Read(void*, size_t),
340 // which reads into a buffer from a stream (which fp presumably
341 // owns) and returns the number of bytes successfully read.
342 // Note basic_istream<not_char> is not currently supported.
343 // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
344 // may need to do a const cast in order to fill in the key.
345 // NOTE: if Key or T are not POD types, the serializer MUST use
346 // placement-new to initialize their values, rather than a normal
347 // equals-assignment or similar. (The value_type* passed into the
348 // serializer points to garbage memory.)
349 template <typename ValueSerializer, typename INPUT>
350 bool unserialize(ValueSerializer serializer, INPUT* fp) {
351 return rep.unserialize(serializer, fp);
352 }
353
354 // The four methods below are DEPRECATED.
355 // Use serialize() and unserialize() for new code.
356 template <typename OUTPUT>
357 bool write_metadata(OUTPUT* fp) {
358 return rep.write_metadata(fp);
359 }
360
361 template <typename INPUT>
362 bool read_metadata(INPUT* fp) {
363 return rep.read_metadata(fp);
364 }
365
366 template <typename OUTPUT>
367 bool write_nopointer_data(OUTPUT* fp) {
368 return rep.write_nopointer_data(fp);
369 }
370
371 template <typename INPUT>
372 bool read_nopointer_data(INPUT* fp) {
373 return rep.read_nopointer_data(fp);
374 }
375};
376
377// We need a global swap as well
378template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
379inline void swap(sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
380 sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) {
381 hm1.swap(hm2);
382}
383
384} // namespace google
385