1 | // Copyright (c) 2005, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | // --- |
31 | // |
32 | // This is just a very thin wrapper over sparsehashtable.h, just |
33 | // like sgi stl's stl_hash_map is a very thin wrapper over |
34 | // stl_hashtable. The major thing we define is operator[], because |
35 | // we have a concept of a data_type which stl_hashtable doesn't |
36 | // (it only has a key and a value). |
37 | // |
38 | // We adhere mostly to the STL semantics for hash-map. One important |
39 | // exception is that insert() may invalidate iterators entirely -- STL |
40 | // semantics are that insert() may reorder iterators, but they all |
41 | // still refer to something valid in the hashtable. Not so for us. |
42 | // Likewise, insert() may invalidate pointers into the hashtable. |
43 | // (Whether insert invalidates iterators and pointers depends on |
44 | // whether it results in a hashtable resize). On the plus side, |
45 | // delete() doesn't invalidate iterators or pointers at all, or even |
46 | // change the ordering of elements. |
47 | // |
48 | // Here are a few "power user" tips: |
49 | // |
50 | // 1) set_deleted_key(): |
51 | // Unlike STL's hash_map, if you want to use erase() you |
52 | // *must* call set_deleted_key() after construction. |
53 | // |
54 | // 2) resize(0): |
55 | // When an item is deleted, its memory isn't freed right |
56 | // away. This is what allows you to iterate over a hashtable |
57 | // and call erase() without invalidating the iterator. |
58 | // To force the memory to be freed, call resize(0). |
59 | // For tr1 compatibility, this can also be called as rehash(0). |
60 | // |
61 | // 3) min_load_factor(0.0) |
62 | // Setting the minimum load factor to 0.0 guarantees that |
63 | // the hash table will never shrink. |
64 | // |
65 | // Roughly speaking: |
66 | // (1) dense_hash_map: fastest, uses the most memory unless entries are small |
67 | // (2) sparse_hash_map: slowest, uses the least memory |
68 | // (3) hash_map / unordered_map (STL): in the middle |
69 | // |
70 | // Typically I use sparse_hash_map when I care about space and/or when |
71 | // I need to save the hashtable on disk. I use hash_map otherwise. I |
72 | // don't personally use dense_hash_map ever; some people use it for |
73 | // small maps with lots of lookups. |
74 | // |
75 | // - dense_hash_map has, typically, about 78% memory overhead (if your |
76 | // data takes up X bytes, the hash_map uses .78X more bytes in overhead). |
77 | // - sparse_hash_map has about 4 bits overhead per entry. |
78 | // - sparse_hash_map can be 3-7 times slower than the others for lookup and, |
79 | // especially, inserts. See time_hash_map.cc for details. |
80 | // |
81 | // See /usr/(local/)?doc/sparsehash-*/sparse_hash_map.html |
82 | // for information about how to use this class. |
83 | |
84 | #pragma once |
85 | |
86 | #include <algorithm> // needed by stl_alloc |
87 | #include <functional> // for equal_to<>, select1st<>, etc |
88 | #include <memory> // for alloc |
89 | #include <utility> // for pair<> |
90 | #include <sparsehash/internal/libc_allocator_with_realloc.h> |
91 | #include <sparsehash/internal/sparsehashtable.h> // IWYU pragma: export |
92 | |
93 | namespace google { |
94 | |
95 | template <class Key, class T, class HashFcn = std::hash<Key>, |
96 | class EqualKey = std::equal_to<Key>, |
97 | class Alloc = libc_allocator_with_realloc<std::pair<const Key, T>>> |
98 | class sparse_hash_map { |
99 | private: |
100 | // Apparently select1st is not stl-standard, so we define our own |
101 | struct SelectKey { |
102 | typedef const Key& result_type; |
103 | const Key& operator()(const std::pair<const Key, T>& p) const { |
104 | return p.first; |
105 | } |
106 | }; |
107 | struct SetKey { |
108 | void operator()(std::pair<const Key, T>* value, const Key& new_key) const { |
109 | *const_cast<Key*>(&value->first) = new_key; |
110 | // It would be nice to clear the rest of value here as well, in |
111 | // case it's taking up a lot of memory. We do this by clearing |
112 | // the value. This assumes T has a zero-arg constructor! |
113 | value->second = T(); |
114 | } |
115 | }; |
116 | // For operator[]. |
117 | struct DefaultValue { |
118 | std::pair<const Key, T> operator()(const Key& key) { |
119 | return std::make_pair(key, T()); |
120 | } |
121 | }; |
122 | |
123 | // The actual data |
124 | typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen; |
125 | typedef sparse_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey, |
126 | SetKey, EqualKeyChosen, Alloc> ht; |
127 | ht rep; |
128 | |
129 | static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value |
130 | || std::is_same<EqualKey, std::equal_to<Key>>::value |
131 | || std::is_same<EqualKey, EqualKeyChosen>::value, |
132 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash" ); |
133 | |
134 | public: |
135 | typedef typename ht::key_type key_type; |
136 | typedef T data_type; |
137 | typedef T mapped_type; |
138 | typedef typename ht::value_type value_type; |
139 | typedef typename ht::hasher hasher; |
140 | typedef typename ht::key_equal key_equal; |
141 | typedef Alloc allocator_type; |
142 | |
143 | typedef typename ht::size_type size_type; |
144 | typedef typename ht::difference_type difference_type; |
145 | typedef typename ht::pointer pointer; |
146 | typedef typename ht::const_pointer const_pointer; |
147 | typedef typename ht::reference reference; |
148 | typedef typename ht::const_reference const_reference; |
149 | |
150 | typedef typename ht::iterator iterator; |
151 | typedef typename ht::const_iterator const_iterator; |
152 | typedef typename ht::local_iterator local_iterator; |
153 | typedef typename ht::const_local_iterator const_local_iterator; |
154 | |
155 | // Iterator functions |
156 | iterator begin() { return rep.begin(); } |
157 | iterator end() { return rep.end(); } |
158 | const_iterator begin() const { return rep.begin(); } |
159 | const_iterator end() const { return rep.end(); } |
160 | |
161 | // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. |
162 | local_iterator begin(size_type i) { return rep.begin(i); } |
163 | local_iterator end(size_type i) { return rep.end(i); } |
164 | const_local_iterator begin(size_type i) const { return rep.begin(i); } |
165 | const_local_iterator end(size_type i) const { return rep.end(i); } |
166 | |
167 | // Accessor functions |
168 | allocator_type get_allocator() const { return rep.get_allocator(); } |
169 | hasher hash_funct() const { return rep.hash_funct(); } |
170 | hasher hash_function() const { return hash_funct(); } |
171 | key_equal key_eq() const { return rep.key_eq(); } |
172 | |
173 | // Constructors |
174 | explicit sparse_hash_map(size_type expected_max_items_in_table = 0, |
175 | const hasher& hf = hasher(), |
176 | const key_equal& eql = key_equal(), |
177 | const allocator_type& alloc = allocator_type()) |
178 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), |
179 | alloc) {} |
180 | |
181 | template <class InputIterator> |
182 | sparse_hash_map(InputIterator f, InputIterator l, |
183 | size_type expected_max_items_in_table = 0, |
184 | const hasher& hf = hasher(), |
185 | const key_equal& eql = key_equal(), |
186 | const allocator_type& alloc = allocator_type()) |
187 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), |
188 | alloc) { |
189 | rep.insert(f, l); |
190 | } |
191 | // We use the default copy constructor |
192 | // We use the default operator=() |
193 | // We use the default destructor |
194 | |
195 | void clear() { rep.clear(); } |
196 | void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } |
197 | |
198 | // Functions concerning size |
199 | size_type size() const { return rep.size(); } |
200 | size_type max_size() const { return rep.max_size(); } |
201 | bool empty() const { return rep.empty(); } |
202 | size_type bucket_count() const { return rep.bucket_count(); } |
203 | size_type max_bucket_count() const { return rep.max_bucket_count(); } |
204 | |
205 | // These are tr1 methods. bucket() is the bucket the key is or would be in. |
206 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } |
207 | size_type bucket(const key_type& key) const { return rep.bucket(key); } |
208 | float load_factor() const { return size() * 1.0f / bucket_count(); } |
209 | float max_load_factor() const { |
210 | float shrink, grow; |
211 | rep.get_resizing_parameters(&shrink, &grow); |
212 | return grow; |
213 | } |
214 | void max_load_factor(float new_grow) { |
215 | float shrink, grow; |
216 | rep.get_resizing_parameters(&shrink, &grow); |
217 | rep.set_resizing_parameters(shrink, new_grow); |
218 | } |
219 | // These aren't tr1 methods but perhaps ought to be. |
220 | float min_load_factor() const { |
221 | float shrink, grow; |
222 | rep.get_resizing_parameters(&shrink, &grow); |
223 | return shrink; |
224 | } |
225 | void min_load_factor(float new_shrink) { |
226 | float shrink, grow; |
227 | rep.get_resizing_parameters(&shrink, &grow); |
228 | rep.set_resizing_parameters(new_shrink, grow); |
229 | } |
230 | // Deprecated; use min_load_factor() or max_load_factor() instead. |
231 | void set_resizing_parameters(float shrink, float grow) { |
232 | rep.set_resizing_parameters(shrink, grow); |
233 | } |
234 | |
235 | void resize(size_type hint) { rep.resize(hint); } |
236 | void rehash(size_type hint) { resize(hint); } // the tr1 name |
237 | |
238 | // Lookup routines |
239 | iterator find(const key_type& key) { return rep.find(key); } |
240 | const_iterator find(const key_type& key) const { return rep.find(key); } |
241 | |
242 | template <typename K> |
243 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type |
244 | find(const K& key) { return rep.find(key); } |
245 | template <typename K> |
246 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, const_iterator>::type |
247 | find(const K& key) const { return rep.find(key); } |
248 | |
249 | data_type& operator[](const key_type& key) { // This is our value-add! |
250 | // If key is in the hashtable, returns find(key)->second, |
251 | // otherwise returns insert(value_type(key, T()).first->second. |
252 | // Note it does not create an empty T unless the find fails. |
253 | return rep.template find_or_insert<DefaultValue>(key).second; |
254 | } |
255 | |
256 | size_type count(const key_type& key) const { return rep.count(key); } |
257 | |
258 | template <typename K> |
259 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type |
260 | count(const K& key) const { return rep.count(key); } |
261 | |
262 | std::pair<iterator, iterator> equal_range(const key_type& key) { |
263 | return rep.equal_range(key); |
264 | } |
265 | std::pair<const_iterator, const_iterator> equal_range( |
266 | const key_type& key) const { |
267 | return rep.equal_range(key); |
268 | } |
269 | |
270 | template<typename K> |
271 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type |
272 | equal_range(const K& key) { |
273 | return rep.equal_range(key); |
274 | } |
275 | template<typename K> |
276 | typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<const_iterator, const_iterator>>::type |
277 | equal_range(const K& key) const { |
278 | return rep.equal_range(key); |
279 | } |
280 | |
281 | // Insertion routines |
282 | std::pair<iterator, bool> insert(const value_type& obj) { |
283 | return rep.insert(obj); |
284 | } |
285 | template <class InputIterator> |
286 | void insert(InputIterator f, InputIterator l) { |
287 | rep.insert(f, l); |
288 | } |
289 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } |
290 | // Required for std::insert_iterator; the passed-in iterator is ignored. |
291 | iterator insert(iterator, const value_type& obj) { return insert(obj).first; } |
292 | |
293 | // Deletion routines |
294 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key |
295 | // value to identify deleted buckets. You can change the key as |
296 | // time goes on, or get rid of it entirely to be insert-only. |
297 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } |
298 | void clear_deleted_key() { rep.clear_deleted_key(); } |
299 | key_type deleted_key() const { return rep.deleted_key(); } |
300 | |
301 | // These are standard |
302 | size_type erase(const key_type& key) { return rep.erase(key); } |
303 | void erase(iterator it) { rep.erase(it); } |
304 | void erase(iterator f, iterator l) { rep.erase(f, l); } |
305 | |
306 | // Comparison |
307 | bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } |
308 | bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } |
309 | |
310 | // I/O -- this is an add-on for writing metainformation to disk |
311 | // |
312 | // For maximum flexibility, this does not assume a particular |
313 | // file type (though it will probably be a FILE *). We just pass |
314 | // the fp through to rep. |
315 | |
316 | // If your keys and values are simple enough, you can pass this |
317 | // serializer to serialize()/unserialize(). "Simple enough" means |
318 | // value_type is a POD type that contains no pointers. Note, |
319 | // however, we don't try to normalize endianness. |
320 | typedef typename ht::NopointerSerializer NopointerSerializer; |
321 | |
322 | // serializer: a class providing operator()(OUTPUT*, const value_type&) |
323 | // (writing value_type to OUTPUT). You can specify a |
324 | // NopointerSerializer object if appropriate (see above). |
325 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a |
326 | // pointer to a class providing size_t Write(const void*, size_t), |
327 | // which writes a buffer into a stream (which fp presumably |
328 | // owns) and returns the number of bytes successfully written. |
329 | // Note basic_ostream<not_char> is not currently supported. |
330 | template <typename ValueSerializer, typename OUTPUT> |
331 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { |
332 | return rep.serialize(serializer, fp); |
333 | } |
334 | |
335 | // serializer: a functor providing operator()(INPUT*, value_type*) |
336 | // (reading from INPUT and into value_type). You can specify a |
337 | // NopointerSerializer object if appropriate (see above). |
338 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a |
339 | // pointer to a class providing size_t Read(void*, size_t), |
340 | // which reads into a buffer from a stream (which fp presumably |
341 | // owns) and returns the number of bytes successfully read. |
342 | // Note basic_istream<not_char> is not currently supported. |
343 | // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer |
344 | // may need to do a const cast in order to fill in the key. |
345 | // NOTE: if Key or T are not POD types, the serializer MUST use |
346 | // placement-new to initialize their values, rather than a normal |
347 | // equals-assignment or similar. (The value_type* passed into the |
348 | // serializer points to garbage memory.) |
349 | template <typename ValueSerializer, typename INPUT> |
350 | bool unserialize(ValueSerializer serializer, INPUT* fp) { |
351 | return rep.unserialize(serializer, fp); |
352 | } |
353 | |
354 | // The four methods below are DEPRECATED. |
355 | // Use serialize() and unserialize() for new code. |
356 | template <typename OUTPUT> |
357 | bool write_metadata(OUTPUT* fp) { |
358 | return rep.write_metadata(fp); |
359 | } |
360 | |
361 | template <typename INPUT> |
362 | bool read_metadata(INPUT* fp) { |
363 | return rep.read_metadata(fp); |
364 | } |
365 | |
366 | template <typename OUTPUT> |
367 | bool write_nopointer_data(OUTPUT* fp) { |
368 | return rep.write_nopointer_data(fp); |
369 | } |
370 | |
371 | template <typename INPUT> |
372 | bool read_nopointer_data(INPUT* fp) { |
373 | return rep.read_nopointer_data(fp); |
374 | } |
375 | }; |
376 | |
377 | // We need a global swap as well |
378 | template <class Key, class T, class HashFcn, class EqualKey, class Alloc> |
379 | inline void swap(sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1, |
380 | sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) { |
381 | hm1.swap(hm2); |
382 | } |
383 | |
384 | } // namespace google |
385 | |