1 | // Copyright (c) 2005, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | // --- |
31 | // |
32 | // A dense hashtable is a particular implementation of |
33 | // a hashtable: one that is meant to minimize memory allocation. |
34 | // It does this by using an array to store all the data. We |
35 | // steal a value from the key space to indicate "empty" array |
36 | // elements (ie indices where no item lives) and another to indicate |
37 | // "deleted" elements. |
38 | // |
39 | // (Note it is possible to change the value of the delete key |
40 | // on the fly; you can even remove it, though after that point |
41 | // the hashtable is insert_only until you set it again. The empty |
42 | // value however can't be changed.) |
43 | // |
44 | // To minimize allocation and pointer overhead, we use internal |
45 | // probing, in which the hashtable is a single table, and collisions |
46 | // are resolved by trying to insert again in another bucket. The |
47 | // most cache-efficient internal probing schemes are linear probing |
48 | // (which suffers, alas, from clumping) and quadratic probing, which |
49 | // is what we implement by default. |
50 | // |
51 | // Type requirements: value_type is required to be Copy Constructible |
52 | // and Default Constructible. It is not required to be (and commonly |
53 | // isn't) Assignable. |
54 | // |
55 | // You probably shouldn't use this code directly. Use dense_hash_map<> |
56 | // or dense_hash_set<> instead. |
57 | |
58 | // You can change the following below: |
59 | // HT_OCCUPANCY_PCT -- how full before we double size |
60 | // HT_EMPTY_PCT -- how empty before we halve size |
61 | // HT_MIN_BUCKETS -- default smallest bucket size |
62 | // |
63 | // You can also change enlarge_factor (which defaults to |
64 | // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to |
65 | // HT_EMPTY_PCT) with set_resizing_parameters(). |
66 | // |
67 | // How to decide what values to use? |
68 | // shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good. |
69 | // HT_MIN_BUCKETS is probably unnecessary since you can specify |
70 | // (indirectly) the starting number of buckets at construct-time. |
71 | // For enlarge_factor, you can use this chart to try to trade-off |
72 | // expected lookup time to the space taken up. By default, this |
73 | // code uses quadratic probing, though you can change it to linear |
74 | // via JUMP_ below if you really want to. |
75 | // |
76 | // From |
77 | // http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html |
78 | // NUMBER OF PROBES / LOOKUP Successful Unsuccessful |
79 | // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) |
// Linear collision resolution       [1+1/(1-L)]/2    [1+1/(1-L)^2]/2
81 | // |
82 | // -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 |
83 | // QUADRATIC COLLISION RES. |
84 | // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 |
85 | // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 |
86 | // LINEAR COLLISION RES. |
87 | // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 |
88 | // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 |
89 | |
90 | #pragma once |
91 | |
92 | #include <assert.h> |
93 | #include <stdio.h> // for FILE, fwrite, fread |
94 | #include <algorithm> // For swap(), eg |
95 | #include <iterator> // For iterator tags |
96 | #include <limits> // for numeric_limits |
97 | #include <memory> // For uninitialized_fill |
98 | #include <utility> // for pair |
99 | #include <stdexcept> // For length_error |
100 | #include <type_traits> |
101 | #include <sparsehash/internal/hashtable-common.h> |
102 | #include <sparsehash/internal/libc_allocator_with_realloc.h> |
103 | |
104 | namespace google { |
105 | |
106 | // The probing method |
107 | // Linear probing |
108 | // #define JUMP_(key, num_probes) ( 1 ) |
109 | // Quadratic probing |
110 | #define JUMP_(key, num_probes) (num_probes) |
111 | |
112 | // Hashtable class, used to implement the hashed associative containers |
113 | // hash_set and hash_map. |
114 | |
115 | // Value: what is stored in the table (each bucket is a Value). |
116 | // Key: something in a 1-to-1 correspondence to a Value, that can be used |
117 | // to search for a Value in the table (find() takes a Key). |
118 | // HashFcn: Takes a Key and returns an integer, the more unique the better. |
119 | // ExtractKey: given a Value, returns the unique Key associated with it. |
120 | // Must inherit from unary_function, or at least have a |
121 | // result_type enum indicating the return type of operator(). |
122 | // SetKey: given a Value* and a Key, modifies the value such that |
123 | // ExtractKey(value) == key. We guarantee this is only called |
124 | // with key == deleted_key or key == empty_key. |
125 | // EqualKey: Given two Keys, says whether they are the same (that is, |
126 | // if they are both associated with the same Value). |
127 | // Alloc: STL allocator to use to allocate memory. |
128 | |
129 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
130 | class EqualKey, class Alloc> |
131 | class dense_hashtable; |
132 | |
133 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
134 | struct dense_hashtable_iterator; |
135 | |
136 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
137 | struct dense_hashtable_const_iterator; |
138 | |
139 | // We're just an array, but we need to skip over empty and deleted elements |
140 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
141 | struct dense_hashtable_iterator { |
142 | private: |
143 | using value_alloc_type = |
144 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
145 | |
146 | public: |
147 | typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
148 | typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
149 | const_iterator; |
150 | |
151 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
152 | typedef V value_type; |
153 | typedef typename value_alloc_type::difference_type difference_type; |
154 | typedef typename value_alloc_type::size_type size_type; |
155 | typedef typename value_alloc_type::reference reference; |
156 | typedef typename value_alloc_type::pointer pointer; |
157 | |
158 | // "Real" constructor and default constructor |
159 | dense_hashtable_iterator( |
160 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it, |
161 | pointer it_end, bool advance) |
162 | : ht(h), pos(it), end(it_end) { |
163 | if (advance) advance_past_empty_and_deleted(); |
164 | } |
165 | dense_hashtable_iterator() {} |
166 | // The default destructor is fine; we don't define one |
167 | // The default operator= is fine; we don't define one |
168 | |
169 | // Happy dereferencer |
170 | reference operator*() const { return *pos; } |
171 | pointer operator->() const { return &(operator*()); } |
172 | |
173 | // Arithmetic. The only hard part is making sure that |
174 | // we're not on an empty or marked-deleted array element |
175 | void advance_past_empty_and_deleted() { |
176 | while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) |
177 | ++pos; |
178 | } |
179 | iterator& operator++() { |
180 | assert(pos != end); |
181 | ++pos; |
182 | advance_past_empty_and_deleted(); |
183 | return *this; |
184 | } |
185 | iterator operator++(int) { |
186 | iterator tmp(*this); |
187 | ++*this; |
188 | return tmp; |
189 | } |
190 | |
191 | // Comparison. |
192 | bool operator==(const iterator& it) const { return pos == it.pos; } |
193 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
194 | |
195 | // The actual data |
196 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
197 | pointer pos, end; |
198 | }; |
199 | |
200 | // Now do it all again, but with const-ness! |
201 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
202 | struct dense_hashtable_const_iterator { |
203 | private: |
204 | using value_alloc_type = |
205 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
206 | |
207 | public: |
208 | typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
209 | typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
210 | const_iterator; |
211 | |
212 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
213 | typedef V value_type; |
214 | typedef typename value_alloc_type::difference_type difference_type; |
215 | typedef typename value_alloc_type::size_type size_type; |
216 | typedef typename value_alloc_type::const_reference reference; |
217 | typedef typename value_alloc_type::const_pointer pointer; |
218 | |
219 | // "Real" constructor and default constructor |
220 | dense_hashtable_const_iterator( |
221 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it, |
222 | pointer it_end, bool advance) |
223 | : ht(h), pos(it), end(it_end) { |
224 | if (advance) advance_past_empty_and_deleted(); |
225 | } |
226 | dense_hashtable_const_iterator() : ht(NULL), pos(pointer()), end(pointer()) {} |
227 | // This lets us convert regular iterators to const iterators |
228 | dense_hashtable_const_iterator(const iterator& it) |
229 | : ht(it.ht), pos(it.pos), end(it.end) {} |
230 | // The default destructor is fine; we don't define one |
231 | // The default operator= is fine; we don't define one |
232 | |
233 | // Happy dereferencer |
234 | reference operator*() const { return *pos; } |
235 | pointer operator->() const { return &(operator*()); } |
236 | |
237 | // Arithmetic. The only hard part is making sure that |
238 | // we're not on an empty or marked-deleted array element |
239 | void advance_past_empty_and_deleted() { |
240 | while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) |
241 | ++pos; |
242 | } |
243 | const_iterator& operator++() { |
244 | assert(pos != end); |
245 | ++pos; |
246 | advance_past_empty_and_deleted(); |
247 | return *this; |
248 | } |
249 | const_iterator operator++(int) { |
250 | const_iterator tmp(*this); |
251 | ++*this; |
252 | return tmp; |
253 | } |
254 | |
255 | // Comparison. |
256 | bool operator==(const const_iterator& it) const { return pos == it.pos; } |
257 | bool operator!=(const const_iterator& it) const { return pos != it.pos; } |
258 | |
259 | // The actual data |
260 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
261 | pointer pos, end; |
262 | }; |
263 | |
264 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
265 | class EqualKey, class Alloc> |
266 | class dense_hashtable { |
267 | private: |
268 | using value_alloc_type = |
269 | typename std::allocator_traits<Alloc>::template rebind_alloc<Value>; |
270 | |
271 | public: |
272 | typedef Key key_type; |
273 | typedef Value value_type; |
274 | typedef HashFcn hasher; |
275 | typedef EqualKey key_equal; |
276 | typedef Alloc allocator_type; |
277 | |
278 | typedef typename value_alloc_type::size_type size_type; |
279 | typedef typename value_alloc_type::difference_type difference_type; |
280 | typedef typename value_alloc_type::reference reference; |
281 | typedef typename value_alloc_type::const_reference const_reference; |
282 | typedef typename value_alloc_type::pointer pointer; |
283 | typedef typename value_alloc_type::const_pointer const_pointer; |
284 | typedef dense_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey, |
285 | EqualKey, Alloc> iterator; |
286 | |
287 | typedef dense_hashtable_const_iterator< |
288 | Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator; |
289 | |
290 | // These come from tr1. For us they're the same as regular iterators. |
291 | typedef iterator local_iterator; |
292 | typedef const_iterator const_local_iterator; |
293 | |
294 | // How full we let the table get before we resize, by default. |
295 | // Knuth says .8 is good -- higher causes us to probe too much, |
296 | // though it saves memory. |
297 | static const int HT_OCCUPANCY_PCT; // defined at the bottom of this file |
298 | |
299 | // How empty we let the table get before we resize lower, by default. |
300 | // (0.0 means never resize lower.) |
301 | // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing |
302 | static const int HT_EMPTY_PCT; // defined at the bottom of this file |
303 | |
304 | // Minimum size we're willing to let hashtables be. |
305 | // Must be a power of two, and at least 4. |
306 | // Note, however, that for a given hashtable, the initial size is a |
307 | // function of the first constructor arg, and may be >HT_MIN_BUCKETS. |
308 | static const size_type HT_MIN_BUCKETS = 4; |
309 | |
310 | // By default, if you don't specify a hashtable size at |
311 | // construction-time, we use this size. Must be a power of two, and |
312 | // at least HT_MIN_BUCKETS. |
313 | static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; |
314 | |
315 | // ITERATOR FUNCTIONS |
316 | iterator begin() { return iterator(this, table, table + num_buckets, true); } |
317 | iterator end() { |
318 | return iterator(this, table + num_buckets, table + num_buckets, true); |
319 | } |
320 | const_iterator begin() const { |
321 | return const_iterator(this, table, table + num_buckets, true); |
322 | } |
323 | const_iterator end() const { |
324 | return const_iterator(this, table + num_buckets, table + num_buckets, true); |
325 | } |
326 | |
327 | // These come from tr1 unordered_map. They iterate over 'bucket' n. |
328 | // We'll just consider bucket n to be the n-th element of the table. |
329 | local_iterator begin(size_type i) { |
330 | return local_iterator(this, table + i, table + i + 1, false); |
331 | } |
332 | local_iterator end(size_type i) { |
333 | local_iterator it = begin(i); |
334 | if (!test_empty(i) && !test_deleted(i)) ++it; |
335 | return it; |
336 | } |
337 | const_local_iterator begin(size_type i) const { |
338 | return const_local_iterator(this, table + i, table + i + 1, false); |
339 | } |
340 | const_local_iterator end(size_type i) const { |
341 | const_local_iterator it = begin(i); |
342 | if (!test_empty(i) && !test_deleted(i)) ++it; |
343 | return it; |
344 | } |
345 | |
  // ACCESSOR FUNCTIONS for the things we templatize on, basically.
  // settings and key_info are returned directly: they are implicitly
  // convertible to hasher / key_equal (they carry those functors).
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const { return allocator_type(val_info); }

  // Accessor function for statistics gathering: how many times this
  // hashtable's contents have been copied/rebuilt into a new table.
  int num_table_copies() const { return settings.num_ht_copies(); }
353 | |
354 | private: |
355 | // Annoyingly, we can't copy values around, because they might have |
356 | // const components (they're probably pair<const X, Y>). We use |
357 | // explicit destructor invocation and placement new to get around |
358 | // this. Arg. |
359 | template <typename... Args> |
360 | void set_value(pointer dst, Args&&... args) { |
361 | dst->~value_type(); // delete the old value, if any |
362 | new (dst) value_type(std::forward<Args>(args)...); |
363 | } |
364 | |
365 | void destroy_buckets(size_type first, size_type last) { |
366 | for (; first != last; ++first) table[first].~value_type(); |
367 | } |
368 | |
  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  // Rebuild the table without any "deleted" buckets: move-construct a
  // right-sized temporary from *this (the rebuild re-inserts only live
  // entries) and swap it in.  Postcondition: num_deleted == 0.
  void squash_deleted() {  // gets rid of any deleted entries we have
    if (num_deleted) {  // get rid of deleted before writing
      size_type resize_to = settings.min_buckets(
          num_elements, bucket_count());
      dense_hashtable tmp(std::move(*this), resize_to);  // rebuilding drops deleted entries
      swap(tmp);  // now we are tmp
    }
    assert(num_deleted == 0);
  }

  // Test if the given key is the deleted indicator.  Requires
  // num_deleted > 0, for correctness of read(), and because that
  // guarantees that key_info.delkey is valid.
  bool test_deleted_key(const key_type& key) const {
    assert(num_deleted > 0);
    return equals(key_info.delkey, key);
  }
393 | |
394 | public: |
395 | void set_deleted_key(const key_type& key) { |
396 | // the empty indicator (if specified) and the deleted indicator |
397 | // must be different |
398 | assert( |
399 | (!settings.use_empty() || !equals(key, key_info.empty_key)) && |
400 | "Passed the empty-key to set_deleted_key" ); |
401 | // It's only safe to change what "deleted" means if we purge deleted guys |
402 | squash_deleted(); |
403 | settings.set_use_deleted(true); |
404 | key_info.delkey = key; |
405 | } |
406 | void clear_deleted_key() { |
407 | squash_deleted(); |
408 | settings.set_use_deleted(false); |
409 | } |
410 | key_type deleted_key() const { |
411 | assert(settings.use_deleted() && |
412 | "Must set deleted key before calling deleted_key" ); |
413 | return key_info.delkey; |
414 | } |
415 | |
416 | // These are public so the iterators can use them |
417 | // True if the item at position bucknum is "deleted" marker |
418 | bool test_deleted(size_type bucknum) const { |
419 | // Invariant: !use_deleted() implies num_deleted is 0. |
420 | assert(settings.use_deleted() || num_deleted == 0); |
421 | return num_deleted > 0 && test_deleted_key(get_key(table[bucknum])); |
422 | } |
423 | bool test_deleted(const iterator& it) const { |
424 | // Invariant: !use_deleted() implies num_deleted is 0. |
425 | assert(settings.use_deleted() || num_deleted == 0); |
426 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
427 | } |
428 | bool test_deleted(const const_iterator& it) const { |
429 | // Invariant: !use_deleted() implies num_deleted is 0. |
430 | assert(settings.use_deleted() || num_deleted == 0); |
431 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
432 | } |
433 | |
434 | private: |
435 | void check_use_deleted(const char* caller) { |
436 | (void)caller; // could log it if the assert failed |
437 | assert(settings.use_deleted()); |
438 | } |
439 | |
440 | // Set it so test_deleted is true. true if object didn't used to be deleted. |
441 | bool set_deleted(iterator& it) { |
442 | check_use_deleted("set_deleted()" ); |
443 | bool retval = !test_deleted(it); |
444 | // &* converts from iterator to value-type. |
445 | set_key(&(*it), key_info.delkey); |
446 | return retval; |
447 | } |
448 | // Set it so test_deleted is false. true if object used to be deleted. |
449 | bool clear_deleted(iterator& it) { |
450 | check_use_deleted("clear_deleted()" ); |
451 | // Happens automatically when we assign something else in its place. |
452 | return test_deleted(it); |
453 | } |
454 | |
455 | // We also allow to set/clear the deleted bit on a const iterator. |
456 | // We allow a const_iterator for the same reason you can delete a |
457 | // const pointer: it's convenient, and semantically you can't use |
458 | // 'it' after it's been deleted anyway, so its const-ness doesn't |
459 | // really matter. |
460 | bool set_deleted(const_iterator& it) { |
461 | check_use_deleted("set_deleted()" ); |
462 | bool retval = !test_deleted(it); |
463 | set_key(const_cast<pointer>(&(*it)), key_info.delkey); |
464 | return retval; |
465 | } |
466 | // Set it so test_deleted is false. true if object used to be deleted. |
467 | bool clear_deleted(const_iterator& it) { |
468 | check_use_deleted("clear_deleted()" ); |
469 | return test_deleted(it); |
470 | } |
471 | |
472 | // EMPTY HELPER FUNCTIONS |
473 | // This lets the user describe a key that will indicate empty (unused) |
474 | // table entries. This key should be an "impossible" entry -- |
475 | // if you try to insert it for real, you won't be able to retrieve it! |
476 | // (NB: while you pass in an entire value, only the key part is looked |
477 | // at. This is just because I don't know how to assign just a key.) |
478 | public: |
479 | // These are public so the iterators can use them |
480 | // True if the item at position bucknum is "empty" marker |
481 | bool test_empty(size_type bucknum) const { |
482 | assert(settings.use_empty()); // we always need to know what's empty! |
483 | return equals(key_info.empty_key, get_key(table[bucknum])); |
484 | } |
485 | bool test_empty(const iterator& it) const { |
486 | assert(settings.use_empty()); // we always need to know what's empty! |
487 | return equals(key_info.empty_key, get_key(*it)); |
488 | } |
489 | bool test_empty(const const_iterator& it) const { |
490 | assert(settings.use_empty()); // we always need to know what's empty! |
491 | return equals(key_info.empty_key, get_key(*it)); |
492 | } |
493 | |
494 | private: |
495 | void fill_range_with_empty(pointer table_start, size_type count) { |
496 | for (size_type i = 0; i < count; ++i) |
497 | { |
498 | construct_key(&table_start[i], key_info.empty_key); |
499 | } |
500 | } |
501 | |
 public:
  // Designate the key value that marks unused buckets.  Must be called
  // exactly once, before the table is first used; the choice is
  // permanent.  This is also where the bucket array is actually
  // allocated -- the constructor only recorded num_buckets.
  void set_empty_key(const key_type& key) {
    // Once you set the empty key, you can't change it
    assert(!settings.use_empty() && "Calling set_empty_key multiple times" );
    // The deleted indicator (if specified) and the empty indicator
    // must be different.
    assert(
        (!settings.use_deleted() || !equals(key, key_info.delkey)) &&
        "Setting the empty key the same as the deleted key" );
    settings.set_use_empty(true);
    key_info.empty_key = key;

    assert(!table);  // must set before first use
    // num_buckets was set in constructor even though table was NULL
    table = val_info.allocate(num_buckets);
    assert(table);
    fill_range_with_empty(table, num_buckets);
  }
  // Returns the empty-key sentinel; requires set_empty_key() was called.
  key_type empty_key() const {
    assert(settings.use_empty());
    return key_info.empty_key;
  }
524 | |
  // FUNCTIONS CONCERNING SIZE
 public:
  // Number of live elements (deleted markers are not counted).
  size_type size() const { return num_elements - num_deleted; }
  size_type max_size() const { return val_info.max_size(); }
  bool empty() const { return size() == 0; }
  size_type bucket_count() const { return num_buckets; }
  size_type max_bucket_count() const { return max_size(); }
  // Buckets in use: live elements plus deleted markers.
  size_type nonempty_bucket_count() const { return num_elements; }
  // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
  // what we do.  We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }
538 | |
539 | private: |
540 | // Because of the above, size_type(-1) is never legal; use it for errors |
541 | static const size_type ILLEGAL_BUCKET = size_type(-1); |
542 | |
  // Used after a string of deletes.  Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking.  Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = num_elements - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      // Halve repeatedly (staying a power of two) until occupancy is
      // back above the shrink factor, but never go below
      // HT_DEFAULT_STARTING_BUCKETS.
      size_type sz = bucket_count() / 2;  // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < sz * shrink_factor) {
        sz /= 2;  // stay a power of 2
      }
      // Rebuilding via move-construct + swap also purges deleted slots.
      dense_hashtable tmp(std::move(*this), sz);  // Do the actual resizing
      swap(tmp);  // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);  // because we just considered it
    return retval;
  }
574 | |
  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more
  // elements".  Returns true if we actually resized, false if size was
  // already ok.  May also shrink first if a batch of deletes made that
  // worthwhile.  Throws std::length_error if the requested growth would
  // overflow size_type.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if (settings.consider_shrink()) {  // see if lots of deletes happened
      if (maybe_shrink()) did_resize = true;
    }
    if (num_elements >= (std::numeric_limits<size_type>::max)() - delta) {
      throw std::length_error("resize overflow" );
    }
    if (bucket_count() >= HT_MIN_BUCKETS &&
        (num_elements + delta) <= settings.enlarge_threshold())
      return did_resize;  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    size_type needed_size = settings.min_buckets(num_elements + delta, 0);
    if (needed_size <= bucket_count())  // we have enough buckets
      return did_resize;

    size_type resize_to = settings.min_buckets(
        num_elements - num_deleted + delta, bucket_count());

    // When num_deleted is large, we may still grow but we do not want to
    // over expand.  So we reduce needed_size by a portion of num_deleted
    // (the exact portion does not matter).  This is especially helpful
    // when min_load_factor is zero (no shrink at all) to avoid doubling
    // the bucket count to infinity.  See also test ResizeWithoutShrink.
    needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0);

    if (resize_to < needed_size &&  // may double resize_to
        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow.  But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert.  Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to * 2));
      if (num_elements - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }
    // Rebuild at the chosen size; moving from *this purges deleted slots.
    dense_hashtable tmp(std::move(*this), resize_to);
    swap(tmp);  // now we are tmp
    return true;
  }
630 | |
  // We require table be not-NULL and empty before calling this.
  // Reallocate the bucket array to new_size slots.  Tag-dispatched at
  // compile time: the true_type overload is used when the allocator
  // supports realloc-style growth.
  void resize_table(size_type /*old_size*/, size_type new_size,
                    std::true_type) {
    table = val_info.realloc_or_die(table, new_size);
  }

  // Fallback for allocators without realloc: free the old array, then
  // allocate a fresh one (contents are not preserved -- see precondition).
  void resize_table(size_type old_size, size_type new_size, std::false_type) {
    val_info.deallocate(table, old_size);
    table = val_info.allocate(new_size);
  }
641 | |
  // Used to actually do the rehashing when we grow/shrink a hashtable.
  // Takes the source table by forwarding reference: when 'ht' is an
  // rvalue, elements are moved into the new buckets; otherwise copied.
  template <typename Hashtable>
  void copy_or_move_from(Hashtable&& ht, size_type min_buckets_wanted) {
    clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    for (auto&& value : ht) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe until an empty bucket is found.  NOTE: JUMP_'s first
      // argument is discarded by both probing macros defined above, so
      // the 'key' token here is never evaluated.
      for (bucknum = hash(get_key(value)) & bucket_count_minus_one;
           !test_empty(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>" );
      }

      // Select move vs copy based on the value category of 'ht'.
      using will_move = std::is_rvalue_reference<Hashtable&&>;
      using value_t = typename std::conditional<will_move::value, value_type&&, const_reference>::type;

      set_value(&table[bucknum], std::forward<value_t>(value));
      num_elements++;
    }
    settings.inc_num_ht_copies();
  }
672 | |
673 | // Required by the spec for hashed associative container |
674 | public: |
675 | // Though the docs say this should be num_buckets, I think it's much |
676 | // more useful as num_elements. As a special feature, calling with |
677 | // req_elements==0 will cause us to shrink if we can, saving space. |
678 | void resize(size_type req_elements) { // resize to this or larger |
679 | if (settings.consider_shrink() || req_elements == 0) maybe_shrink(); |
680 | if (req_elements > num_elements) resize_delta(req_elements - num_elements); |
681 | } |
682 | |
683 | // Get and change the value of shrink_factor and enlarge_factor. The |
684 | // description at the beginning of this file explains how to choose |
685 | // the values. Setting the shrink parameter to 0.0 ensures that the |
686 | // table never shrinks. |
  // Reports the current shrink and enlarge load factors through the
  // caller-supplied out-parameters (see set_resizing_parameters below).
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  // Installs new shrink/enlarge factors, then recomputes the cached
  // thresholds, which depend on both the factors and the bucket count.
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }
695 | |
696 | // CONSTRUCTORS -- as required by the specs, we take a size, |
697 | // but also let you specify a hashfunction, key comparator, |
698 | // and key extractor. We also define a copy constructor and =. |
699 | // DESTRUCTOR -- needs to free the table |
  // Constructs an empty table sized for expected_max_items_in_table
  // elements; 0 selects HT_DEFAULT_STARTING_BUCKETS.  No bucket array
  // is allocated yet: `table` stays NULL until the empty key is set.
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey(),
                           const SetKey& set = SetKey(),
                           const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        num_elements(0),
        // `settings` is declared (and thus initialized) before
        // num_buckets, so calling settings.min_buckets() here is safe.
        num_buckets(expected_max_items_in_table == 0
                        ? HT_DEFAULT_STARTING_BUCKETS
                        : settings.min_buckets(expected_max_items_in_table, 0)),
        val_info(alloc_impl<value_alloc_type>(alloc)),
        table(NULL) {
    // table is NULL until emptyval is set. However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    settings.reset_thresholds(bucket_count());
  }
719 | |
720 | // As a convenience for resize(), we allow an optional second argument |
721 | // which lets you make this new hashtable a different size than ht |
  // Copy constructor.  The optional second argument lets resize() build
  // a differently-sized copy of ht.
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(ht.val_info),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_from will crash, so we do our own
      // copying: ht must be empty, so only sizing state is transferred.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(ht, min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
741 | |
  // Move constructor: default-construct an empty table, then take over
  // ht's storage via swap.  ht is left valid (holding the empty state).
  dense_hashtable(dense_hashtable&& ht)
      : dense_hashtable() {
    swap(ht);
  }
746 | |
  // Move construction with a bucket-count hint (used by resize paths).
  dense_hashtable(dense_hashtable&& ht,
                  size_type min_buckets_wanted)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        // NOTE(review): ht's allocator is moved here, yet ht.table is
        // still read by copy_or_move_from() below -- presumably fine for
        // the stateless allocators this table supports; confirm if a
        // stateful allocator is ever introduced.
        val_info(std::move(ht.val_info)),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_or_move_from will crash, so we do our own copying.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(std::move(ht), min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
766 | |
  // Copy assignment.  Self-assignment is a no-op.
  dense_hashtable& operator=(const dense_hashtable& ht) {
    if (&ht == this) return *this;  // don't copy onto ourselves
    if (!ht.settings.use_empty()) {
      // Source has no empty key set: it must be empty, so replace *this
      // with an equally-configured empty table.
      assert(ht.empty());
      dense_hashtable empty_table(ht);  // empty table with ht's thresholds
      this->swap(empty_table);
      return *this;
    }
    settings = ht.settings;
    key_info = ht.key_info;
    // copy_or_move_from() calls clear and sets num_deleted to 0 too
    copy_or_move_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }
782 | |
783 | dense_hashtable& operator=(dense_hashtable&& ht) { |
784 | assert(&ht != this); // this should not happen |
785 | swap(ht); |
786 | return *this; |
787 | } |
788 | |
  // Destructor: `table` is non-NULL only once buckets were allocated;
  // destroy the elements, then return the array to the allocator.
  ~dense_hashtable() {
    if (table) {
      destroy_buckets(0, num_buckets);
      val_info.deallocate(table, num_buckets);
    }
  }
795 | |
796 | // Many STL algorithms use swap instead of copy constructors |
  // Exchanges all state (except the allocator) with ht.  Thresholds are
  // recomputed afterwards because each table now has the other's bucket
  // count; reset_thresholds also clears the consider_shrink flag.
  void swap(dense_hashtable& ht) {
    std::swap(settings, ht.settings);
    std::swap(key_info, ht.key_info);
    std::swap(num_deleted, ht.num_deleted);
    std::swap(num_elements, ht.num_elements);
    std::swap(num_buckets, ht.num_buckets);
    std::swap(table, ht.table);
    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }
808 | |
809 | private: |
  // Destroys all elements and leaves the table empty with exactly
  // new_num_buckets buckets, allocating or resizing the array as needed.
  void clear_to_size(size_type new_num_buckets) {
    if (!table) {
      // First allocation: nothing to destroy.
      table = val_info.allocate(new_num_buckets);
    } else {
      destroy_buckets(0, num_buckets);
      if (new_num_buckets != num_buckets) {  // resize, if necessary
        // Only the default libc_allocator_with_realloc can grow/shrink
        // in place; dispatch on that fact at compile time.
        typedef std::integral_constant<
            bool, std::is_same<value_alloc_type,
                               libc_allocator_with_realloc<value_type>>::value>
            realloc_ok;
        resize_table(num_buckets, new_num_buckets, realloc_ok());
      }
    }
    assert(table);
    fill_range_with_empty(table, new_num_buckets);
    num_elements = 0;
    num_deleted = 0;
    num_buckets = new_num_buckets;  // our new size
    settings.reset_thresholds(bucket_count());
  }
830 | |
831 | public: |
832 | // It's always nice to be able to clear a table without deallocating it |
833 | void clear() { |
834 | // If the table is already empty, and the number of buckets is |
835 | // already as we desire, there's nothing to do. |
836 | const size_type new_num_buckets = settings.min_buckets(0, 0); |
837 | if (num_elements == 0 && new_num_buckets == num_buckets) { |
838 | return; |
839 | } |
840 | clear_to_size(new_num_buckets); |
841 | } |
842 | |
843 | // Clear the table without resizing it. |
844 | // Mimicks the stl_hashtable's behaviour when clear()-ing in that it |
845 | // does not modify the bucket count |
846 | void clear_no_resize() { |
847 | if (num_elements > 0) { |
848 | assert(table); |
849 | destroy_buckets(0, num_buckets); |
850 | fill_range_with_empty(table, num_buckets); |
851 | } |
852 | // don't consider to shrink before another erase() |
853 | settings.reset_thresholds(bucket_count()); |
854 | num_elements = 0; |
855 | num_deleted = 0; |
856 | } |
857 | |
858 | // LOOKUP ROUTINES |
859 | private: |
860 | // Returns a pair of positions: 1st where the object is, 2nd where |
861 | // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET |
862 | // if object is not found; 2nd is ILLEGAL_BUCKET if it is. |
863 | // Note: because of deletions where-to-insert is not trivial: it's the |
864 | // first deleted bucket we see, as long as we don't find the key later |
  // Probes for `key`.  Returns (found-bucket, ILLEGAL_BUCKET) when the
  // key is present, or (ILLEGAL_BUCKET, insert-bucket) when absent.
  // The insert position prefers the first tombstone seen on the probe
  // path, so deleted slots get reused.
  template <typename K>
  std::pair<size_type, size_type> find_position(const K& key) const {
    size_type num_probes = 0;  // how many times we've probed
    // Masking works because the bucket count is kept a power of two
    // (asserted elsewhere in this file).
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    while (1) {  // probe until something happens
      if (test_empty(bucknum)) {  // bucket is empty: key cannot be further on
        if (insert_pos == ILLEGAL_BUCKET)  // found no prior place to insert
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if (test_deleted(bucknum)) {  // keep searching, but mark to insert
        if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum;

      } else if (equals(key, get_key(table[bucknum]))) {
        return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;  // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count() &&
             "Hashtable is full: an error in key_equal<> or hash<>" );
    }
  }
890 | |
891 | public: |
892 | template <typename K> |
893 | iterator find(const K& key) { |
894 | if (size() == 0) return end(); |
895 | std::pair<size_type, size_type> pos = find_position(key); |
896 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
897 | return end(); |
898 | else |
899 | return iterator(this, table + pos.first, table + num_buckets, false); |
900 | } |
901 | |
902 | template <typename K> |
903 | const_iterator find(const K& key) const { |
904 | if (size() == 0) return end(); |
905 | std::pair<size_type, size_type> pos = find_position(key); |
906 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
907 | return end(); |
908 | else |
909 | return const_iterator(this, table + pos.first, table + num_buckets, |
910 | false); |
911 | } |
912 | |
913 | // This is a tr1 method: the bucket a given key is in, or what bucket |
914 | // it would be put in, if it were to be inserted. Shrug. |
915 | size_type bucket(const key_type& key) const { |
916 | std::pair<size_type, size_type> pos = find_position(key); |
917 | return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; |
918 | } |
919 | |
920 | // Counts how many elements have key key. For maps, it's either 0 or 1. |
921 | template <typename K> |
922 | size_type count(const K& key) const { |
923 | std::pair<size_type, size_type> pos = find_position(key); |
924 | return pos.first == ILLEGAL_BUCKET ? 0 : 1; |
925 | } |
926 | |
927 | // Likewise, equal_range doesn't really make sense for us. Oh well. |
928 | template <typename K> |
929 | std::pair<iterator, iterator> equal_range(const K& key) { |
930 | iterator pos = find(key); // either an iterator or end |
931 | if (pos == end()) { |
932 | return std::pair<iterator, iterator>(pos, pos); |
933 | } else { |
934 | const iterator startpos = pos++; |
935 | return std::pair<iterator, iterator>(startpos, pos); |
936 | } |
937 | } |
938 | template <typename K> |
939 | std::pair<const_iterator, const_iterator> equal_range( |
940 | const K& key) const { |
941 | const_iterator pos = find(key); // either an iterator or end |
942 | if (pos == end()) { |
943 | return std::pair<const_iterator, const_iterator>(pos, pos); |
944 | } else { |
945 | const const_iterator startpos = pos++; |
946 | return std::pair<const_iterator, const_iterator>(startpos, pos); |
947 | } |
948 | } |
949 | |
950 | // INSERTION ROUTINES |
951 | private: |
952 | // Private method used by insert_noresize and find_or_insert. |
  // Constructs a value at bucket `pos` from args, reviving the bucket
  // if it held a tombstone.  Caller guarantees pos came from
  // find_position(), i.e. it is empty or deleted, never live.
  // Throws length_error when the table is at max_size().
  template <typename... Args>
  iterator insert_at(size_type pos, Args&&... args) {
    if (size() >= max_size()) {
      throw std::length_error("insert overflow" );
    }
    if (test_deleted(pos)) {  // just replace if it's been del.
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert(num_deleted > 0);
      --num_deleted;  // used to be, now it isn't
      // num_elements stays put: the bucket was already counted occupied.
    } else {
      ++num_elements;  // replacing an empty bucket
    }
    set_value(&table[pos], std::forward<Args>(args)...);
    return iterator(this, table + pos, table + num_buckets, false);
  }
970 | |
971 | // If you know *this is big enough to hold obj, use this routine |
972 | template <typename K, typename... Args> |
973 | std::pair<iterator, bool> insert_noresize(K&& key, Args&&... args) { |
974 | // First, double-check we're not inserting delkey or emptyval |
975 | assert(settings.use_empty() && "Inserting without empty key" ); |
976 | assert(!equals(std::forward<K>(key), key_info.empty_key) && "Inserting the empty key" ); |
977 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && "Inserting the deleted key" ); |
978 | |
979 | const std::pair<size_type, size_type> pos = find_position(key); |
980 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
981 | return std::pair<iterator, bool>( |
982 | iterator(this, table + pos.first, table + num_buckets, false), |
983 | false); // false: we didn't insert |
984 | } else { // pos.second says where to put it |
985 | return std::pair<iterator, bool>(insert_at(pos.second, std::forward<Args>(args)...), true); |
986 | } |
987 | } |
988 | |
989 | // Specializations of insert(it, it) depending on the power of the iterator: |
990 | // (1) Iterator supports operator-, resize before inserting |
991 | template <class ForwardIterator> |
992 | void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) { |
993 | size_t dist = std::distance(f, l); |
994 | if (dist >= (std::numeric_limits<size_type>::max)()) { |
995 | throw std::length_error("insert-range overflow" ); |
996 | } |
997 | resize_delta(static_cast<size_type>(dist)); |
998 | for (; dist > 0; --dist, ++f) { |
999 | insert_noresize(get_key(*f), *f); |
1000 | } |
1001 | } |
1002 | |
1003 | // (2) Arbitrary iterator, can't tell how much to resize |
1004 | template <class InputIterator> |
1005 | void insert(InputIterator f, InputIterator l, std::input_iterator_tag) { |
1006 | for (; f != l; ++f) insert(*f); |
1007 | } |
1008 | |
1009 | public: |
1010 | // This is the normal insert routine, used by the outside world |
  // Single-element insert: returns (iterator, inserted?).
  template <typename Arg>
  std::pair<iterator, bool> insert(Arg&& obj) {
    resize_delta(1);  // adding an object, grow if need be
    // NOTE(review): obj is forwarded twice below.  Both forwards are
    // just casts (nothing is moved at this call site), and get_key only
    // reads through the reference before insert_noresize's value
    // construction consumes obj -- but this is fragile; confirm
    // ExtractKey never moves from its argument.
    return insert_noresize(get_key(std::forward<Arg>(obj)), std::forward<Arg>(obj));
  }
1016 | |
  // Emplace: `key` is passed once for lookup and once as the first
  // constructor argument of the stored value.
  template <typename K, typename... Args>
  std::pair<iterator, bool> emplace(K&& key, Args&&... args) {
    resize_delta(1);
    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
    // NOTE(review): the first forward is only read (find_position) before
    // the second is consumed by value construction, so the double
    // forward is safe today -- verify if insert_noresize's ordering
    // ever changes.
    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
  }
1023 | |
1024 | template <typename K, typename... Args> |
1025 | std::pair<iterator, bool> emplace_hint(const_iterator hint, K&& key, Args&&... args) { |
1026 | resize_delta(1); |
1027 | |
1028 | if (equals(key, hint->first)) { |
1029 | return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end), false), false}; |
1030 | } |
1031 | |
1032 | // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself |
1033 | return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...); |
1034 | } |
1035 | |
1036 | // When inserting a lot at a time, we specialize on the type of iterator |
1037 | template <class InputIterator> |
1038 | void insert(InputIterator f, InputIterator l) { |
1039 | // specializes on iterator type |
1040 | insert(f, l, |
1041 | typename std::iterator_traits<InputIterator>::iterator_category()); |
1042 | } |
1043 | |
1044 | // DefaultValue is a functor that takes a key and returns a value_type |
1045 | // representing the default value to be inserted if none is found. |
  // Returns a reference to the value for `key`, default-constructing
  // (via T()) and inserting it first if absent.  Backs operator[] in
  // the map wrapper.
  template <class T, class K>
  value_type& find_or_insert(K&& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert(
        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
        "Inserting the empty key" );
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Inserting the deleted key" );
    const std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {  // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(std::forward<K>(key), std::forward<K>(key), T()).first;
    } else {  // no need to rehash, insert right here
      return *insert_at(pos.second, std::forward<K>(key), T());
    }
  }
1064 | |
1065 | // DELETION ROUTINES |
  // Erases the element with `key`, if present; returns the number of
  // elements removed (0 or 1).  Erasure is lazy: the bucket is marked
  // deleted (a tombstone) rather than freed; actual cleanup happens at
  // the next rehash.
  size_type erase(const key_type& key) {
    // First, double-check we're not trying to erase delkey or emptyval.
    assert(
        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
        "Erasing the empty key" );
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Erasing the deleted key" );
    const_iterator pos = find(key);  // shrug: shouldn't need to be const
    if (pos != end()) {
      assert(!test_deleted(pos));  // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      settings.set_consider_shrink(
          true);  // will think about shrink after next insert
      return 1;  // because we deleted one thing
    } else {
      return 0;  // because we deleted nothing
    }
  }
1085 | |
1086 | // We return the iterator past the deleted item. |
1087 | iterator erase(const_iterator pos) { |
1088 | if (pos == end()) return end(); // sanity check |
1089 | if (set_deleted(pos)) { // true if object has been newly deleted |
1090 | ++num_deleted; |
1091 | settings.set_consider_shrink( |
1092 | true); // will think about shrink after next insert |
1093 | } |
1094 | return iterator(this, const_cast<pointer>(pos.pos), const_cast<pointer>(pos.end), true); |
1095 | } |
1096 | |
1097 | iterator erase(const_iterator f, const_iterator l) { |
1098 | for (; f != l; ++f) { |
1099 | if (set_deleted(f)) // should always be true |
1100 | ++num_deleted; |
1101 | } |
1102 | settings.set_consider_shrink( |
1103 | true); // will think about shrink after next insert |
1104 | return iterator(this, const_cast<pointer>(f.pos), const_cast<pointer>(f.end), false); |
1105 | } |
1106 | |
1107 | // COMPARISON |
1108 | bool operator==(const dense_hashtable& ht) const { |
1109 | if (size() != ht.size()) { |
1110 | return false; |
1111 | } else if (this == &ht) { |
1112 | return true; |
1113 | } else { |
1114 | // Iterate through the elements in "this" and see if the |
1115 | // corresponding element is in ht |
1116 | for (const_iterator it = begin(); it != end(); ++it) { |
1117 | const_iterator it2 = ht.find(get_key(*it)); |
1118 | if ((it2 == ht.end()) || (*it != *it2)) { |
1119 | return false; |
1120 | } |
1121 | } |
1122 | return true; |
1123 | } |
1124 | } |
1125 | bool operator!=(const dense_hashtable& ht) const { return !(*this == ht); } |
1126 | |
1127 | // I/O |
1128 | // We support reading and writing hashtables to disk. Alas, since |
1129 | // I don't know how to write a hasher or key_equal, you have to make |
1130 | // sure everything but the table is the same. We compact before writing. |
1131 | private: |
1132 | // Every time the disk format changes, this should probably change too |
1133 | typedef unsigned long MagicNumberType; |
1134 | static const MagicNumberType MAGIC_NUMBER = 0x13578642; |
1135 | |
1136 | public: |
1137 | // I/O -- this is an add-on for writing hash table to disk |
1138 | // |
1139 | // INPUT and OUTPUT must be either a FILE, *or* a C++ stream |
1140 | // (istream, ostream, etc) *or* a class providing |
1141 | // Read(void*, size_t) and Write(const void*, size_t) |
1142 | // (respectively), which writes a buffer into a stream |
1143 | // (which the INPUT/OUTPUT instance presumably owns). |
1144 | |
1145 | typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer; |
1146 | |
1147 | // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) |
  // Writes the table to fp.  Format: 4-byte big-endian magic, 8-byte
  // bucket count, 8-byte element count, then for every group of 8
  // buckets one occupancy-bitmap byte followed by the serialized
  // non-empty buckets in that group.  Compacts (squash_deleted) first
  // so tombstones never reach disk.
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    if (!sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_buckets, 8))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_elements, 8))
      return false;
    // Now write a bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits = 0;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && !test_empty(i + bit)) bits |= (1 << bit);
      }
      if (!sparsehash_internal::write_data(fp, &bits, sizeof(bits)))
        return false;
      for (int bit = 0; bit < 8; ++bit) {
        if (bits & (1 << bit)) {
          if (!serializer(fp, table[i + bit])) return false;
        }
      }
    }
    return true;
  }
1173 | |
1174 | // INPUT: anything we've written an overload of read_data() for. |
1175 | // ValueSerializer: a functor. operator()(INPUT*, value_type*) |
  // Reads a table previously written by serialize() from fp, replacing
  // the current contents.  The empty key must already be set.  Returns
  // false on any read/format error (table contents are then partial).
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    assert(settings.use_empty() && "empty_key not set for read" );

    clear();  // just to be consistent
    MagicNumberType magic_read;
    if (!sparsehash_internal::read_bigendian_number(fp, &magic_read, 4))
      return false;
    if (magic_read != MAGIC_NUMBER) {
      return false;  // not our format (or wrong version)
    }
    size_type new_num_buckets;
    if (!sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8))
      return false;
    clear_to_size(new_num_buckets);
    if (!sparsehash_internal::read_bigendian_number(fp, &num_elements, 8))
      return false;

    // Read the bitmap of non-empty buckets and unserialize each marked
    // bucket directly into the table.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits;
      if (!sparsehash_internal::read_data(fp, &bits, sizeof(bits)))
        return false;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && (bits & (1 << bit))) {  // not empty
          if (!serializer(fp, &table[i + bit])) return false;
        }
      }
    }
    return true;
  }
1207 | |
1208 | private: |
  // Adapter that augments an allocator with realloc_or_die().  This
  // primary template covers arbitrary allocators, which cannot realloc;
  // calling realloc_or_die() on one is a fatal error by design.
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) {}

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr,
              "realloc_or_die is only supported for "
              "libc_allocator_with_realloc\n" );
      exit(1);
      return NULL;  // unreachable; placates compilers that require a return
    }
  };
1228 | |
1229 | // A template specialization of alloc_impl for |
1230 | // libc_allocator_with_realloc that can handle realloc_or_die. |
  // Specialization for libc_allocator_with_realloc, the one allocator
  // that genuinely supports reallocation.  realloc_or_die() aborts the
  // process on allocation failure rather than returning NULL.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A>>
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) {}

    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        fprintf(stderr,
                "sparsehash: FATAL ERROR: failed to reallocate "
                "%lu elements for ptr %p" ,
                static_cast<unsigned long>(n), static_cast<void*>(ptr));
        exit(1);
      }
      return retval;
    }
  };
1253 | |
1254 | // Package allocator with emptyval to eliminate memory needed for |
1255 | // the zero-size allocator. |
1256 | // If new fields are added to this class, we should add them to |
1257 | // operator= and swap. |
  // Wraps the allocator via inheritance so an empty (stateless)
  // allocator contributes no storage to the hashtable (empty-base
  // optimization).
  class ValInfo : public alloc_impl<value_alloc_type> {
   public:
    typedef typename alloc_impl<value_alloc_type>::value_type value_type;

    ValInfo(const alloc_impl<value_alloc_type>& a)
        : alloc_impl<value_alloc_type>(a) {}
  };
1265 | |
1266 | // Package functors with another class to eliminate memory needed for |
1267 | // zero-size functors. Since ExtractKey and hasher's operator() might |
1268 | // have the same function signature, they must be packaged in |
1269 | // different classes. |
  // Bundles the hasher with the resize-policy machinery; seeds the
  // enlarge/shrink factors from the HT_*_PCT class constants.
  struct Settings
      : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                   HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
                                                     size_type, HT_MIN_BUCKETS>(
              hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };
1278 | |
1279 | // Packages ExtractKey and SetKey functors. |
  // Bundles the three key functors (again exploiting the empty-base
  // optimization for stateless functors) plus the two sentinel keys.
  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
        : ExtractKey(ek), SetKey(sk), EqualKey(eq) {}

    // We want to return the exact same type as ExtractKey: Key or const Key&
    template <typename V>
    typename ExtractKey::result_type get_key(V&& v) const {
      return ExtractKey::operator()(std::forward<V>(v));
    }
    // Overwrite the key stored in an existing bucket.
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    // Set the key in a raw (not-yet-constructed) bucket; the `true`
    // flag selects SetKey's constructing overload.
    void construct_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k, true);
    }
    template <typename K1, typename K2>
    bool equals(const K1& a, const K2& b) const {
      return EqualKey::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename std::remove_const<key_type>::type delkey;
    // Which key marks never-used (empty) buckets.
    typename std::remove_const<key_type>::type empty_key;
  };
1306 | |
1307 | // Utility functions to access the templated operators |
  // Thin forwarding wrappers so the rest of the class can say hash(x),
  // equals(a, b), get_key(v), ... without naming settings/key_info.
  template <typename K>
  size_type hash(const K& v) const { return settings.hash(v); }
  template <typename K1, typename K2>
  bool equals(const K1& a, const K2& b) const {
    return key_info.equals(a, b);
  }
  template <typename V>
  typename ExtractKey::result_type get_key(V&& v) const {
    return key_info.get_key(std::forward<V>(v));
  }
  void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); }
  void construct_key(pointer v, const key_type& k) const { key_info.construct_key(v, k); }
1320 | |
1321 | private: |
  // Actual data
  Settings settings;  // hasher + load factors + cached resize thresholds
  KeyInfo key_info;   // ExtractKey/SetKey/EqualKey + empty/deleted keys

  size_type num_deleted;  // how many occupied buckets are marked deleted
  // Occupied buckets, INCLUDING tombstones: erase() bumps num_deleted
  // without decrementing this.
  size_type num_elements;
  size_type num_buckets;  // size of `table`; kept a power of two
  ValInfo val_info;       // holds emptyval, and also the allocator
  pointer table;          // bucket array; NULL until the empty key is set
1331 | }; |
1332 | |
1333 | // We need a global swap as well |
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& x,
                 dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) {
  // Delegate to the member swap so generic code finds swap() via ADL.
  x.swap(y);
}
1339 | |
1340 | #undef JUMP_ |
1341 | |
// Out-of-line definition required (pre-C++17) because the static const
// member ILLEGAL_BUCKET is odr-used.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type
    dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET;
1345 | |
1346 | // How full we let the table get before we resize. Knuth says .8 is |
1347 | // good -- higher causes us to probe too much, though saves memory. |
1348 | // However, we go with .5, getting better performance at the cost of |
1349 | // more space (a trade-off densehashtable explicitly chooses to make). |
1350 | // Feel free to play around with different values, though, via |
1351 | // max_load_factor() and/or set_resizing_parameters(). |
// Out-of-line definition (pre-C++17) of the occupancy constant.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 50;
1354 | |
1355 | // How empty we let the table get before we resize lower. |
1356 | // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing. |
// Out-of-line definition (pre-C++17): 0.4 * HT_OCCUPANCY_PCT keeps the
// shrink trigger safely below half the grow threshold.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
    static_cast<int>(
        0.4 * dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
1361 | |
1362 | } // namespace google |
1363 | |