1 | // Copyright (c) 2005, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | // --- |
31 | // |
32 | // A sparse hashtable is a particular implementation of |
33 | // a hashtable: one that is meant to minimize memory use. |
34 | // It does this by using a *sparse table* (cf sparsetable.h), |
// which uses between 1 and 2 bits per empty bucket
36 | // (we may need another bit for hashtables that support deletion). |
37 | // |
38 | // When empty buckets are so cheap, an appealing hashtable |
39 | // implementation is internal probing, in which the hashtable |
40 | // is a single table, and collisions are resolved by trying |
41 | // to insert again in another bucket. The most cache-efficient |
42 | // internal probing schemes are linear probing (which suffers, |
43 | // alas, from clumping) and quadratic probing, which is what |
44 | // we implement by default. |
45 | // |
46 | // Deleted buckets are a bit of a pain. We have to somehow mark |
47 | // deleted buckets (the probing must distinguish them from empty |
48 | // buckets). The most principled way is to have another bitmap, |
49 | // but that's annoying and takes up space. Instead we let the |
50 | // user specify an "impossible" key. We set deleted buckets |
51 | // to have the impossible key. |
52 | // |
// Note it is possible to change the value of the deleted key
// on the fly; you can even remove it, though after that point
// the hashtable is insert-only until you set it again.
56 | // |
57 | // You probably shouldn't use this code directly. Use |
58 | // sparse_hash_map<> or sparse_hash_set<> instead. |
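//
// For example, a minimal sketch of typical use via the wrapper
// (assumes <sparsehash/sparse_hash_map> is on the include path):
//
//    #include <sparsehash/sparse_hash_map>
//
//    google::sparse_hash_map<int, int> ages;
//    ages.set_deleted_key(-1);  // -1 must never occur as a real key
//    ages[1] = 30;              // insert
//    ages.erase(1);             // marks the bucket with the key -1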
59 | // |
60 | // You can modify the following, below: |
61 | // HT_OCCUPANCY_PCT -- how full before we double size |
62 | // HT_EMPTY_PCT -- how empty before we halve size |
63 | // HT_MIN_BUCKETS -- smallest bucket size |
64 | // HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time |
65 | // |
66 | // You can also change enlarge_factor (which defaults to |
67 | // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to |
68 | // HT_EMPTY_PCT) with set_resizing_parameters(). |
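//
// For example (a sketch; 'ht' stands for any instance of this class):
//
//    float shrink, grow;
//    ht.get_resizing_parameters(&shrink, &grow);
//    ht.set_resizing_parameters(0.0f, 0.8f);  // never shrink; grow at 80%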
69 | // |
70 | // How to decide what values to use? |
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
72 | // HT_MIN_BUCKETS is probably unnecessary since you can specify |
73 | // (indirectly) the starting number of buckets at construct-time. |
// For enlarge_factor, you can use the chart below to trade off
// expected lookup time against the space taken up. By default, this
// code uses quadratic probing, though you can change it to linear
// probing via JUMP_ below if you really want to.
78 | // |
79 | // From |
80 | // http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html |
81 | // NUMBER OF PROBES / LOOKUP Successful Unsuccessful |
82 | // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) |
// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)^2]/2
84 | // |
85 | // -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 |
86 | // QUADRATIC COLLISION RES. |
87 | // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 |
88 | // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 |
89 | // LINEAR COLLISION RES. |
90 | // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 |
91 | // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 |
92 | // |
93 | // The value type is required to be copy constructible and default |
94 | // constructible, but it need not be (and commonly isn't) assignable. |
95 | |
96 | #pragma once |
97 | |
98 | #include <assert.h> |
#include <algorithm> // for swap(), e.g.
100 | #include <iterator> // for iterator tags |
101 | #include <limits> // for numeric_limits |
102 | #include <utility> // for pair |
#include <type_traits> // for remove_const
#include <cstdint> // for uint16_t
104 | #include <sparsehash/internal/hashtable-common.h> |
105 | #include <sparsehash/sparsetable> // IWYU pragma: export |
106 | #include <stdexcept> // For length_error |
107 | |
108 | namespace google { |
109 | |
110 | #ifndef SPARSEHASH_STAT_UPDATE |
111 | #define SPARSEHASH_STAT_UPDATE(x) ((void)0) |
112 | #endif |
113 | |
114 | // The probing method |
115 | // Linear probing |
116 | // #define JUMP_(key, num_probes) ( 1 ) |
117 | // Quadratic probing |
118 | #define JUMP_(key, num_probes) (num_probes) |
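// With quadratic probing, num_probes is incremented before each jump, so
// successive probes land at offsets 1, 3, 6, 10, ... (the triangular
// numbers) from the home bucket. For a table of size S (a power of two)
// and home bucket h, the probe sequence is:
//   h,  (h+1) & (S-1),  (h+1+2) & (S-1),  (h+1+2+3) & (S-1),  ...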
119 | |
120 | // The smaller this is, the faster lookup is (because the group bitmap is |
121 | // smaller) and the faster insert is, because there's less to move. |
122 | // On the other hand, there are more groups. Since group::size_type is |
123 | // a short, this number should be of the form 32*x + 16 to avoid waste. |
124 | static const uint16_t DEFAULT_GROUP_SIZE = 48; // fits in 1.5 words |
125 | |
126 | // Hashtable class, used to implement the hashed associative containers |
127 | // hash_set and hash_map. |
128 | // |
129 | // Value: what is stored in the table (each bucket is a Value). |
130 | // Key: something in a 1-to-1 correspondence to a Value, that can be used |
131 | // to search for a Value in the table (find() takes a Key). |
132 | // HashFcn: Takes a Key and returns an integer, the more unique the better. |
133 | // ExtractKey: given a Value, returns the unique Key associated with it. |
134 | // Must inherit from unary_function, or at least have a |
135 | // result_type enum indicating the return type of operator(). |
136 | // SetKey: given a Value* and a Key, modifies the value such that |
137 | // ExtractKey(value) == key. We guarantee this is only called |
138 | // with key == deleted_key. |
139 | // EqualKey: Given two Keys, says whether they are the same (that is, |
140 | // if they are both associated with the same Value). |
141 | // Alloc: STL allocator to use to allocate memory. |
142 | |
143 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
144 | class EqualKey, class Alloc> |
145 | class sparse_hashtable; |
146 | |
147 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
148 | struct sparse_hashtable_iterator; |
149 | |
150 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
151 | struct sparse_hashtable_const_iterator; |
152 | |
// As far as iteration goes, we're basically just a sparsetable
154 | // that skips over deleted elements. |
155 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
156 | struct sparse_hashtable_iterator { |
157 | private: |
158 | using value_alloc_type = |
159 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
160 | |
161 | public: |
162 | typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
163 | typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
164 | const_iterator; |
165 | typedef typename sparsetable<V, DEFAULT_GROUP_SIZE, |
166 | value_alloc_type>::nonempty_iterator st_iterator; |
167 | |
168 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
169 | typedef V value_type; |
170 | typedef typename value_alloc_type::difference_type difference_type; |
171 | typedef typename value_alloc_type::size_type size_type; |
172 | typedef typename value_alloc_type::reference reference; |
173 | typedef typename value_alloc_type::pointer pointer; |
174 | |
175 | // "Real" constructor and default constructor |
176 | sparse_hashtable_iterator( |
177 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it, |
178 | st_iterator it_end) |
179 | : ht(h), pos(it), end(it_end) { |
180 | advance_past_deleted(); |
181 | } |
  sparse_hashtable_iterator() {}  // never used internally
183 | // The default destructor is fine; we don't define one |
184 | // The default operator= is fine; we don't define one |
185 | |
186 | // Happy dereferencer |
187 | reference operator*() const { return *pos; } |
188 | pointer operator->() const { return &(operator*()); } |
189 | |
190 | // Arithmetic. The only hard part is making sure that |
191 | // we're not on a marked-deleted array element |
192 | void advance_past_deleted() { |
193 | while (pos != end && ht->test_deleted(*this)) ++pos; |
194 | } |
195 | iterator& operator++() { |
196 | assert(pos != end); |
197 | ++pos; |
198 | advance_past_deleted(); |
199 | return *this; |
200 | } |
201 | iterator operator++(int) { |
202 | iterator tmp(*this); |
203 | ++*this; |
204 | return tmp; |
205 | } |
206 | |
207 | // Comparison. |
208 | bool operator==(const iterator& it) const { return pos == it.pos; } |
209 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
210 | |
211 | // The actual data |
212 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
213 | st_iterator pos, end; |
214 | }; |
215 | |
216 | // Now do it all again, but with const-ness! |
217 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
218 | struct sparse_hashtable_const_iterator { |
219 | private: |
220 | using value_alloc_type = |
221 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
222 | |
223 | public: |
224 | typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
225 | typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
226 | const_iterator; |
227 | typedef typename sparsetable<V, DEFAULT_GROUP_SIZE, |
228 | value_alloc_type>::const_nonempty_iterator |
229 | st_iterator; |
230 | |
231 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
232 | typedef V value_type; |
233 | typedef typename value_alloc_type::difference_type difference_type; |
234 | typedef typename value_alloc_type::size_type size_type; |
235 | typedef typename value_alloc_type::const_reference reference; |
236 | typedef typename value_alloc_type::const_pointer pointer; |
237 | |
238 | // "Real" constructor and default constructor |
239 | sparse_hashtable_const_iterator( |
240 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it, |
241 | st_iterator it_end) |
242 | : ht(h), pos(it), end(it_end) { |
243 | advance_past_deleted(); |
244 | } |
  sparse_hashtable_const_iterator() {}  // never used internally
  // This lets us convert regular iterators to const iterators
  sparse_hashtable_const_iterator(const iterator& it)
      : ht(it.ht), pos(it.pos), end(it.end) {}
249 | // The default destructor is fine; we don't define one |
250 | // The default operator= is fine; we don't define one |
251 | |
252 | // Happy dereferencer |
253 | reference operator*() const { return *pos; } |
254 | pointer operator->() const { return &(operator*()); } |
255 | |
256 | // Arithmetic. The only hard part is making sure that |
257 | // we're not on a marked-deleted array element |
258 | void advance_past_deleted() { |
259 | while (pos != end && ht->test_deleted(*this)) ++pos; |
260 | } |
261 | const_iterator& operator++() { |
262 | assert(pos != end); |
263 | ++pos; |
264 | advance_past_deleted(); |
265 | return *this; |
266 | } |
267 | const_iterator operator++(int) { |
268 | const_iterator tmp(*this); |
269 | ++*this; |
270 | return tmp; |
271 | } |
272 | |
273 | // Comparison. |
274 | bool operator==(const const_iterator& it) const { return pos == it.pos; } |
275 | bool operator!=(const const_iterator& it) const { return pos != it.pos; } |
276 | |
277 | // The actual data |
278 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
279 | st_iterator pos, end; |
280 | }; |
281 | |
282 | // And once again, but this time freeing up memory as we iterate |
283 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
284 | struct sparse_hashtable_destructive_iterator { |
285 | private: |
286 | using value_alloc_type = |
287 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
288 | |
289 | public: |
290 | typedef sparse_hashtable_destructive_iterator<V, K, HF, ExK, SetK, EqK, A> |
291 | iterator; |
292 | typedef |
293 | typename sparsetable<V, DEFAULT_GROUP_SIZE, |
294 | value_alloc_type>::destructive_iterator st_iterator; |
295 | |
296 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
297 | typedef V value_type; |
298 | typedef typename value_alloc_type::difference_type difference_type; |
299 | typedef typename value_alloc_type::size_type size_type; |
300 | typedef typename value_alloc_type::reference reference; |
301 | typedef typename value_alloc_type::pointer pointer; |
302 | |
303 | // "Real" constructor and default constructor |
304 | sparse_hashtable_destructive_iterator( |
305 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it, |
306 | st_iterator it_end) |
307 | : ht(h), pos(it), end(it_end) { |
308 | advance_past_deleted(); |
309 | } |
310 | sparse_hashtable_destructive_iterator() {} // never used internally |
311 | // The default destructor is fine; we don't define one |
312 | // The default operator= is fine; we don't define one |
313 | |
314 | // Happy dereferencer |
315 | reference operator*() const { return *pos; } |
316 | pointer operator->() const { return &(operator*()); } |
317 | |
318 | // Arithmetic. The only hard part is making sure that |
319 | // we're not on a marked-deleted array element |
320 | void advance_past_deleted() { |
321 | while (pos != end && ht->test_deleted(*this)) ++pos; |
322 | } |
323 | iterator& operator++() { |
324 | assert(pos != end); |
325 | ++pos; |
326 | advance_past_deleted(); |
327 | return *this; |
328 | } |
329 | iterator operator++(int) { |
330 | iterator tmp(*this); |
331 | ++*this; |
332 | return tmp; |
333 | } |
334 | |
335 | // Comparison. |
336 | bool operator==(const iterator& it) const { return pos == it.pos; } |
337 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
338 | |
339 | // The actual data |
340 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
341 | st_iterator pos, end; |
342 | }; |
343 | |
344 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
345 | class EqualKey, class Alloc> |
346 | class sparse_hashtable { |
347 | private: |
348 | using value_alloc_type = |
349 | typename std::allocator_traits<Alloc>::template rebind_alloc<Value>; |
350 | |
351 | public: |
352 | typedef Key key_type; |
353 | typedef Value value_type; |
354 | typedef HashFcn hasher; |
355 | typedef EqualKey key_equal; |
356 | typedef Alloc allocator_type; |
357 | |
358 | typedef typename value_alloc_type::size_type size_type; |
359 | typedef typename value_alloc_type::difference_type difference_type; |
360 | typedef typename value_alloc_type::reference reference; |
361 | typedef typename value_alloc_type::const_reference const_reference; |
362 | typedef typename value_alloc_type::pointer pointer; |
363 | typedef typename value_alloc_type::const_pointer const_pointer; |
364 | typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey, |
365 | EqualKey, Alloc> iterator; |
366 | |
367 | typedef sparse_hashtable_const_iterator< |
368 | Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator; |
369 | |
370 | typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey, |
371 | SetKey, EqualKey, |
372 | Alloc> destructive_iterator; |
373 | |
374 | // These come from tr1. For us they're the same as regular iterators. |
375 | typedef iterator local_iterator; |
376 | typedef const_iterator const_local_iterator; |
377 | |
378 | // How full we let the table get before we resize, by default. |
379 | // Knuth says .8 is good -- higher causes us to probe too much, |
380 | // though it saves memory. |
381 | static const int HT_OCCUPANCY_PCT; // = 80 (out of 100); |
382 | |
383 | // How empty we let the table get before we resize lower, by default. |
384 | // (0.0 means never resize lower.) |
385 | // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing |
386 | static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; |
387 | |
388 | // Minimum size we're willing to let hashtables be. |
389 | // Must be a power of two, and at least 4. |
390 | // Note, however, that for a given hashtable, the initial size is a |
391 | // function of the first constructor arg, and may be >HT_MIN_BUCKETS. |
392 | static const size_type HT_MIN_BUCKETS = 4; |
393 | |
394 | // By default, if you don't specify a hashtable size at |
395 | // construction-time, we use this size. Must be a power of two, and |
396 | // at least HT_MIN_BUCKETS. |
397 | static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; |
398 | |
399 | // ITERATOR FUNCTIONS |
400 | iterator begin() { |
401 | return iterator(this, table.nonempty_begin(), table.nonempty_end()); |
402 | } |
403 | iterator end() { |
404 | return iterator(this, table.nonempty_end(), table.nonempty_end()); |
405 | } |
406 | const_iterator begin() const { |
407 | return const_iterator(this, table.nonempty_begin(), table.nonempty_end()); |
408 | } |
409 | const_iterator end() const { |
410 | return const_iterator(this, table.nonempty_end(), table.nonempty_end()); |
411 | } |
412 | |
413 | // These come from tr1 unordered_map. They iterate over 'bucket' n. |
414 | // For sparsehashtable, we could consider each 'group' to be a bucket, |
415 | // I guess, but I don't really see the point. We'll just consider |
416 | // bucket n to be the n-th element of the sparsetable, if it's occupied, |
417 | // or some empty element, otherwise. |
418 | local_iterator begin(size_type i) { |
419 | if (table.test(i)) |
420 | return local_iterator(this, table.get_iter(i), table.nonempty_end()); |
421 | else |
422 | return local_iterator(this, table.nonempty_end(), table.nonempty_end()); |
423 | } |
424 | local_iterator end(size_type i) { |
425 | local_iterator it = begin(i); |
426 | if (table.test(i) && !test_deleted(i)) ++it; |
427 | return it; |
428 | } |
429 | const_local_iterator begin(size_type i) const { |
430 | if (table.test(i)) |
431 | return const_local_iterator(this, table.get_iter(i), |
432 | table.nonempty_end()); |
433 | else |
434 | return const_local_iterator(this, table.nonempty_end(), |
435 | table.nonempty_end()); |
436 | } |
437 | const_local_iterator end(size_type i) const { |
438 | const_local_iterator it = begin(i); |
439 | if (table.test(i) && !test_deleted(i)) ++it; |
440 | return it; |
441 | } |
442 | |
443 | // This is used when resizing |
444 | destructive_iterator destructive_begin() { |
445 | return destructive_iterator(this, table.destructive_begin(), |
446 | table.destructive_end()); |
447 | } |
448 | destructive_iterator destructive_end() { |
449 | return destructive_iterator(this, table.destructive_end(), |
450 | table.destructive_end()); |
451 | } |
452 | |
453 | // ACCESSOR FUNCTIONS for the things we templatize on, basically |
454 | hasher hash_funct() const { return settings; } |
455 | key_equal key_eq() const { return key_info; } |
456 | allocator_type get_allocator() const { return table.get_allocator(); } |
457 | |
458 | // Accessor function for statistics gathering. |
459 | int num_table_copies() const { return settings.num_ht_copies(); } |
460 | |
461 | private: |
462 | // We need to copy values when we set the special marker for deleted |
463 | // elements, but, annoyingly, we can't just use the copy assignment |
464 | // operator because value_type might not be assignable (it's often |
465 | // pair<const X, Y>). We use explicit destructor invocation and |
466 | // placement new to get around this. Arg. |
467 | void set_value(pointer dst, const_reference src) { |
468 | dst->~value_type(); // delete the old value, if any |
469 | new (dst) value_type(src); |
470 | } |
471 | |
  // This is used as a tag for the copy constructor, saying to destroy its
  // arg. We have two ways of destructively copying: with potentially
  // growing the hashtable as we copy, and without. To make sure the
  // outside world can't do a destructive copy, we make the type private.
476 | enum MoveDontCopyT { MoveDontCopy, MoveDontGrow }; |
477 | |
  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries. This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
484 | private: |
485 | void squash_deleted() { // gets rid of any deleted entries we have |
486 | if (num_deleted) { // get rid of deleted before writing |
487 | sparse_hashtable tmp(MoveDontGrow, *this); |
488 | swap(tmp); // now we are tmp |
489 | } |
490 | assert(num_deleted == 0); |
491 | } |
492 | |
  // Test if the given key is the deleted indicator. Requires
  // num_deleted > 0: that is what guarantees key_info.delkey is valid
  // (e.g. right after read_metadata(), delkey is unset, but
  // num_deleted == 0 so we never get here).
496 | bool test_deleted_key(const key_type& key) const { |
497 | assert(num_deleted > 0); |
498 | return equals(key_info.delkey, key); |
499 | } |
500 | |
501 | public: |
502 | void set_deleted_key(const key_type& key) { |
503 | // It's only safe to change what "deleted" means if we purge deleted |
504 | // guys |
505 | squash_deleted(); |
506 | settings.set_use_deleted(true); |
507 | key_info.delkey = key; |
508 | } |
509 | void clear_deleted_key() { |
510 | squash_deleted(); |
511 | settings.set_use_deleted(false); |
512 | } |
513 | key_type deleted_key() const { |
514 | assert(settings.use_deleted() && |
515 | "Must set deleted key before calling deleted_key" ); |
516 | return key_info.delkey; |
517 | } |
518 | |
519 | // These are public so the iterators can use them |
  // True if the item at position bucknum is the "deleted" marker
521 | bool test_deleted(size_type bucknum) const { |
522 | // Invariant: !use_deleted() implies num_deleted is 0. |
523 | assert(settings.use_deleted() || num_deleted == 0); |
524 | return num_deleted > 0 && table.test(bucknum) && |
525 | test_deleted_key(get_key(table.unsafe_get(bucknum))); |
526 | } |
527 | bool test_deleted(const iterator& it) const { |
528 | // Invariant: !use_deleted() implies num_deleted is 0. |
529 | assert(settings.use_deleted() || num_deleted == 0); |
530 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
531 | } |
532 | bool test_deleted(const const_iterator& it) const { |
533 | // Invariant: !use_deleted() implies num_deleted is 0. |
534 | assert(settings.use_deleted() || num_deleted == 0); |
535 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
536 | } |
537 | bool test_deleted(const destructive_iterator& it) const { |
538 | // Invariant: !use_deleted() implies num_deleted is 0. |
539 | assert(settings.use_deleted() || num_deleted == 0); |
540 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
541 | } |
542 | |
543 | private: |
544 | void check_use_deleted(const char* caller) { |
545 | (void)caller; // could log it if the assert failed |
546 | assert(settings.use_deleted()); |
547 | } |
548 | |
  // Set it so test_deleted is true. Returns true if the object was not
  // already deleted.
551 | // TODO(csilvers): make these private (also in densehashtable.h) |
552 | bool set_deleted(iterator& it) { |
553 | check_use_deleted("set_deleted()" ); |
554 | bool retval = !test_deleted(it); |
555 | // &* converts from iterator to value-type. |
556 | set_key(&(*it), key_info.delkey); |
557 | return retval; |
558 | } |
  // Set it so test_deleted is false. Returns true if the object was
  // previously deleted.
560 | bool clear_deleted(iterator& it) { |
561 | check_use_deleted("clear_deleted()" ); |
562 | // Happens automatically when we assign something else in its place. |
563 | return test_deleted(it); |
564 | } |
565 | |
  // We also allow setting/clearing the deleted bit on a const iterator.
567 | // We allow a const_iterator for the same reason you can delete a |
568 | // const pointer: it's convenient, and semantically you can't use |
569 | // 'it' after it's been deleted anyway, so its const-ness doesn't |
570 | // really matter. |
571 | bool set_deleted(const_iterator& it) { |
572 | check_use_deleted("set_deleted()" ); |
573 | bool retval = !test_deleted(it); |
574 | set_key(const_cast<pointer>(&(*it)), key_info.delkey); |
575 | return retval; |
576 | } |
  // Set it so test_deleted is false. Returns true if the object was
  // previously deleted.
578 | bool clear_deleted(const_iterator& it) { |
579 | check_use_deleted("clear_deleted()" ); |
580 | return test_deleted(it); |
581 | } |
582 | |
583 | // FUNCTIONS CONCERNING SIZE |
584 | public: |
585 | size_type size() const { return table.num_nonempty() - num_deleted; } |
586 | size_type max_size() const { return table.max_size(); } |
587 | bool empty() const { return size() == 0; } |
588 | size_type bucket_count() const { return table.size(); } |
589 | size_type max_bucket_count() const { return max_size(); } |
590 | // These are tr1 methods. Their idea of 'bucket' doesn't map well to |
591 | // what we do. We just say every bucket has 0 or 1 items in it. |
592 | size_type bucket_size(size_type i) const { |
593 | return begin(i) == end(i) ? 0 : 1; |
594 | } |
595 | |
596 | private: |
597 | // Because of the above, size_type(-1) is never legal; use it for errors |
598 | static const size_type ILLEGAL_BUCKET = size_type(-1); |
599 | |
  // Used after a string of deletes. Returns true if we actually shrank.
601 | // TODO(csilvers): take a delta so we can take into account inserts |
602 | // done after shrinking. Maybe make part of the Settings class? |
603 | bool maybe_shrink() { |
604 | assert(table.num_nonempty() >= num_deleted); |
605 | assert((bucket_count() & (bucket_count() - 1)) == 0); // is a power of two |
606 | assert(bucket_count() >= HT_MIN_BUCKETS); |
607 | bool retval = false; |
608 | |
609 | // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, |
610 | // we'll never shrink until you get relatively big, and we'll never |
611 | // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something |
612 | // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will |
613 | // shrink us down to HT_MIN_BUCKETS buckets, which is too small. |
614 | const size_type num_remain = table.num_nonempty() - num_deleted; |
615 | const size_type shrink_threshold = settings.shrink_threshold(); |
616 | if (shrink_threshold > 0 && num_remain < shrink_threshold && |
617 | bucket_count() > HT_DEFAULT_STARTING_BUCKETS) { |
618 | const float shrink_factor = settings.shrink_factor(); |
619 | size_type sz = bucket_count() / 2; // find how much we should shrink |
620 | while (sz > HT_DEFAULT_STARTING_BUCKETS && |
621 | num_remain < static_cast<size_type>(sz * shrink_factor)) { |
622 | sz /= 2; // stay a power of 2 |
623 | } |
624 | sparse_hashtable tmp(MoveDontCopy, *this, sz); |
625 | swap(tmp); // now we are tmp |
626 | retval = true; |
627 | } |
628 | settings.set_consider_shrink(false); // because we just considered it |
629 | return retval; |
630 | } |
631 | |
632 | // We'll let you resize a hashtable -- though this makes us copy all! |
633 | // When you resize, you say, "make it big enough for this many more |
634 | // elements" |
635 | // Returns true if we actually resized, false if size was already ok. |
636 | bool resize_delta(size_type delta) { |
637 | bool did_resize = false; |
638 | if (settings.consider_shrink()) { // see if lots of deletes happened |
639 | if (maybe_shrink()) did_resize = true; |
640 | } |
641 | if (table.num_nonempty() >= |
642 | (std::numeric_limits<size_type>::max)() - delta) { |
643 | throw std::length_error("resize overflow" ); |
644 | } |
645 | if (bucket_count() >= HT_MIN_BUCKETS && |
646 | (table.num_nonempty() + delta) <= settings.enlarge_threshold()) |
647 | return did_resize; // we're ok as we are |
648 | |
649 | // Sometimes, we need to resize just to get rid of all the |
650 | // "deleted" buckets that are clogging up the hashtable. So when |
651 | // deciding whether to resize, count the deleted buckets (which |
652 | // are currently taking up room). But later, when we decide what |
653 | // size to resize to, *don't* count deleted buckets, since they |
654 | // get discarded during the resize. |
655 | const size_type needed_size = |
656 | settings.min_buckets(table.num_nonempty() + delta, 0); |
657 | if (needed_size <= bucket_count()) // we have enough buckets |
658 | return did_resize; |
659 | |
660 | size_type resize_to = settings.min_buckets( |
661 | table.num_nonempty() - num_deleted + delta, bucket_count()); |
662 | if (resize_to < needed_size && // may double resize_to |
663 | resize_to < (std::numeric_limits<size_type>::max)() / 2) { |
664 | // This situation means that we have enough deleted elements, |
665 | // that once we purge them, we won't actually have needed to |
666 | // grow. But we may want to grow anyway: if we just purge one |
667 | // element, say, we'll have to grow anyway next time we |
668 | // insert. Might as well grow now, since we're already going |
669 | // through the trouble of copying (in order to purge the |
670 | // deleted elements). |
671 | const size_type target = |
672 | static_cast<size_type>(settings.shrink_size(resize_to * 2)); |
673 | if (table.num_nonempty() - num_deleted + delta >= target) { |
        // Good, we won't be below the shrink threshold even if we
675 | // double. |
676 | resize_to *= 2; |
677 | } |
678 | } |
679 | |
680 | sparse_hashtable tmp(MoveDontCopy, *this, resize_to); |
681 | swap(tmp); // now we are tmp |
682 | return true; |
683 | } |
684 | |
685 | // Used to actually do the rehashing when we grow/shrink a hashtable |
686 | void copy_from(const sparse_hashtable& ht, size_type min_buckets_wanted) { |
687 | clear(); // clear table, set num_deleted to 0 |
688 | |
689 | // If we need to change the size of our table, do it now |
690 | const size_type resize_to = |
691 | settings.min_buckets(ht.size(), min_buckets_wanted); |
692 | if (resize_to > bucket_count()) { // we don't have enough buckets |
693 | table.resize(resize_to); // sets the number of buckets |
694 | settings.reset_thresholds(bucket_count()); |
695 | } |
696 | |
    // We use a normal iterator to get non-deleted buckets from ht
698 | // We could use insert() here, but since we know there are |
699 | // no duplicates and no deleted items, we can be more efficient |
700 | assert((bucket_count() & (bucket_count() - 1)) == 0); // a power of two |
701 | for (const_iterator it = ht.begin(); it != ht.end(); ++it) { |
702 | size_type num_probes = 0; // how many times we've probed |
703 | size_type bucknum; |
704 | const size_type bucket_count_minus_one = bucket_count() - 1; |
705 | for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; |
706 | table.test(bucknum); // not empty |
           bucknum = (bucknum + JUMP_(get_key(*it), num_probes)) &
                     bucket_count_minus_one) {
709 | ++num_probes; |
710 | assert(num_probes < bucket_count() && |
711 | "Hashtable is full: an error in key_equal<> or hash<>" ); |
712 | } |
713 | table.set(bucknum, *it); // copies the value to here |
714 | } |
715 | settings.inc_num_ht_copies(); |
716 | } |
717 | |
718 | // Implementation is like copy_from, but it destroys the table of the |
719 | // "from" guy by freeing sparsetable memory as we iterate. This is |
720 | // useful in resizing, since we're throwing away the "from" guy anyway. |
721 | void move_from(MoveDontCopyT mover, sparse_hashtable& ht, |
722 | size_type min_buckets_wanted) { |
723 | clear(); // clear table, set num_deleted to 0 |
724 | |
725 | // If we need to change the size of our table, do it now |
726 | size_type resize_to; |
727 | if (mover == MoveDontGrow) |
728 | resize_to = ht.bucket_count(); // keep same size as old ht |
729 | else // MoveDontCopy |
730 | resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); |
731 | if (resize_to > bucket_count()) { // we don't have enough buckets |
732 | table.resize(resize_to); // sets the number of buckets |
733 | settings.reset_thresholds(bucket_count()); |
734 | } |
735 | |
    // We use a normal iterator to get non-deleted buckets from ht
737 | // We could use insert() here, but since we know there are |
738 | // no duplicates and no deleted items, we can be more efficient |
739 | assert((bucket_count() & (bucket_count() - 1)) == 0); // a power of two |
740 | // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): |
741 | for (destructive_iterator it = ht.destructive_begin(); |
742 | it != ht.destructive_end(); ++it) { |
743 | size_type num_probes = 0; // how many times we've probed |
744 | size_type bucknum; |
745 | for (bucknum = hash(get_key(*it)) & (bucket_count() - 1); // h % buck_cnt |
746 | table.test(bucknum); // not empty |
           bucknum = (bucknum + JUMP_(get_key(*it), num_probes)) &
                     (bucket_count() - 1)) {
749 | ++num_probes; |
750 | assert(num_probes < bucket_count() && |
751 | "Hashtable is full: an error in key_equal<> or hash<>" ); |
752 | } |
753 | table.set(bucknum, *it); // copies the value to here |
754 | } |
755 | settings.inc_num_ht_copies(); |
756 | } |
757 | |
758 | // Required by the spec for hashed associative container |
759 | public: |
760 | // Though the docs say this should be num_buckets, I think it's much |
761 | // more useful as num_elements. As a special feature, calling with |
762 | // req_elements==0 will cause us to shrink if we can, saving space. |
763 | void resize(size_type req_elements) { // resize to this or larger |
764 | if (settings.consider_shrink() || req_elements == 0) maybe_shrink(); |
765 | if (req_elements > table.num_nonempty()) // we only grow |
766 | resize_delta(req_elements - table.num_nonempty()); |
767 | } |
768 | |
769 | // Get and change the value of shrink_factor and enlarge_factor. The |
770 | // description at the beginning of this file explains how to choose |
771 | // the values. Setting the shrink parameter to 0.0 ensures that the |
772 | // table never shrinks. |
773 | void get_resizing_parameters(float* shrink, float* grow) const { |
774 | *shrink = settings.shrink_factor(); |
775 | *grow = settings.enlarge_factor(); |
776 | } |
777 | void set_resizing_parameters(float shrink, float grow) { |
778 | settings.set_resizing_parameters(shrink, grow); |
779 | settings.reset_thresholds(bucket_count()); |
780 | } |
781 | |
782 | // CONSTRUCTORS -- as required by the specs, we take a size, |
783 | // but also let you specify a hashfunction, key comparator, |
784 | // and key extractor. We also define a copy constructor and =. |
785 | // DESTRUCTOR -- the default is fine, surprisingly. |
786 | explicit sparse_hashtable(size_type expected_max_items_in_table = 0, |
787 | const HashFcn& hf = HashFcn(), |
788 | const EqualKey& eql = EqualKey(), |
789 | const ExtractKey& ext = ExtractKey(), |
790 | const SetKey& set = SetKey(), |
791 | const Alloc& alloc = Alloc()) |
792 | : settings(hf), |
793 | key_info(ext, set, eql), |
794 | num_deleted(0), |
795 | table((expected_max_items_in_table == 0 |
796 | ? HT_DEFAULT_STARTING_BUCKETS |
797 | : settings.min_buckets(expected_max_items_in_table, 0)), |
798 | alloc) { |
799 | settings.reset_thresholds(bucket_count()); |
800 | } |
801 | |
802 | // As a convenience for resize(), we allow an optional second argument |
803 | // which lets you make this new hashtable a different size than ht. |
804 | // We also provide a mechanism of saying you want to "move" the ht argument |
805 | // into us instead of copying. |
806 | sparse_hashtable(const sparse_hashtable& ht, |
807 | size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) |
808 | : settings(ht.settings), |
809 | key_info(ht.key_info), |
810 | num_deleted(0), |
811 | table(0, ht.get_allocator()) { |
812 | settings.reset_thresholds(bucket_count()); |
813 | copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries |
814 | } |
815 | sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht, |
816 | size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) |
817 | : settings(ht.settings), |
818 | key_info(ht.key_info), |
819 | num_deleted(0), |
820 | table(0, ht.get_allocator()) { |
821 | settings.reset_thresholds(bucket_count()); |
822 | move_from(mover, ht, min_buckets_wanted); // ignores deleted entries |
823 | } |
824 | |
825 | sparse_hashtable& operator=(const sparse_hashtable& ht) { |
826 | if (&ht == this) return *this; // don't copy onto ourselves |
827 | settings = ht.settings; |
828 | key_info = ht.key_info; |
829 | num_deleted = ht.num_deleted; |
830 | // copy_from() calls clear and sets num_deleted to 0 too |
831 | copy_from(ht, HT_MIN_BUCKETS); |
832 | // we purposefully don't copy the allocator, which may not be copyable |
833 | return *this; |
834 | } |
835 | |
836 | // Many STL algorithms use swap instead of copy constructors |
837 | void swap(sparse_hashtable& ht) { |
838 | std::swap(settings, ht.settings); |
839 | std::swap(key_info, ht.key_info); |
840 | std::swap(num_deleted, ht.num_deleted); |
841 | table.swap(ht.table); |
842 | settings.reset_thresholds(bucket_count()); // also resets consider_shrink |
843 | ht.settings.reset_thresholds(ht.bucket_count()); |
844 | // we purposefully don't swap the allocator, which may not be swap-able |
845 | } |
846 | |
847 | // It's always nice to be able to clear a table without deallocating it |
848 | void clear() { |
849 | if (!empty() || (num_deleted != 0)) { |
850 | table.clear(); |
851 | } |
852 | settings.reset_thresholds(bucket_count()); |
853 | num_deleted = 0; |
854 | } |
855 | |
856 | // LOOKUP ROUTINES |
857 | private: |
858 | // Returns a pair of positions: 1st where the object is, 2nd where |
859 | // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET |
860 | // if object is not found; 2nd is ILLEGAL_BUCKET if it is. |
  // Note: because of deletions, where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later.
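  //
  // For example (hypothetical values): a return of {ILLEGAL_BUCKET, 7}
  // means the key is absent and would go in bucket 7, while a return of
  // {7, ILLEGAL_BUCKET} means the key currently lives in bucket 7.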
863 | template <typename K> |
864 | std::pair<size_type, size_type> find_position(const K& key) const { |
865 | size_type num_probes = 0; // how many times we've probed |
866 | const size_type bucket_count_minus_one = bucket_count() - 1; |
867 | size_type bucknum = hash(key) & bucket_count_minus_one; |
868 | size_type insert_pos = ILLEGAL_BUCKET; // where we would insert |
869 | SPARSEHASH_STAT_UPDATE(total_lookups += 1); |
870 | while (1) { // probe until something happens |
871 | if (!table.test(bucknum)) { // bucket is empty |
872 | SPARSEHASH_STAT_UPDATE(total_probes += num_probes); |
873 | if (insert_pos == ILLEGAL_BUCKET) // found no prior place to insert |
874 | return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum); |
875 | else |
876 | return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos); |
877 | } else if (test_deleted(bucknum)) { // keep searching, but mark to insert |
878 | if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum; |
879 | } else if (equals(key, get_key(table.unsafe_get(bucknum)))) { |
880 | SPARSEHASH_STAT_UPDATE(total_probes += num_probes); |
881 | return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET); |
882 | } |
883 | ++num_probes; // we're doing another probe |
884 | bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; |
885 | assert(num_probes < bucket_count() && |
886 | "Hashtable is full: an error in key_equal<> or hash<>" ); |
887 | } |
888 | } |
889 | |
890 | public: |
891 | template <typename K> |
892 | iterator find(const K& key) { |
893 | if (size() == 0) return end(); |
894 | std::pair<size_type, size_type> pos = find_position(key); |
895 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
896 | return end(); |
897 | else |
898 | return iterator(this, table.get_iter(pos.first), table.nonempty_end()); |
899 | } |
900 | |
901 | template <typename K> |
902 | const_iterator find(const K& key) const { |
903 | if (size() == 0) return end(); |
904 | std::pair<size_type, size_type> pos = find_position(key); |
905 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
906 | return end(); |
907 | else |
908 | return const_iterator(this, table.get_iter(pos.first), |
909 | table.nonempty_end()); |
910 | } |
911 | |
912 | // This is a tr1 method: the bucket a given key is in, or what bucket |
913 | // it would be put in, if it were to be inserted. Shrug. |
914 | size_type bucket(const key_type& key) const { |
915 | std::pair<size_type, size_type> pos = find_position(key); |
916 | return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; |
917 | } |
918 | |
  // Counts how many elements have the given key. For maps, it's either
  // 0 or 1.
920 | template <typename K> |
921 | size_type count(const K& key) const { |
922 | std::pair<size_type, size_type> pos = find_position(key); |
923 | return pos.first == ILLEGAL_BUCKET ? 0 : 1; |
924 | } |
925 | |
926 | // Likewise, equal_range doesn't really make sense for us. Oh well. |
927 | template <typename K> |
928 | std::pair<iterator, iterator> equal_range(const K& key) { |
929 | iterator pos = find(key); // either an iterator or end |
930 | if (pos == end()) { |
931 | return std::pair<iterator, iterator>(pos, pos); |
932 | } else { |
933 | const iterator startpos = pos++; |
934 | return std::pair<iterator, iterator>(startpos, pos); |
935 | } |
936 | } |
937 | template <typename K> |
938 | std::pair<const_iterator, const_iterator> equal_range( |
939 | const K& key) const { |
940 | const_iterator pos = find(key); // either an iterator or end |
941 | if (pos == end()) { |
942 | return std::pair<const_iterator, const_iterator>(pos, pos); |
943 | } else { |
944 | const const_iterator startpos = pos++; |
945 | return std::pair<const_iterator, const_iterator>(startpos, pos); |
946 | } |
947 | } |
948 | |
949 | // INSERTION ROUTINES |
950 | private: |
951 | // Private method used by insert_noresize and find_or_insert. |
952 | iterator insert_at(const_reference obj, size_type pos) { |
953 | if (size() >= max_size()) { |
954 | throw std::length_error("insert overflow" ); |
955 | } |
956 | if (test_deleted(pos)) { // just replace if it's been deleted |
957 | // The set() below will undelete this object. We just worry about |
958 | // stats |
959 | assert(num_deleted > 0); |
960 | --num_deleted; // used to be, now it isn't |
961 | } |
962 | table.set(pos, obj); |
963 | return iterator(this, table.get_iter(pos), table.nonempty_end()); |
964 | } |
965 | |
966 | // If you know *this is big enough to hold obj, use this routine |
967 | std::pair<iterator, bool> insert_noresize(const_reference obj) { |
968 | // First, double-check we're not inserting delkey |
969 | assert( |
970 | (!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) && |
971 | "Inserting the deleted key" ); |
972 | const std::pair<size_type, size_type> pos = find_position(get_key(obj)); |
973 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
974 | return std::pair<iterator, bool>( |
975 | iterator(this, table.get_iter(pos.first), table.nonempty_end()), |
976 | false); // false: we didn't insert |
977 | } else { // pos.second says where to put it |
978 | return std::pair<iterator, bool>(insert_at(obj, pos.second), true); |
979 | } |
980 | } |
981 | |
982 | // Specializations of insert(it, it) depending on the power of the iterator: |
  // (1) Forward iterators: we can compute the distance, so resize
  //     before inserting
984 | template <class ForwardIterator> |
985 | void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) { |
986 | size_t dist = std::distance(f, l); |
987 | if (dist >= (std::numeric_limits<size_type>::max)()) { |
988 | throw std::length_error("insert-range overflow" ); |
989 | } |
990 | resize_delta(static_cast<size_type>(dist)); |
991 | for (; dist > 0; --dist, ++f) { |
992 | insert_noresize(*f); |
993 | } |
994 | } |
995 | |
996 | // (2) Arbitrary iterator, can't tell how much to resize |
997 | template <class InputIterator> |
998 | void insert(InputIterator f, InputIterator l, std::input_iterator_tag) { |
999 | for (; f != l; ++f) insert(*f); |
1000 | } |
1001 | |
1002 | public: |
1003 | // This is the normal insert routine, used by the outside world |
1004 | std::pair<iterator, bool> insert(const_reference obj) { |
1005 | resize_delta(1); // adding an object, grow if need be |
1006 | return insert_noresize(obj); |
1007 | } |
1008 | |
1009 | // When inserting a lot at a time, we specialize on the type of iterator |
1010 | template <class InputIterator> |
1011 | void insert(InputIterator f, InputIterator l) { |
1012 | // specializes on iterator type |
1013 | insert(f, l, |
1014 | typename std::iterator_traits<InputIterator>::iterator_category()); |
1015 | } |
1016 | |
1017 | // DefaultValue is a functor that takes a key and returns a value_type |
1018 | // representing the default value to be inserted if none is found. |
1019 | template <class DefaultValue> |
1020 | value_type& find_or_insert(const key_type& key) { |
1021 | // First, double-check we're not inserting delkey |
1022 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && |
1023 | "Inserting the deleted key" ); |
1024 | const std::pair<size_type, size_type> pos = find_position(key); |
1025 | DefaultValue default_value; |
1026 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
1027 | return *table.get_iter(pos.first); |
1028 | } else if (resize_delta(1)) { // needed to rehash to make room |
1029 | // Since we resized, we can't use pos, so recalculate where to |
1030 | // insert. |
1031 | return *insert_noresize(default_value(key)).first; |
1032 | } else { // no need to rehash, insert right here |
1033 | return *insert_at(default_value(key), pos.second); |
1034 | } |
1035 | } |
1036 | |
1037 | // DELETION ROUTINES |
1038 | size_type erase(const key_type& key) { |
    // First, double-check we're not erasing the deleted key.
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Erasing the deleted key" );
1043 | const_iterator pos = find(key); // shrug: shouldn't need to be const |
1044 | if (pos != end()) { |
1045 | assert(!test_deleted(pos)); // or find() shouldn't have returned it |
1046 | set_deleted(pos); |
1047 | ++num_deleted; |
1048 | // will think about shrink after next insert |
1049 | settings.set_consider_shrink(true); |
1050 | return 1; // because we deleted one thing |
1051 | } else { |
1052 | return 0; // because we deleted nothing |
1053 | } |
1054 | } |
1055 | |
  // Marks the item at pos as deleted; no iterator is returned.
1057 | void erase(iterator pos) { |
1058 | if (pos == end()) return; // sanity check |
1059 | if (set_deleted(pos)) { // true if object has been newly deleted |
1060 | ++num_deleted; |
1061 | // will think about shrink after next insert |
1062 | settings.set_consider_shrink(true); |
1063 | } |
1064 | } |
1065 | |
1066 | void erase(iterator f, iterator l) { |
1067 | for (; f != l; ++f) { |
1068 | if (set_deleted(f)) // should always be true |
1069 | ++num_deleted; |
1070 | } |
1071 | // will think about shrink after next insert |
1072 | settings.set_consider_shrink(true); |
1073 | } |
1074 | |
1075 | // We allow you to erase a const_iterator just like we allow you to |
1076 | // erase an iterator. This is in parallel to 'delete': you can delete |
1077 | // a const pointer just like a non-const pointer. The logic is that |
1078 | // you can't use the object after it's erased anyway, so it doesn't matter |
1079 | // if it's const or not. |
1080 | void erase(const_iterator pos) { |
1081 | if (pos == end()) return; // sanity check |
1082 | if (set_deleted(pos)) { // true if object has been newly deleted |
1083 | ++num_deleted; |
1084 | // will think about shrink after next insert |
1085 | settings.set_consider_shrink(true); |
1086 | } |
1087 | } |
1088 | void erase(const_iterator f, const_iterator l) { |
1089 | for (; f != l; ++f) { |
1090 | if (set_deleted(f)) // should always be true |
1091 | ++num_deleted; |
1092 | } |
1093 | // will think about shrink after next insert |
1094 | settings.set_consider_shrink(true); |
1095 | } |
1096 | |
1097 | // COMPARISON |
1098 | bool operator==(const sparse_hashtable& ht) const { |
1099 | if (size() != ht.size()) { |
1100 | return false; |
1101 | } else if (this == &ht) { |
1102 | return true; |
1103 | } else { |
1104 | // Iterate through the elements in "this" and see if the |
1105 | // corresponding element is in ht |
1106 | for (const_iterator it = begin(); it != end(); ++it) { |
1107 | const_iterator it2 = ht.find(get_key(*it)); |
1108 | if ((it2 == ht.end()) || (*it != *it2)) { |
1109 | return false; |
1110 | } |
1111 | } |
1112 | return true; |
1113 | } |
1114 | } |
1115 | bool operator!=(const sparse_hashtable& ht) const { return !(*this == ht); } |
1116 | |
1117 | // I/O |
1118 | // We support reading and writing hashtables to disk. NOTE that |
1119 | // this only stores the hashtable metadata, not the stuff you've |
1120 | // actually put in the hashtable! Alas, since I don't know how to |
1121 | // write a hasher or key_equal, you have to make sure everything |
1122 | // but the table is the same. We compact before writing. |
1123 | // |
1124 | // The OUTPUT type needs to support a Write() operation. File and |
1125 | // OutputBuffer are appropriate types to pass in. |
1126 | // |
1127 | // The INPUT type needs to support a Read() operation. File and |
1128 | // InputBuffer are appropriate types to pass in. |
1129 | template <typename OUTPUT> |
1130 | bool write_metadata(OUTPUT* fp) { |
1131 | squash_deleted(); // so we don't have to worry about delkey |
1132 | return table.write_metadata(fp); |
1133 | } |
1134 | |
1135 | template <typename INPUT> |
1136 | bool read_metadata(INPUT* fp) { |
    num_deleted = 0;  // since we got rid of them before writing
1138 | const bool result = table.read_metadata(fp); |
1139 | settings.reset_thresholds(bucket_count()); |
1140 | return result; |
1141 | } |
1142 | |
1143 | // Only meaningful if value_type is a POD. |
1144 | template <typename OUTPUT> |
1145 | bool write_nopointer_data(OUTPUT* fp) { |
1146 | return table.write_nopointer_data(fp); |
1147 | } |
1148 | |
1149 | // Only meaningful if value_type is a POD. |
1150 | template <typename INPUT> |
1151 | bool read_nopointer_data(INPUT* fp) { |
1152 | return table.read_nopointer_data(fp); |
1153 | } |
1154 | |
1155 | // INPUT and OUTPUT must be either a FILE, *or* a C++ stream |
1156 | // (istream, ostream, etc) *or* a class providing |
1157 | // Read(void*, size_t) and Write(const void*, size_t) |
1158 | // (respectively), which writes a buffer into a stream |
1159 | // (which the INPUT/OUTPUT instance presumably owns). |
1160 | |
1161 | typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer; |
1162 | |
1163 | // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) |
1164 | template <typename ValueSerializer, typename OUTPUT> |
1165 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { |
1166 | squash_deleted(); // so we don't have to worry about delkey |
1167 | return table.serialize(serializer, fp); |
1168 | } |
1169 | |
1170 | // ValueSerializer: a functor. operator()(INPUT*, value_type*) |
1171 | template <typename ValueSerializer, typename INPUT> |
1172 | bool unserialize(ValueSerializer serializer, INPUT* fp) { |
    num_deleted = 0;  // since we got rid of them before writing
1174 | const bool result = table.unserialize(serializer, fp); |
1175 | settings.reset_thresholds(bucket_count()); |
1176 | return result; |
1177 | } |
1178 | |
1179 | private: |
1180 | // Table is the main storage class. |
1181 | typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table; |
1182 | |
1183 | // Package templated functors with the other types to eliminate memory |
1184 | // needed for storing these zero-size operators. Since ExtractKey and |
1185 | // hasher's operator() might have the same function signature, they |
1186 | // must be packaged in different classes. |
1187 | struct Settings |
1188 | : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type, |
1189 | HT_MIN_BUCKETS> { |
1190 | explicit Settings(const hasher& hf) |
1191 | : sparsehash_internal::sh_hashtable_settings<key_type, hasher, |
1192 | size_type, HT_MIN_BUCKETS>( |
1193 | hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} |
1194 | }; |
1195 | |
  // KeyInfo stores the deleted key and packages the zero-size functors:
  // ExtractKey, SetKey, and EqualKey.
1198 | class KeyInfo : public ExtractKey, public SetKey, public EqualKey { |
1199 | public: |
1200 | KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) |
1201 | : ExtractKey(ek), SetKey(sk), EqualKey(eq) {} |
1202 | // We want to return the exact same type as ExtractKey: Key or const |
1203 | // Key& |
1204 | typename ExtractKey::result_type get_key(const_reference v) const { |
1205 | return ExtractKey::operator()(v); |
1206 | } |
1207 | void set_key(pointer v, const key_type& k) const { |
1208 | SetKey::operator()(v, k); |
1209 | } |
1210 | template <typename K1, typename K2> |
1211 | bool equals(const K1& a, const K2& b) const { |
1212 | return EqualKey::operator()(a, b); |
1213 | } |
1214 | |
1215 | // Which key marks deleted entries. |
1216 | // TODO(csilvers): make a pointer, and get rid of use_deleted |
1217 | // (benchmark!) |
1218 | typename std::remove_const<key_type>::type delkey; |
1219 | }; |
1220 | |
1221 | // Utility functions to access the templated operators |
1222 | template <typename K> |
1223 | size_type hash(const K& v) const { return settings.hash(v); } |
1224 | template <typename K1, typename K2> |
1225 | bool equals(const K1& a, const K2& b) const { |
1226 | return key_info.equals(a, b); |
1227 | } |
1228 | typename ExtractKey::result_type get_key(const_reference v) const { |
1229 | return key_info.get_key(v); |
1230 | } |
1231 | void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); } |
1232 | |
1233 | private: |
1234 | // Actual data |
1235 | Settings settings; |
1236 | KeyInfo key_info; |
1237 | size_type num_deleted; // how many occupied buckets are marked deleted |
1238 | Table table; // holds num_buckets and num_elements too |
1239 | }; |
1240 | |
1241 | // We need a global swap as well |
1242 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1243 | inline void swap(sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& x, |
1244 | sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) { |
1245 | x.swap(y); |
1246 | } |
1247 | |
1248 | #undef JUMP_ |
1249 | |
1250 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1251 | const typename sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type |
1252 | sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET; |
1253 | |
1254 | // How full we let the table get before we resize. Knuth says .8 is |
1255 | // good -- higher causes us to probe too much, though saves memory |
1256 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1257 | const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 80; |
1258 | |
1259 | // How empty we let the table get before we resize lower. |
1260 | // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing |
1261 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1262 | const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT = |
1263 | static_cast<int>( |
1264 | 0.4 * sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT); |
1265 | } |
1266 | |