// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
//
// A sparse hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory use.
// It does this by using a *sparse table* (cf sparsetable.h),
// which uses between 1 and 2 bits to store empty buckets
// (we may need another bit for hashtables that support deletion).
//
// When empty buckets are so cheap, an appealing hashtable
// implementation is internal probing, in which the hashtable
// is a single table, and collisions are resolved by trying
// to insert again in another bucket.  The most cache-efficient
// internal probing schemes are linear probing (which suffers,
// alas, from clumping) and quadratic probing, which is what
// we implement by default.
//
// Deleted buckets are a bit of a pain.  We have to somehow mark
// deleted buckets (the probing must distinguish them from empty
// buckets).  The most principled way is to have another bitmap,
// but that's annoying and takes up space.  Instead we let the
// user specify an "impossible" key.  We set deleted buckets
// to have the impossible key.
//
// Note it is possible to change the value of the deleted key
// on the fly; you can even remove it, though after that point
// the hashtable is insert-only until you set it again.
//
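// For illustration, here is a minimal sketch of the deleted-key protocol as
// seen through the public wrappers (the -1 sentinel is just an example; any
// key you will never insert for real works):
//
//    #include <sparsehash/sparse_hash_set>
//
//    google::sparse_hash_set<int> s;
//    s.set_deleted_key(-1);  // -1 must never be inserted as a real element
//    s.insert(4);
//    s.erase(4);             // marks the bucket with the impossible key
//    s.clear_deleted_key();  // table is now insert-only until re-set
//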
// You probably shouldn't use this code directly.  Use
// sparse_hash_map<> or sparse_hash_set<> instead.
//
// You can modify the following, below:
// HT_OCCUPANCY_PCT            -- how full before we double size
// HT_EMPTY_PCT                -- how empty before we halve size
// HT_MIN_BUCKETS              -- smallest bucket size
// HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time
//
// You can also change enlarge_factor (which defaults to
// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
// HT_EMPTY_PCT) with set_resizing_parameters().
//
// How to decide what values to use?
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For enlarge_factor, you can use this chart to trade off expected
// lookup time against the space taken up.  By default, this
// code uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From
// http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP       Successful           Unsuccessful
// Quadratic collision resolution  1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
// Linear collision resolution     [1+1/(1-L)]/2        [1+1/(1-L)^2]/2
//
// -- enlarge_factor --           0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
//
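// For example, a sketch of tuning these knobs on an existing table ht
// (hypothetical object; this works through the public accessors below):
//
//    float shrink, grow;
//    ht.get_resizing_parameters(&shrink, &grow);
//    // Per the chart: at 0.50 occupancy an unsuccessful lookup costs
//    // ~2.19 probes, versus ~5.81 at the default 0.80.
//    ht.set_resizing_parameters(shrink, 0.5f);
//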
// The value type is required to be copy constructible and default
// constructible, but it need not be (and commonly isn't) assignable.

#pragma once

#include <assert.h>
#include <algorithm>    // for swap(), e.g.
#include <iterator>     // for iterator tags
#include <limits>       // for numeric_limits
#include <utility>      // for pair
#include <type_traits>  // for remove_const
#include <sparsehash/internal/hashtable-common.h>
#include <sparsehash/sparsetable>  // IWYU pragma: export
#include <stdexcept>    // for length_error

namespace google {

#ifndef SPARSEHASH_STAT_UPDATE
#define SPARSEHASH_STAT_UPDATE(x) ((void)0)
#endif

// The probing method
// Linear probing
// #define JUMP_(key, num_probes) ( 1 )
// Quadratic probing
#define JUMP_(key, num_probes) (num_probes)
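
// A note on the quadratic scheme: the probe loops below add
// JUMP_(key, num_probes) == num_probes on the i-th retry, so the offsets
// from the home bucket are the triangular numbers 1, 3, 6, 10, ...
// Since bucket_count() is always a power of two, this sequence visits
// every bucket exactly once before repeating.  E.g., from home bucket 0
// in an 8-bucket table: 0, 1, 3, 6, 2, 7, 5, 4.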

// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups.  Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
static const uint16_t DEFAULT_GROUP_SIZE = 48;  // fits in 1.5 words

// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.
//
// Value: what is stored in the table (each bucket is a Value).
// Key: something in a 1-to-1 correspondence to a Value, that can be used
//      to search for a Value in the table (find() takes a Key).
// HashFcn: Takes a Key and returns an integer, the more unique the better.
// ExtractKey: given a Value, returns the unique Key associated with it.
//             Must inherit from unary_function, or at least have a
//             result_type typedef indicating the return type of operator().
// SetKey: given a Value* and a Key, modifies the value such that
//         ExtractKey(value) == key.  We guarantee this is only called
//         with key == deleted_key.
// EqualKey: Given two Keys, says whether they are the same (that is,
//           if they are both associated with the same Value).
// Alloc: STL allocator to use to allocate memory.

template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
          class EqualKey, class Alloc>
class sparse_hashtable;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator;

// As far as iterating, we're basically just a sparsetable
// that skips over deleted elements.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator {
 private:
  using value_alloc_type =
      typename std::allocator_traits<A>::template rebind_alloc<V>;

 public:
  typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
      const_iterator;
  typedef typename sparsetable<V, DEFAULT_GROUP_SIZE,
                               value_alloc_type>::nonempty_iterator st_iterator;

  typedef std::forward_iterator_tag iterator_category;  // very little defined!
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_iterator(
      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
      st_iterator it_end)
      : ht(h), pos(it), end(it_end) {
    advance_past_deleted();
  }
  sparse_hashtable_iterator() {}  // never used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while (pos != end && ht->test_deleted(*this)) ++pos;
  }
  iterator& operator++() {
    assert(pos != end);
    ++pos;
    advance_past_deleted();
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }

  // The actual data
  const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
  st_iterator pos, end;
};

// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator {
 private:
  using value_alloc_type =
      typename std::allocator_traits<A>::template rebind_alloc<V>;

 public:
  typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
      const_iterator;
  typedef typename sparsetable<V, DEFAULT_GROUP_SIZE,
                               value_alloc_type>::const_nonempty_iterator
      st_iterator;

  typedef std::forward_iterator_tag iterator_category;  // very little defined!
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::const_reference reference;
  typedef typename value_alloc_type::const_pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_const_iterator(
      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
      st_iterator it_end)
      : ht(h), pos(it), end(it_end) {
    advance_past_deleted();
  }
  // This lets us convert regular iterators to const iterators
  sparse_hashtable_const_iterator() {}  // never used internally
  sparse_hashtable_const_iterator(const iterator& it)
      : ht(it.ht), pos(it.pos), end(it.end) {}
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while (pos != end && ht->test_deleted(*this)) ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end);
    ++pos;
    advance_past_deleted();
    return *this;
  }
  const_iterator operator++(int) {
    const_iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Comparison.
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }

  // The actual data
  const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
  st_iterator pos, end;
};

// And once again, but this time freeing up memory as we iterate
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_destructive_iterator {
 private:
  using value_alloc_type =
      typename std::allocator_traits<A>::template rebind_alloc<V>;

 public:
  typedef sparse_hashtable_destructive_iterator<V, K, HF, ExK, SetK, EqK, A>
      iterator;
  typedef
      typename sparsetable<V, DEFAULT_GROUP_SIZE,
                           value_alloc_type>::destructive_iterator st_iterator;

  typedef std::forward_iterator_tag iterator_category;  // very little defined!
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_destructive_iterator(
      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
      st_iterator it_end)
      : ht(h), pos(it), end(it_end) {
    advance_past_deleted();
  }
  sparse_hashtable_destructive_iterator() {}  // never used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while (pos != end && ht->test_deleted(*this)) ++pos;
  }
  iterator& operator++() {
    assert(pos != end);
    ++pos;
    advance_past_deleted();
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }

  // The actual data
  const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
  st_iterator pos, end;
};

template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
          class EqualKey, class Alloc>
class sparse_hashtable {
 private:
  using value_alloc_type =
      typename std::allocator_traits<Alloc>::template rebind_alloc<Value>;

 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
                                    EqualKey, Alloc> iterator;

  typedef sparse_hashtable_const_iterator<
      Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;

  typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey,
                                                SetKey, EqualKey,
                                                Alloc> destructive_iterator;

  // These come from tr1.  For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT;  // = 80 (out of 100);

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
  static const int HT_EMPTY_PCT;  // = 0.4 * HT_OCCUPANCY_PCT;

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size.  Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;

  // ITERATOR FUNCTIONS
  iterator begin() {
    return iterator(this, table.nonempty_begin(), table.nonempty_end());
  }
  iterator end() {
    return iterator(this, table.nonempty_end(), table.nonempty_end());
  }
  const_iterator begin() const {
    return const_iterator(this, table.nonempty_begin(), table.nonempty_end());
  }
  const_iterator end() const {
    return const_iterator(this, table.nonempty_end(), table.nonempty_end());
  }

  // These come from tr1 unordered_map.  They iterate over 'bucket' n.
  // For sparsehashtable, we could consider each 'group' to be a bucket,
  // I guess, but I don't really see the point.  We'll just consider
  // bucket n to be the n-th element of the sparsetable, if it's occupied,
  // or some empty element, otherwise.
  local_iterator begin(size_type i) {
    if (table.test(i))
      return local_iterator(this, table.get_iter(i), table.nonempty_end());
    else
      return local_iterator(this, table.nonempty_end(), table.nonempty_end());
  }
  local_iterator end(size_type i) {
    local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i)) ++it;
    return it;
  }
  const_local_iterator begin(size_type i) const {
    if (table.test(i))
      return const_local_iterator(this, table.get_iter(i),
                                  table.nonempty_end());
    else
      return const_local_iterator(this, table.nonempty_end(),
                                  table.nonempty_end());
  }
  const_local_iterator end(size_type i) const {
    const_local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i)) ++it;
    return it;
  }

  // This is used when resizing
  destructive_iterator destructive_begin() {
    return destructive_iterator(this, table.destructive_begin(),
                                table.destructive_end());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(this, table.destructive_end(),
                                table.destructive_end());
  }

  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const { return table.get_allocator(); }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }

 private:
  // We need to copy values when we set the special marker for deleted
  // elements, but, annoyingly, we can't just use the copy assignment
  // operator because value_type might not be assignable (it's often
  // pair<const X, Y>).  We use explicit destructor invocation and
  // placement new to get around this.  Arg.
  void set_value(pointer dst, const_reference src) {
    dst->~value_type();  // delete the old value, if any
    new (dst) value_type(src);
  }
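
  // For instance (illustrative only): if value_type is
  // std::pair<const int, std::string>, plain "*dst = src" would not compile,
  // because the const first member deletes copy assignment, while the
  // destroy-then-placement-new dance above works fine.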

  // This is used as a tag for the copy constructor, saying to destroy its
  // arg.  We have two ways of destructively copying: with potentially growing
  // the hashtable as we copy, and without.  To make sure the outside world
  // can't do a destructive copy, we make the type private.
  enum MoveDontCopyT { MoveDontCopy, MoveDontGrow };

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {  // gets rid of any deleted entries we have
    if (num_deleted) {     // get rid of deleted before writing
      sparse_hashtable tmp(MoveDontGrow, *this);
      swap(tmp);  // now we are tmp
    }
    assert(num_deleted == 0);
  }

  // Test if the given key is the deleted indicator.  Requires
  // num_deleted > 0, for correctness of read(), and because that
  // guarantees that key_info.delkey is valid.
  bool test_deleted_key(const key_type& key) const {
    assert(num_deleted > 0);
    return equals(key_info.delkey, key);
  }

 public:
  void set_deleted_key(const key_type& key) {
    // It's only safe to change what "deleted" means if we purge deleted
    // guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  key_type deleted_key() const {
    assert(settings.use_deleted() &&
           "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is the "deleted" marker
  bool test_deleted(size_type bucknum) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && table.test(bucknum) &&
           test_deleted_key(get_key(table.unsafe_get(bucknum)));
  }
  bool test_deleted(const iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }
  bool test_deleted(const const_iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }
  bool test_deleted(const destructive_iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }

 private:
  void check_use_deleted(const char* caller) {
    (void)caller;  // could log it if the assert failed
    assert(settings.use_deleted());
  }

  // Set it so test_deleted is true.  Returns true if the object was not
  // already deleted.
  // TODO(csilvers): make these private (also in densehashtable.h)
  bool set_deleted(iterator& it) {
    check_use_deleted("set_deleted()");
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  Returns true if the object was
  // previously deleted.
  bool clear_deleted(iterator& it) {
    check_use_deleted("clear_deleted()");
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow setting and clearing the deleted bit on a const
  // iterator.  We allow a const_iterator for the same reason you can
  // delete a const pointer: it's convenient, and semantically you can't
  // use 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator& it) {
    check_use_deleted("set_deleted()");
    bool retval = !test_deleted(it);
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  Returns true if the object was
  // previously deleted.
  bool clear_deleted(const_iterator& it) {
    check_use_deleted("clear_deleted()");
    return test_deleted(it);
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const { return table.num_nonempty() - num_deleted; }
  size_type max_size() const { return table.max_size(); }
  bool empty() const { return size() == 0; }
  size_type bucket_count() const { return table.size(); }
  size_type max_bucket_count() const { return max_size(); }
  // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
  // what we do.  We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes.  Returns true if we actually shrank.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking.  Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(table.num_nonempty() >= num_deleted);
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = table.num_nonempty() - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;  // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < static_cast<size_type>(sz * shrink_factor)) {
        sz /= 2;  // stay a power of 2
      }
      sparse_hashtable tmp(MoveDontCopy, *this, sz);
      swap(tmp);  // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);  // because we just considered it
    return retval;
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more
  // elements".
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if (settings.consider_shrink()) {  // see if lots of deletes happened
      if (maybe_shrink()) did_resize = true;
    }
    if (table.num_nonempty() >=
        (std::numeric_limits<size_type>::max)() - delta) {
      throw std::length_error("resize overflow");
    }
    if (bucket_count() >= HT_MIN_BUCKETS &&
        (table.num_nonempty() + delta) <= settings.enlarge_threshold())
      return did_resize;  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size =
        settings.min_buckets(table.num_nonempty() + delta, 0);
    if (needed_size <= bucket_count())  // we have enough buckets
      return did_resize;

    size_type resize_to = settings.min_buckets(
        table.num_nonempty() - num_deleted + delta, bucket_count());
    if (resize_to < needed_size &&  // may double resize_to
        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements
      // that once we purge them, we won't actually have needed to
      // grow.  But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert.  Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to * 2));
      if (table.num_nonempty() - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we
        // double.
        resize_to *= 2;
      }
    }

    sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
    swap(tmp);  // now we are tmp
    return true;
  }
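
  // Worked example (approximate; the exact thresholds are rounded inside
  // Settings): with the default enlarge_factor of 0.8 and 32 buckets, the
  // enlarge threshold sits near 32 * 0.8 = 25.6, so inserting a 26th
  // element triggers a resize to 64 buckets.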

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const sparse_hashtable& ht, size_type min_buckets_wanted) {
    clear();  // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to =
        settings.min_buckets(ht.size(), min_buckets_wanted);
    if (resize_to > bucket_count()) {  // we don't have enough buckets
      table.resize(resize_to);         // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht.
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient.
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    for (const_iterator it = ht.begin(); it != ht.end(); ++it) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           table.test(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);  // copies the value to here
    }
    settings.inc_num_ht_copies();
  }

  // Implementation is like copy_from, but it destroys the table of the
  // "from" guy by freeing sparsetable memory as we iterate.  This is
  // useful in resizing, since we're throwing away the "from" guy anyway.
  void move_from(MoveDontCopyT mover, sparse_hashtable& ht,
                 size_type min_buckets_wanted) {
    clear();  // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    size_type resize_to;
    if (mover == MoveDontGrow)
      resize_to = ht.bucket_count();  // keep same size as old ht
    else  // MoveDontCopy
      resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
    if (resize_to > bucket_count()) {  // we don't have enough buckets
      table.resize(resize_to);         // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht.
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient.
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
    for (destructive_iterator it = ht.destructive_begin();
         it != ht.destructive_end(); ++it) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      for (bucknum = hash(get_key(*it)) & (bucket_count() - 1);  // h % buck_cnt
           table.test(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & (bucket_count() - 1)) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);  // copies the value to here
    }
    settings.inc_num_ht_copies();
  }

  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {  // resize to this or larger
    if (settings.consider_shrink() || req_elements == 0) maybe_shrink();
    if (req_elements > table.num_nonempty())  // we only grow
      resize_delta(req_elements - table.num_nonempty());
  }

  // Get and change the value of shrink_factor and enlarge_factor.  The
  // description at the beginning of this file explains how to choose
  // the values.  Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- the default is fine, surprisingly.
  explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
                            const HashFcn& hf = HashFcn(),
                            const EqualKey& eql = EqualKey(),
                            const ExtractKey& ext = ExtractKey(),
                            const SetKey& set = SetKey(),
                            const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        table((expected_max_items_in_table == 0
                   ? HT_DEFAULT_STARTING_BUCKETS
                   : settings.min_buckets(expected_max_items_in_table, 0)),
              alloc) {
    settings.reset_thresholds(bucket_count());
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // We also provide a mechanism of saying you want to "move" the ht argument
  // into us instead of copying.
  sparse_hashtable(const sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    copy_from(ht, min_buckets_wanted);  // copy_from() ignores deleted entries
  }
  sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
  }

  sparse_hashtable& operator=(const sparse_hashtable& ht) {
    if (&ht == this) return *this;  // don't copy onto ourselves
    settings = ht.settings;
    key_info = ht.key_info;
    num_deleted = ht.num_deleted;
    // copy_from() calls clear and sets num_deleted to 0 too
    copy_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparse_hashtable& ht) {
    std::swap(settings, ht.settings);
    std::swap(key_info, ht.key_info);
    std::swap(num_deleted, ht.num_deleted);
    table.swap(ht.table);
    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    if (!empty() || (num_deleted != 0)) {
      table.clear();
    }
    settings.reset_thresholds(bucket_count());
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  template <typename K>
  std::pair<size_type, size_type> find_position(const K& key) const {
    size_type num_probes = 0;  // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    SPARSEHASH_STAT_UPDATE(total_lookups += 1);
    while (1) {                    // probe until something happens
      if (!table.test(bucknum)) {  // bucket is empty
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        if (insert_pos == ILLEGAL_BUCKET)  // found no prior place to insert
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos);
      } else if (test_deleted(bucknum)) {  // keep searching, but mark to insert
        if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum;
      } else if (equals(key, get_key(table.unsafe_get(bucknum)))) {
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;  // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count() &&
             "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }

 public:
  template <typename K>
  iterator find(const K& key) {
    if (size() == 0) return end();
    std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first == ILLEGAL_BUCKET)  // alas, not there
      return end();
    else
      return iterator(this, table.get_iter(pos.first), table.nonempty_end());
  }

  template <typename K>
  const_iterator find(const K& key) const {
    if (size() == 0) return end();
    std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first == ILLEGAL_BUCKET)  // alas, not there
      return end();
    else
      return const_iterator(this, table.get_iter(pos.first),
                            table.nonempty_end());
  }

  // This is a tr1 method: the bucket a given key is in, or what bucket
  // it would be put in, if it were to be inserted.  Shrug.
  size_type bucket(const key_type& key) const {
    std::pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
  }

  // Counts how many elements have key key.  For maps, it's either 0 or 1.
  template <typename K>
  size_type count(const K& key) const {
    std::pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  template <typename K>
  std::pair<iterator, iterator> equal_range(const K& key) {
    iterator pos = find(key);  // either an iterator or end
    if (pos == end()) {
      return std::pair<iterator, iterator>(pos, pos);
    } else {
      const iterator startpos = pos++;
      return std::pair<iterator, iterator>(startpos, pos);
    }
  }
  template <typename K>
  std::pair<const_iterator, const_iterator> equal_range(
      const K& key) const {
    const_iterator pos = find(key);  // either an iterator or end
    if (pos == end()) {
      return std::pair<const_iterator, const_iterator>(pos, pos);
    } else {
      const const_iterator startpos = pos++;
      return std::pair<const_iterator, const_iterator>(startpos, pos);
    }
  }

  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size()) {
      throw std::length_error("insert overflow");
    }
    if (test_deleted(pos)) {  // just replace if it's been deleted
      // The set() below will undelete this object.  We just worry about
      // stats
      assert(num_deleted > 0);
      --num_deleted;  // used to be, now it isn't
    }
    table.set(pos, obj);
    return iterator(this, table.get_iter(pos), table.nonempty_end());
  }

  // If you know *this is big enough to hold obj, use this routine
  std::pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey
    assert(
        (!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) &&
        "Inserting the deleted key");
    const std::pair<size_type, size_type> pos = find_position(get_key(obj));
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return std::pair<iterator, bool>(
          iterator(this, table.get_iter(pos.first), table.nonempty_end()),
          false);  // false: we didn't insert
    } else {  // pos.second says where to put it
      return std::pair<iterator, bool>(insert_at(obj, pos.second), true);
    }
  }

  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
    size_t dist = std::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)()) {
      throw std::length_error("insert-range overflow");
    }
    resize_delta(static_cast<size_type>(dist));
    for (; dist > 0; --dist, ++f) {
      insert_noresize(*f);
    }
  }

  // (2) Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l, std::input_iterator_tag) {
    for (; f != l; ++f) insert(*f);
  }

 public:
  // This is the normal insert routine, used by the outside world
  std::pair<iterator, bool> insert(const_reference obj) {
    resize_delta(1);  // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l,
           typename std::iterator_traits<InputIterator>::iterator_category());
  }
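
  // Usage sketch (v is a hypothetical vector of value_type): inserting from
  // a forward or random-access iterator dispatches to overload (1) above,
  // so the table is grown once up front rather than once per element:
  //
  //    std::vector<value_type> v = ...;
  //    ht.insert(v.begin(), v.end());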

  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting delkey
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Inserting the deleted key");
    const std::pair<size_type, size_type> pos = find_position(key);
    DefaultValue default_value;
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return *table.get_iter(pos.first);
    } else if (resize_delta(1)) {  // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to
      // insert.
      return *insert_noresize(default_value(key)).first;
    } else {  // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }
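
  // A sketch of the kind of DefaultValue functor a map-like wrapper might
  // supply (hypothetical; assumes value_type is std::pair<const K, V>):
  //
  //    struct DefaultPair {
  //      value_type operator()(const key_type& k) const {
  //        return value_type(k, V());  // key paired with a default-built V
  //      }
  //    };
  //
  //    ht.template find_or_insert<DefaultPair>(k).second = v;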

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    // First, double-check we're not erasing delkey.
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Erasing the deleted key");
    const_iterator pos = find(key);  // shrug: shouldn't need to be const
    if (pos != end()) {
      assert(!test_deleted(pos));  // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
      return 1;  // because we deleted one thing
    } else {
      return 0;  // because we deleted nothing
    }
  }

  // Erases the element the iterator points to, by marking it deleted.
  void erase(iterator pos) {
    if (pos == end()) return;  // sanity check
    if (set_deleted(pos)) {    // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }

  void erase(iterator f, iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }

  // We allow you to erase a const_iterator just like we allow you to
  // erase an iterator.  This is in parallel to 'delete': you can delete
  // a const pointer just like a non-const pointer.  The logic is that
  // you can't use the object after it's erased anyway, so it doesn't matter
  // if it's const or not.
  void erase(const_iterator pos) {
    if (pos == end()) return;  // sanity check
    if (set_deleted(pos)) {    // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }
  void erase(const_iterator f, const_iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }

  // COMPARISON
  bool operator==(const sparse_hashtable& ht) const {
    if (size() != ht.size()) {
      return false;
    } else if (this == &ht) {
      return true;
    } else {
      // Iterate through the elements in "this" and see if the
      // corresponding element is in ht
      for (const_iterator it = begin(); it != end(); ++it) {
        const_iterator it2 = ht.find(get_key(*it));
        if ((it2 == ht.end()) || (*it != *it2)) {
          return false;
        }
      }
      return true;
    }
  }
  bool operator!=(const sparse_hashtable& ht) const { return !(*this == ht); }

  // I/O
  // We support reading and writing hashtables to disk.  NOTE that
  // this only stores the hashtable metadata, not the stuff you've
  // actually put in the hashtable!  Alas, since I don't know how to
  // write a hasher or key_equal, you have to make sure everything
  // but the table is the same.  We compact before writing.
  //
  // The OUTPUT type needs to support a Write() operation.  File and
  // OutputBuffer are appropriate types to pass in.
  //
  // The INPUT type needs to support a Read() operation.  File and
  // InputBuffer are appropriate types to pass in.
  template <typename OUTPUT>
  bool write_metadata(OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    return table.write_metadata(fp);
  }

  template <typename INPUT>
  bool read_metadata(INPUT* fp) {
    num_deleted = 0;  // since we got rid of deleted entries before writing
    const bool result = table.read_metadata(fp);
    settings.reset_thresholds(bucket_count());
    return result;
  }

  // Only meaningful if value_type is a POD.
  template <typename OUTPUT>
  bool write_nopointer_data(OUTPUT* fp) {
    return table.write_nopointer_data(fp);
  }

  // Only meaningful if value_type is a POD.
  template <typename INPUT>
  bool read_nopointer_data(INPUT* fp) {
    return table.read_nopointer_data(fp);
  }

  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
  // (istream, ostream, etc) *or* a class providing
  // Read(void*, size_t) and Write(const void*, size_t)
  // (respectively), which writes a buffer into a stream
  // (which the INPUT/OUTPUT instance presumably owns).

  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;

  // ValueSerializer: a functor.  operator()(OUTPUT*, const value_type&)
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    return table.serialize(serializer, fp);
  }

  // ValueSerializer: a functor.  operator()(INPUT*, value_type*)
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    num_deleted = 0;  // since we got rid of deleted entries before writing
    const bool result = table.unserialize(serializer, fp);
    settings.reset_thresholds(bucket_count());
    return result;
  }
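
  // Round-trip usage sketch (assumes a POD value_type so that
  // NopointerSerializer applies; 'ht' and the filename are hypothetical):
  //
  //    FILE* fp = fopen("hashtable.dump", "wb");
  //    ht.serialize(NopointerSerializer(), fp);
  //    fclose(fp);
  //
  //    FILE* fp2 = fopen("hashtable.dump", "rb");
  //    decltype(ht) ht2;
  //    ht2.unserialize(NopointerSerializer(), fp2);
  //    fclose(fp2);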

 private:
  // Table is the main storage class.
  typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;

  // Package templated functors with the other types to eliminate memory
  // needed for storing these zero-size operators.  Since ExtractKey and
  // hasher's operator() might have the same function signature, they
  // must be packaged in different classes.
  struct Settings
      : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                   HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
                                                     size_type, HT_MIN_BUCKETS>(
              hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };

  // KeyInfo stores the deleted key and packages the zero-size functors:
  // ExtractKey and SetKey.
  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
        : ExtractKey(ek), SetKey(sk), EqualKey(eq) {}
    // We want to return the exact same type as ExtractKey: Key or const
    // Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    template <typename K1, typename K2>
    bool equals(const K1& a, const K2& b) const {
      return EqualKey::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted
    // (benchmark!)
    typename std::remove_const<key_type>::type delkey;
  };

  // Utility functions to access the templated operators
  template <typename K>
  size_type hash(const K& v) const { return settings.hash(v); }
  template <typename K1, typename K2>
  bool equals(const K1& a, const K2& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); }

 private:
  // Actual data
  Settings settings;
  KeyInfo key_info;
  size_type num_deleted;  // how many occupied buckets are marked deleted
  Table table;            // holds num_buckets and num_elements too
};

// We need a global swap as well
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& x,
                 sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type
    sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though it saves memory.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 80;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
    static_cast<int>(
        0.4 * sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
}  // namespace google