1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31//
32// A dense hashtable is a particular implementation of
33// a hashtable: one that is meant to minimize memory allocation.
34// It does this by using an array to store all the data. We
35// steal a value from the key space to indicate "empty" array
36// elements (ie indices where no item lives) and another to indicate
37// "deleted" elements.
38//
39// (Note it is possible to change the value of the delete key
40// on the fly; you can even remove it, though after that point
41// the hashtable is insert_only until you set it again. The empty
42// value however can't be changed.)
43//
44// To minimize allocation and pointer overhead, we use internal
45// probing, in which the hashtable is a single table, and collisions
46// are resolved by trying to insert again in another bucket. The
47// most cache-efficient internal probing schemes are linear probing
48// (which suffers, alas, from clumping) and quadratic probing, which
49// is what we implement by default.
50//
51// Type requirements: value_type is required to be Copy Constructible
52// and Default Constructible. It is not required to be (and commonly
53// isn't) Assignable.
54//
55// You probably shouldn't use this code directly. Use dense_hash_map<>
56// or dense_hash_set<> instead.
57
58// You can change the following below:
59// HT_OCCUPANCY_PCT -- how full before we double size
60// HT_EMPTY_PCT -- how empty before we halve size
61// HT_MIN_BUCKETS -- default smallest bucket size
62//
63// You can also change enlarge_factor (which defaults to
64// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
65// HT_EMPTY_PCT) with set_resizing_parameters().
66//
67// How to decide what values to use?
68// shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good.
69// HT_MIN_BUCKETS is probably unnecessary since you can specify
70// (indirectly) the starting number of buckets at construct-time.
71// For enlarge_factor, you can use this chart to try to trade-off
72// expected lookup time to the space taken up. By default, this
73// code uses quadratic probing, though you can change it to linear
74// via JUMP_ below if you really want to.
75//
76// From
77// http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
78// NUMBER OF PROBES / LOOKUP Successful Unsuccessful
79// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L)
80// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2
81//
82// -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99
83// QUADRATIC COLLISION RES.
84// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11
85// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6
86// LINEAR COLLISION RES.
87// probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5
88// probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0
89
90#pragma once
91
92#include <assert.h>
93#include <stdio.h> // for FILE, fwrite, fread
94#include <algorithm> // For swap(), eg
95#include <iterator> // For iterator tags
96#include <limits> // for numeric_limits
97#include <memory> // For uninitialized_fill
98#include <utility> // for pair
99#include <stdexcept> // For length_error
100#include <type_traits>
101#include <sparsehash/internal/hashtable-common.h>
102#include <sparsehash/internal/libc_allocator_with_realloc.h>
103
104namespace google {
105
106// The probing method
107// Linear probing
108// #define JUMP_(key, num_probes) ( 1 )
109// Quadratic probing
110#define JUMP_(key, num_probes) (num_probes)
111
112// Hashtable class, used to implement the hashed associative containers
113// hash_set and hash_map.
114
115// Value: what is stored in the table (each bucket is a Value).
116// Key: something in a 1-to-1 correspondence to a Value, that can be used
117// to search for a Value in the table (find() takes a Key).
118// HashFcn: Takes a Key and returns an integer, the more unique the better.
119// ExtractKey: given a Value, returns the unique Key associated with it.
120// Must inherit from unary_function, or at least have a
121// result_type enum indicating the return type of operator().
122// SetKey: given a Value* and a Key, modifies the value such that
123// ExtractKey(value) == key. We guarantee this is only called
124// with key == deleted_key or key == empty_key.
125// EqualKey: Given two Keys, says whether they are the same (that is,
126// if they are both associated with the same Value).
127// Alloc: STL allocator to use to allocate memory.
128
129template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
130 class EqualKey, class Alloc>
131class dense_hashtable;
132
133template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
134struct dense_hashtable_iterator;
135
136template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
137struct dense_hashtable_const_iterator;
138
139// We're just an array, but we need to skip over empty and deleted elements
140template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
141struct dense_hashtable_iterator {
142 private:
143 using value_alloc_type =
144 typename std::allocator_traits<A>::template rebind_alloc<V>;
145
146 public:
147 typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
148 typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
149 const_iterator;
150
151 typedef std::forward_iterator_tag iterator_category; // very little defined!
152 typedef V value_type;
153 typedef typename value_alloc_type::difference_type difference_type;
154 typedef typename value_alloc_type::size_type size_type;
155 typedef typename value_alloc_type::reference reference;
156 typedef typename value_alloc_type::pointer pointer;
157
158 // "Real" constructor and default constructor
159 dense_hashtable_iterator(
160 const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it,
161 pointer it_end, bool advance)
162 : ht(h), pos(it), end(it_end) {
163 if (advance) advance_past_empty_and_deleted();
164 }
165 dense_hashtable_iterator() {}
166 // The default destructor is fine; we don't define one
167 // The default operator= is fine; we don't define one
168
169 // Happy dereferencer
170 reference operator*() const { return *pos; }
171 pointer operator->() const { return &(operator*()); }
172
173 // Arithmetic. The only hard part is making sure that
174 // we're not on an empty or marked-deleted array element
175 void advance_past_empty_and_deleted() {
176 while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)))
177 ++pos;
178 }
179 iterator& operator++() {
180 assert(pos != end);
181 ++pos;
182 advance_past_empty_and_deleted();
183 return *this;
184 }
185 iterator operator++(int) {
186 iterator tmp(*this);
187 ++*this;
188 return tmp;
189 }
190
191 // Comparison.
192 bool operator==(const iterator& it) const { return pos == it.pos; }
193 bool operator!=(const iterator& it) const { return pos != it.pos; }
194
195 // The actual data
196 const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
197 pointer pos, end;
198};
199
200// Now do it all again, but with const-ness!
201template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
202struct dense_hashtable_const_iterator {
203 private:
204 using value_alloc_type =
205 typename std::allocator_traits<A>::template rebind_alloc<V>;
206
207 public:
208 typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
209 typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
210 const_iterator;
211
212 typedef std::forward_iterator_tag iterator_category; // very little defined!
213 typedef V value_type;
214 typedef typename value_alloc_type::difference_type difference_type;
215 typedef typename value_alloc_type::size_type size_type;
216 typedef typename value_alloc_type::const_reference reference;
217 typedef typename value_alloc_type::const_pointer pointer;
218
219 // "Real" constructor and default constructor
220 dense_hashtable_const_iterator(
221 const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it,
222 pointer it_end, bool advance)
223 : ht(h), pos(it), end(it_end) {
224 if (advance) advance_past_empty_and_deleted();
225 }
226 dense_hashtable_const_iterator() : ht(NULL), pos(pointer()), end(pointer()) {}
227 // This lets us convert regular iterators to const iterators
228 dense_hashtable_const_iterator(const iterator& it)
229 : ht(it.ht), pos(it.pos), end(it.end) {}
230 // The default destructor is fine; we don't define one
231 // The default operator= is fine; we don't define one
232
233 // Happy dereferencer
234 reference operator*() const { return *pos; }
235 pointer operator->() const { return &(operator*()); }
236
237 // Arithmetic. The only hard part is making sure that
238 // we're not on an empty or marked-deleted array element
239 void advance_past_empty_and_deleted() {
240 while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)))
241 ++pos;
242 }
243 const_iterator& operator++() {
244 assert(pos != end);
245 ++pos;
246 advance_past_empty_and_deleted();
247 return *this;
248 }
249 const_iterator operator++(int) {
250 const_iterator tmp(*this);
251 ++*this;
252 return tmp;
253 }
254
255 // Comparison.
256 bool operator==(const const_iterator& it) const { return pos == it.pos; }
257 bool operator!=(const const_iterator& it) const { return pos != it.pos; }
258
259 // The actual data
260 const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht;
261 pointer pos, end;
262};
263
264template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
265 class EqualKey, class Alloc>
266class dense_hashtable {
267 private:
268 using value_alloc_type =
269 typename std::allocator_traits<Alloc>::template rebind_alloc<Value>;
270
271 public:
272 typedef Key key_type;
273 typedef Value value_type;
274 typedef HashFcn hasher;
275 typedef EqualKey key_equal;
276 typedef Alloc allocator_type;
277
278 typedef typename value_alloc_type::size_type size_type;
279 typedef typename value_alloc_type::difference_type difference_type;
280 typedef typename value_alloc_type::reference reference;
281 typedef typename value_alloc_type::const_reference const_reference;
282 typedef typename value_alloc_type::pointer pointer;
283 typedef typename value_alloc_type::const_pointer const_pointer;
284 typedef dense_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
285 EqualKey, Alloc> iterator;
286
287 typedef dense_hashtable_const_iterator<
288 Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;
289
290 // These come from tr1. For us they're the same as regular iterators.
291 typedef iterator local_iterator;
292 typedef const_iterator const_local_iterator;
293
294 // How full we let the table get before we resize, by default.
295 // Knuth says .8 is good -- higher causes us to probe too much,
296 // though it saves memory.
297 static const int HT_OCCUPANCY_PCT; // defined at the bottom of this file
298
299 // How empty we let the table get before we resize lower, by default.
300 // (0.0 means never resize lower.)
301 // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
302 static const int HT_EMPTY_PCT; // defined at the bottom of this file
303
304 // Minimum size we're willing to let hashtables be.
305 // Must be a power of two, and at least 4.
306 // Note, however, that for a given hashtable, the initial size is a
307 // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
308 static const size_type HT_MIN_BUCKETS = 4;
309
310 // By default, if you don't specify a hashtable size at
311 // construction-time, we use this size. Must be a power of two, and
312 // at least HT_MIN_BUCKETS.
313 static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
314
315 // ITERATOR FUNCTIONS
316 iterator begin() { return iterator(this, table, table + num_buckets, true); }
317 iterator end() {
318 return iterator(this, table + num_buckets, table + num_buckets, true);
319 }
320 const_iterator begin() const {
321 return const_iterator(this, table, table + num_buckets, true);
322 }
323 const_iterator end() const {
324 return const_iterator(this, table + num_buckets, table + num_buckets, true);
325 }
326
327 // These come from tr1 unordered_map. They iterate over 'bucket' n.
328 // We'll just consider bucket n to be the n-th element of the table.
329 local_iterator begin(size_type i) {
330 return local_iterator(this, table + i, table + i + 1, false);
331 }
332 local_iterator end(size_type i) {
333 local_iterator it = begin(i);
334 if (!test_empty(i) && !test_deleted(i)) ++it;
335 return it;
336 }
337 const_local_iterator begin(size_type i) const {
338 return const_local_iterator(this, table + i, table + i + 1, false);
339 }
340 const_local_iterator end(size_type i) const {
341 const_local_iterator it = begin(i);
342 if (!test_empty(i) && !test_deleted(i)) ++it;
343 return it;
344 }
345
  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  // NOTE(review): 'settings' and 'key_info' convert implicitly to the
  // functor types here, so they presumably inherit from HashFcn/EqualKey
  // (a base-class-storage trick) -- confirm against the Settings/KeyInfo
  // definitions below.
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const { return allocator_type(val_info); }

  // Accessor function for statistics gathering: how many times this
  // table has been copied/rehashed (incremented in copy_or_move_from).
  int num_table_copies() const { return settings.num_ht_copies(); }
353
 private:
  // Annoyingly, we can't copy values around, because they might have
  // const components (they're probably pair<const X, Y>). We use
  // explicit destructor invocation and placement new to get around
  // this. Arg.
  //
  // Overwrites the bucket at 'dst' by destroying whatever value is there
  // and constructing a new value_type from 'args' in its place.  Every
  // bucket always holds a constructed value (possibly the empty/deleted
  // sentinel), so the destructor call is always valid.
  template <typename... Args>
  void set_value(pointer dst, Args&&... args) {
    dst->~value_type();  // delete the old value, if any
    new (dst) value_type(std::forward<Args>(args)...);
  }

  // Runs the destructor on buckets [first, last).  Storage itself is not
  // freed here; callers deallocate or reuse it afterwards.
  void destroy_buckets(size_type first, size_type last) {
    for (; first != last; ++first) table[first].~value_type();
  }
368
  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries. This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at. This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {  // gets rid of any deleted entries we have
    if (num_deleted) {  // get rid of deleted before writing
      size_type resize_to = settings.min_buckets(
          num_elements, bucket_count());
      // Rehashing into tmp only transfers live entries (the rehash loop
      // skips deleted sentinels), so the moved-to table has none.
      dense_hashtable tmp(std::move(*this), resize_to);
      swap(tmp);  // now we are tmp
    }
    assert(num_deleted == 0);
  }
385
  // Test if the given key is the deleted indicator. Requires
  // num_deleted > 0, for correctness of read(), and because that
  // guarantees that key_info.delkey is valid.  Callers short-circuit on
  // num_deleted > 0 before calling, which upholds the precondition.
  bool test_deleted_key(const key_type& key) const {
    assert(num_deleted > 0);
    return equals(key_info.delkey, key);
  }
393
 public:
  // Declare 'key' as the deleted-entry sentinel.  It must differ from the
  // empty key (if one has been set).  Any existing deleted entries are
  // purged first so the previous sentinel can't be misinterpreted.
  void set_deleted_key(const key_type& key) {
    // the empty indicator (if specified) and the deleted indicator
    // must be different
    assert(
        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
        "Passed the empty-key to set_deleted_key");
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  // Forget the deleted sentinel.  Deleted entries are purged first; until
  // a new sentinel is set, the table is effectively insert-only.
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  // Returns the current deleted sentinel; only valid once one is set.
  key_type deleted_key() const {
    assert(settings.use_deleted() &&
           "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }
415
  // These are public so the iterators can use them
  // True if the item at position bucknum is "deleted" marker.
  // The 'num_deleted > 0' check both short-circuits the common case and
  // satisfies test_deleted_key()'s precondition.
  bool test_deleted(size_type bucknum) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(table[bucknum]));
  }
  bool test_deleted(const iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }
  bool test_deleted(const const_iterator& it) const {
    // Invariant: !use_deleted() implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && test_deleted_key(get_key(*it));
  }
433
 private:
  // Debug-time guard: a deleted sentinel must be configured before any
  // set_deleted/clear_deleted operation.
  void check_use_deleted(const char* caller) {
    (void)caller;  // could log it if the assert failed
    assert(settings.use_deleted());
  }

  // Set it so test_deleted is true. true if object didn't used to be deleted.
  // Marks the bucket by overwriting its key with the deleted sentinel.
  bool set_deleted(iterator& it) {
    check_use_deleted("set_deleted()");
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false. true if object used to be deleted.
  bool clear_deleted(iterator& it) {
    check_use_deleted("clear_deleted()");
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow to set/clear the deleted bit on a const iterator.
  // We allow a const_iterator for the same reason you can delete a
  // const pointer: it's convenient, and semantically you can't use
  // 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator& it) {
    check_use_deleted("set_deleted()");
    bool retval = !test_deleted(it);
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false. true if object used to be deleted.
  bool clear_deleted(const_iterator& it) {
    check_use_deleted("clear_deleted()");
    return test_deleted(it);
  }
471
  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries. This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at. This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is "empty" marker.
  // Unlike the deleted key, the empty key is mandatory, hence the assert.
  bool test_empty(size_type bucknum) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(key_info.empty_key, get_key(table[bucknum]));
  }
  bool test_empty(const iterator& it) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(key_info.empty_key, get_key(*it));
  }
  bool test_empty(const const_iterator& it) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(key_info.empty_key, get_key(*it));
  }
493
494 private:
495 void fill_range_with_empty(pointer table_start, size_type count) {
496 for (size_type i = 0; i < count; ++i)
497 {
498 construct_key(&table_start[i], key_info.empty_key);
499 }
500 }
501
 public:
  // Declare 'key' as the empty-bucket sentinel.  This is also the point
  // where the bucket array is actually allocated: the constructor only
  // recorded num_buckets, deferring allocation until the empty key (used
  // to initialize every bucket) is known.
  void set_empty_key(const key_type& key) {
    // Once you set the empty key, you can't change it
    assert(!settings.use_empty() && "Calling set_empty_key multiple times");
    // The deleted indicator (if specified) and the empty indicator
    // must be different.
    assert(
        (!settings.use_deleted() || !equals(key, key_info.delkey)) &&
        "Setting the empty key the same as the deleted key");
    settings.set_use_empty(true);
    key_info.empty_key = key;

    assert(!table);  // must set before first use
    // num_buckets was set in constructor even though table was NULL
    table = val_info.allocate(num_buckets);
    assert(table);
    fill_range_with_empty(table, num_buckets);
  }
  // Returns the empty sentinel; only valid once set_empty_key was called.
  key_type empty_key() const {
    assert(settings.use_empty());
    return key_info.empty_key;
  }
524
  // FUNCTIONS CONCERNING SIZE
 public:
  // Live elements: buckets holding real values minus those marked deleted.
  size_type size() const { return num_elements - num_deleted; }
  size_type max_size() const { return val_info.max_size(); }
  bool empty() const { return size() == 0; }
  size_type bucket_count() const { return num_buckets; }
  size_type max_bucket_count() const { return max_size(); }
  // Counts occupied buckets, including deleted-marked ones.
  size_type nonempty_bucket_count() const { return num_elements; }
  // These are tr1 methods. Their idea of 'bucket' doesn't map well to
  // what we do. We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }
538
539 private:
540 // Because of the above, size_type(-1) is never legal; use it for errors
541 static const size_type ILLEGAL_BUCKET = size_type(-1);
542
  // Used after a string of deletes. Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking. Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = num_elements - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      // Halve repeatedly until the remaining elements would exceed the
      // shrink threshold of the candidate size (or we hit the floor).
      size_type sz = bucket_count() / 2;  // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < sz * shrink_factor) {
        sz /= 2;  // stay a power of 2
      }
      // Rehash into a smaller table, then adopt it via swap.
      dense_hashtable tmp(std::move(*this), sz);  // Do the actual resizing
      swap(tmp);  // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);  // because we just considered it
    return retval;
  }
574
  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  // Throws std::length_error if num_elements + delta would overflow.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if (settings.consider_shrink()) {  // see if lots of deletes happened
      if (maybe_shrink()) did_resize = true;
    }
    if (num_elements >= (std::numeric_limits<size_type>::max)() - delta) {
      throw std::length_error("resize overflow");
    }
    if (bucket_count() >= HT_MIN_BUCKETS &&
        (num_elements + delta) <= settings.enlarge_threshold())
      return did_resize;  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable. So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room). But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    size_type needed_size = settings.min_buckets(num_elements + delta, 0);
    if (needed_size <= bucket_count())  // we have enough buckets
      return did_resize;

    size_type resize_to = settings.min_buckets(
        num_elements - num_deleted + delta, bucket_count());

    // When num_deleted is large, we may still grow but we do not want to
    // over expand. So we reduce needed_size by a portion of num_deleted
    // (the exact portion does not matter). This is especially helpful
    // when min_load_factor is zero (no shrink at all) to avoid doubling
    // the bucket count to infinity. See also test ResizeWithoutShrink.
    needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0);

    if (resize_to < needed_size &&  // may double resize_to
        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow. But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert. Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to * 2));
      if (num_elements - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }
    // Rehash (moving live entries only) into the new size and adopt it.
    dense_hashtable tmp(std::move(*this), resize_to);
    swap(tmp);  // now we are tmp
    return true;
  }
630
  // We require table be not-NULL and empty before calling this.
  // Tag-dispatched on whether the allocator supports realloc (true_type
  // for libc_allocator_with_realloc -- see clear_to_size).
  void resize_table(size_type /*old_size*/, size_type new_size,
                    std::true_type) {
    table = val_info.realloc_or_die(table, new_size);
  }

  // Fallback for ordinary allocators: free the old block, allocate anew.
  // Contents are not preserved; callers re-fill the table afterwards.
  void resize_table(size_type old_size, size_type new_size, std::false_type) {
    val_info.deallocate(table, old_size);
    table = val_info.allocate(new_size);
  }
641
  // Used to actually do the rehashing when we grow/shrink a hashtable.
  // 'ht' may be an lvalue (copy) or rvalue (move); elements are moved out
  // of it only when it was passed as an rvalue.  Deleted entries are
  // skipped implicitly because iteration only visits live buckets.
  template <typename Hashtable>
  void copy_or_move_from(Hashtable&& ht, size_type min_buckets_wanted) {
    clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    for (auto&& value : ht) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // NOTE: JUMP_'s first argument is the literal token 'key', which is
      // not a variable in this scope; both probing macros ignore their
      // first parameter, so it is never evaluated.  Quadratic probing:
      // step grows by num_probes each iteration.
      for (bucknum = hash(get_key(value)) & bucket_count_minus_one;
           !test_empty(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>");
      }

      // Move the element only if the source hashtable was an rvalue.
      using will_move = std::is_rvalue_reference<Hashtable&&>;
      using value_t = typename std::conditional<will_move::value, value_type&&, const_reference>::type;

      set_value(&table[bucknum], std::forward<value_t>(value));
      num_elements++;
    }
    settings.inc_num_ht_copies();
  }
672
673 // Required by the spec for hashed associative container
674 public:
675 // Though the docs say this should be num_buckets, I think it's much
676 // more useful as num_elements. As a special feature, calling with
677 // req_elements==0 will cause us to shrink if we can, saving space.
678 void resize(size_type req_elements) { // resize to this or larger
679 if (settings.consider_shrink() || req_elements == 0) maybe_shrink();
680 if (req_elements > num_elements) resize_delta(req_elements - num_elements);
681 }
682
  // Get and change the value of shrink_factor and enlarge_factor. The
  // description at the beginning of this file explains how to choose
  // the values. Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  // Changing the factors also recomputes the cached enlarge/shrink
  // thresholds for the current bucket count.
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }
695
  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor. We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  //
  // Note: no memory is allocated here; 'table' stays NULL until
  // set_empty_key() is called, which performs the actual allocation.
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey(),
                           const SetKey& set = SetKey(),
                           const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        num_elements(0),
        num_buckets(expected_max_items_in_table == 0
                        ? HT_DEFAULT_STARTING_BUCKETS
                        : settings.min_buckets(expected_max_items_in_table, 0)),
        val_info(alloc_impl<value_alloc_type>(alloc)),
        table(NULL) {
    // table is NULL until emptyval is set. However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    settings.reset_thresholds(bucket_count());
  }
719
  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // Copying rehashes every live element of 'ht'; deleted entries are
  // dropped in the process.
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(ht.val_info),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_from will crash, so we do our own copying.
      // (ht can't hold elements without an empty key, so there is nothing
      // to copy beyond sizing.)
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(ht, min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
741
  // Move constructor: default-construct, then exchange state with 'ht'.
  // Afterwards 'ht' is a valid, default-constructed-like table.
  dense_hashtable(dense_hashtable&& ht)
      : dense_hashtable() {
    swap(ht);
  }
746
  // Move-construct while also resizing to at least min_buckets_wanted.
  // Live elements are moved out of 'ht' during the rehash; deleted
  // entries are dropped.
  dense_hashtable(dense_hashtable&& ht,
                  size_type min_buckets_wanted)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(std::move(ht.val_info)),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_or_move_from will crash, so we do our own copying.
      // (ht can't hold elements without an empty key, so only sizing matters.)
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(std::move(ht), min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
766
  // Copy assignment. Deleted entries in ht are not copied.
  dense_hashtable& operator=(const dense_hashtable& ht) {
    if (&ht == this) return *this;  // don't copy onto ourselves
    if (!ht.settings.use_empty()) {
      // Source never had its empty key set; it must be empty, so become an
      // empty table with ht's thresholds via copy-construct + swap.
      assert(ht.empty());
      dense_hashtable empty_table(ht);  // empty table with ht's thresholds
      this->swap(empty_table);
      return *this;
    }
    settings = ht.settings;
    key_info = ht.key_info;
    // copy_or_move_from() calls clear and sets num_deleted to 0 too
    copy_or_move_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }
782
  // Move assignment, implemented as a swap; ht receives our old contents
  // and releases them when it is destroyed. Self-move is asserted against
  // (it would be a caller bug), not handled.
  dense_hashtable& operator=(dense_hashtable&& ht) {
    assert(&ht != this);  // this should not happen
    swap(ht);
    return *this;
  }
788
  // Destroy every bucket's value, then free the bucket array. table may be
  // NULL if emptyval was never set (see the constructors above).
  ~dense_hashtable() {
    if (table) {
      destroy_buckets(0, num_buckets);
      val_info.deallocate(table, num_buckets);
    }
  }
795
  // Many STL algorithms use swap instead of copy constructors.
  // O(1): only pointers, counters and the packed functors are exchanged;
  // no element is copied or moved.
  void swap(dense_hashtable& ht) {
    std::swap(settings, ht.settings);
    std::swap(key_info, ht.key_info);
    std::swap(num_deleted, ht.num_deleted);
    std::swap(num_elements, ht.num_elements);
    std::swap(num_buckets, ht.num_buckets);
    std::swap(table, ht.table);
    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }
808
809 private:
  // Reset the table to new_num_buckets all-empty buckets, allocating the
  // array on first use or resizing the existing one. Destroys every stored
  // value and zeroes the element/deleted counters.
  void clear_to_size(size_type new_num_buckets) {
    if (!table) {
      table = val_info.allocate(new_num_buckets);
    } else {
      destroy_buckets(0, num_buckets);
      if (new_num_buckets != num_buckets) {  // resize, if necessary
        // realloc-style resizing is only attempted when the allocator is the
        // default libc_allocator_with_realloc; resize_table dispatches on
        // this compile-time flag.
        typedef std::integral_constant<
            bool, std::is_same<value_alloc_type,
                               libc_allocator_with_realloc<value_type>>::value>
            realloc_ok;
        resize_table(num_buckets, new_num_buckets, realloc_ok());
      }
    }
    assert(table);
    fill_range_with_empty(table, new_num_buckets);
    num_elements = 0;
    num_deleted = 0;
    num_buckets = new_num_buckets;  // our new size
    settings.reset_thresholds(bucket_count());
  }
830
831 public:
832 // It's always nice to be able to clear a table without deallocating it
833 void clear() {
834 // If the table is already empty, and the number of buckets is
835 // already as we desire, there's nothing to do.
836 const size_type new_num_buckets = settings.min_buckets(0, 0);
837 if (num_elements == 0 && new_num_buckets == num_buckets) {
838 return;
839 }
840 clear_to_size(new_num_buckets);
841 }
842
843 // Clear the table without resizing it.
844 // Mimicks the stl_hashtable's behaviour when clear()-ing in that it
845 // does not modify the bucket count
846 void clear_no_resize() {
847 if (num_elements > 0) {
848 assert(table);
849 destroy_buckets(0, num_buckets);
850 fill_range_with_empty(table, num_buckets);
851 }
852 // don't consider to shrink before another erase()
853 settings.reset_thresholds(bucket_count());
854 num_elements = 0;
855 num_deleted = 0;
856 }
857
858 // LOOKUP ROUTINES
859 private:
860 // Returns a pair of positions: 1st where the object is, 2nd where
861 // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET
862 // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
863 // Note: because of deletions where-to-insert is not trivial: it's the
864 // first deleted bucket we see, as long as we don't find the key later
  template <typename K>
  std::pair<size_type, size_type> find_position(const K& key) const {
    size_type num_probes = 0;  // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    // Masking works because the bucket count is kept a power of two.
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    while (1) {                  // probe until something happens
      if (test_empty(bucknum)) {  // bucket is empty
        if (insert_pos == ILLEGAL_BUCKET)  // found no prior place to insert
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum);
        else
          // prefer reusing the first deleted slot we saw along the chain
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if (test_deleted(bucknum)) {  // keep searching, but mark to insert
        if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum;

      } else if (equals(key, get_key(table[bucknum]))) {
        return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;  // we're doing another probe
      // JUMP_ supplies the probe increment (macro defined earlier in this
      // file, outside this view -- NOTE(review): confirm it's quadratic).
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count() &&
             "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }
890
891 public:
892 template <typename K>
893 iterator find(const K& key) {
894 if (size() == 0) return end();
895 std::pair<size_type, size_type> pos = find_position(key);
896 if (pos.first == ILLEGAL_BUCKET) // alas, not there
897 return end();
898 else
899 return iterator(this, table + pos.first, table + num_buckets, false);
900 }
901
902 template <typename K>
903 const_iterator find(const K& key) const {
904 if (size() == 0) return end();
905 std::pair<size_type, size_type> pos = find_position(key);
906 if (pos.first == ILLEGAL_BUCKET) // alas, not there
907 return end();
908 else
909 return const_iterator(this, table + pos.first, table + num_buckets,
910 false);
911 }
912
913 // This is a tr1 method: the bucket a given key is in, or what bucket
914 // it would be put in, if it were to be inserted. Shrug.
915 size_type bucket(const key_type& key) const {
916 std::pair<size_type, size_type> pos = find_position(key);
917 return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
918 }
919
920 // Counts how many elements have key key. For maps, it's either 0 or 1.
921 template <typename K>
922 size_type count(const K& key) const {
923 std::pair<size_type, size_type> pos = find_position(key);
924 return pos.first == ILLEGAL_BUCKET ? 0 : 1;
925 }
926
927 // Likewise, equal_range doesn't really make sense for us. Oh well.
928 template <typename K>
929 std::pair<iterator, iterator> equal_range(const K& key) {
930 iterator pos = find(key); // either an iterator or end
931 if (pos == end()) {
932 return std::pair<iterator, iterator>(pos, pos);
933 } else {
934 const iterator startpos = pos++;
935 return std::pair<iterator, iterator>(startpos, pos);
936 }
937 }
938 template <typename K>
939 std::pair<const_iterator, const_iterator> equal_range(
940 const K& key) const {
941 const_iterator pos = find(key); // either an iterator or end
942 if (pos == end()) {
943 return std::pair<const_iterator, const_iterator>(pos, pos);
944 } else {
945 const const_iterator startpos = pos++;
946 return std::pair<const_iterator, const_iterator>(startpos, pos);
947 }
948 }
949
950 // INSERTION ROUTINES
951 private:
  // Private method used by insert_noresize and find_or_insert.
  // Constructs a value at bucket pos (which a prior find_position determined
  // to be empty or deleted) from args, and returns an iterator to it.
  // Throws std::length_error when the table is already at max_size().
  template <typename... Args>
  iterator insert_at(size_type pos, Args&&... args) {
    if (size() >= max_size()) {
      throw std::length_error("insert overflow");
    }
    if (test_deleted(pos)) {  // just replace if it's been del.
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert(num_deleted > 0);
      --num_deleted;  // used to be, now it isn't
      // num_elements is NOT bumped here: deleted buckets apparently remain
      // counted in num_elements, and size() nets num_deleted out -- confirm.
    } else {
      ++num_elements;  // replacing an empty bucket
    }
    set_value(&table[pos], std::forward<Args>(args)...);
    return iterator(this, table + pos, table + num_buckets, false);
  }
970
  // If you know *this is big enough to hold obj, use this routine.
  // Returns {iterator, inserted?}: when the key is already present, nothing
  // is inserted and the iterator points at the existing element.
  template <typename K, typename... Args>
  std::pair<iterator, bool> insert_noresize(K&& key, Args&&... args) {
    // First, double-check we're not inserting delkey or emptyval
    assert(settings.use_empty() && "Inserting without empty key");
    assert(!equals(std::forward<K>(key), key_info.empty_key) && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && "Inserting the deleted key");

    const std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return std::pair<iterator, bool>(
          iterator(this, table + pos.first, table + num_buckets, false),
          false);  // false: we didn't insert
    } else {  // pos.second says where to put it
      return std::pair<iterator, bool>(insert_at(pos.second, std::forward<Args>(args)...), true);
    }
  }
988
989 // Specializations of insert(it, it) depending on the power of the iterator:
990 // (1) Iterator supports operator-, resize before inserting
991 template <class ForwardIterator>
992 void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
993 size_t dist = std::distance(f, l);
994 if (dist >= (std::numeric_limits<size_type>::max)()) {
995 throw std::length_error("insert-range overflow");
996 }
997 resize_delta(static_cast<size_type>(dist));
998 for (; dist > 0; --dist, ++f) {
999 insert_noresize(get_key(*f), *f);
1000 }
1001 }
1002
1003 // (2) Arbitrary iterator, can't tell how much to resize
1004 template <class InputIterator>
1005 void insert(InputIterator f, InputIterator l, std::input_iterator_tag) {
1006 for (; f != l; ++f) insert(*f);
1007 }
1008
1009 public:
  // This is the normal insert routine, used by the outside world.
  // NOTE(review): obj is forwarded twice; this appears safe because
  // get_key() binds its argument by reference (no move happens until
  // insert_noresize actually stores the value) -- confirm against ExtractKey.
  template <typename Arg>
  std::pair<iterator, bool> insert(Arg&& obj) {
    resize_delta(1);  // adding an object, grow if need be
    return insert_noresize(get_key(std::forward<Arg>(obj)), std::forward<Arg>(obj));
  }
1016
  // Emplace with an explicit key: key is used once for the hash lookup and
  // once as the first constructor argument of the stored value.
  // NOTE(review): key is forwarded twice; relies on insert_noresize binding
  // it by reference until the single final use -- confirm.
  template <typename K, typename... Args>
  std::pair<iterator, bool> emplace(K&& key, Args&&... args) {
    resize_delta(1);
    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
  }
1023
1024 template <typename K, typename... Args>
1025 std::pair<iterator, bool> emplace_hint(const_iterator hint, K&& key, Args&&... args) {
1026 resize_delta(1);
1027
1028 if (equals(key, hint->first)) {
1029 return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end), false), false};
1030 }
1031
1032 // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
1033 return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
1034 }
1035
1036 // When inserting a lot at a time, we specialize on the type of iterator
1037 template <class InputIterator>
1038 void insert(InputIterator f, InputIterator l) {
1039 // specializes on iterator type
1040 insert(f, l,
1041 typename std::iterator_traits<InputIterator>::iterator_category());
1042 }
1043
  // Returns a reference to the value for key, inserting a value built from
  // (key, T()) if the key is absent -- this backs operator[] for maps.
  // (NOTE(review): the old comment described T as a "DefaultValue functor
  // that takes a key"; the code actually default-constructs T.)
  template <class T, class K>
  value_type& find_or_insert(K&& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert(
        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
        "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Inserting the deleted key");
    const std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {  // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(std::forward<K>(key), std::forward<K>(key), T()).first;
    } else {  // no need to rehash, insert right here
      return *insert_at(pos.second, std::forward<K>(key), T());
    }
  }
1064
1065 // DELETION ROUTINES
1066 size_type erase(const key_type& key) {
1067 // First, double-check we're not trying to erase delkey or emptyval.
1068 assert(
1069 (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
1070 "Erasing the empty key");
1071 assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
1072 "Erasing the deleted key");
1073 const_iterator pos = find(key); // shrug: shouldn't need to be const
1074 if (pos != end()) {
1075 assert(!test_deleted(pos)); // or find() shouldn't have returned it
1076 set_deleted(pos);
1077 ++num_deleted;
1078 settings.set_consider_shrink(
1079 true); // will think about shrink after next insert
1080 return 1; // because we deleted one thing
1081 } else {
1082 return 0; // because we deleted nothing
1083 }
1084 }
1085
  // We return the iterator past the deleted item.
  iterator erase(const_iterator pos) {
    if (pos == end()) return end();  // sanity check
    if (set_deleted(pos)) {  // true if object has been newly deleted
      ++num_deleted;
      settings.set_consider_shrink(
          true);  // will think about shrink after next insert
    }
    // Final `true` presumably tells the iterator to advance past
    // deleted/empty buckets to the next live element -- confirm.
    return iterator(this, const_cast<pointer>(pos.pos), const_cast<pointer>(pos.end), true);
  }
1096
  // Erase the half-open range [f, l); returns an iterator at l's position.
  iterator erase(const_iterator f, const_iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    settings.set_consider_shrink(
        true);  // will think about shrink after next insert
    return iterator(this, const_cast<pointer>(f.pos), const_cast<pointer>(f.end), false);
  }
1106
1107 // COMPARISON
1108 bool operator==(const dense_hashtable& ht) const {
1109 if (size() != ht.size()) {
1110 return false;
1111 } else if (this == &ht) {
1112 return true;
1113 } else {
1114 // Iterate through the elements in "this" and see if the
1115 // corresponding element is in ht
1116 for (const_iterator it = begin(); it != end(); ++it) {
1117 const_iterator it2 = ht.find(get_key(*it));
1118 if ((it2 == ht.end()) || (*it != *it2)) {
1119 return false;
1120 }
1121 }
1122 return true;
1123 }
1124 }
1125 bool operator!=(const dense_hashtable& ht) const { return !(*this == ht); }
1126
1127 // I/O
1128 // We support reading and writing hashtables to disk. Alas, since
1129 // I don't know how to write a hasher or key_equal, you have to make
1130 // sure everything but the table is the same. We compact before writing.
 private:
  // Every time the disk format changes, this should probably change too.
  // (Only the low 4 bytes are ever written -- see serialize() below.)
  typedef unsigned long MagicNumberType;
  static const MagicNumberType MAGIC_NUMBER = 0x13578642;
1135
1136 public:
1137 // I/O -- this is an add-on for writing hash table to disk
1138 //
1139 // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
1140 // (istream, ostream, etc) *or* a class providing
1141 // Read(void*, size_t) and Write(const void*, size_t)
1142 // (respectively), which writes a buffer into a stream
1143 // (which the INPUT/OUTPUT instance presumably owns).
1144
  // Serializer usable when value_type is POD-like (presumably writes the
  // raw bytes -- see sparsehash_internal::pod_serializer).
  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
1146
  // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
  // On-disk format: 4-byte big-endian magic, 8-byte bucket count, 8-byte
  // element count, then for each group of 8 buckets a 1-byte occupancy
  // bitmap followed by the serialized non-empty values.
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    if (!sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_buckets, 8))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_elements, 8))
      return false;
    // Now write a bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits = 0;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && !test_empty(i + bit)) bits |= (1 << bit);
      }
      if (!sparsehash_internal::write_data(fp, &bits, sizeof(bits)))
        return false;
      for (int bit = 0; bit < 8; ++bit) {
        if (bits & (1 << bit)) {
          if (!serializer(fp, table[i + bit])) return false;
        }
      }
    }
    return true;
  }
1173
  // INPUT: anything we've written an overload of read_data() for.
  // ValueSerializer: a functor. operator()(INPUT*, value_type*)
  // Reads the format written by serialize() above; returns false on any
  // magic/IO mismatch. Requires set_empty_key() to have been called.
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    assert(settings.use_empty() && "empty_key not set for read");

    clear();  // just to be consistent
    MagicNumberType magic_read;
    if (!sparsehash_internal::read_bigendian_number(fp, &magic_read, 4))
      return false;
    if (magic_read != MAGIC_NUMBER) {
      return false;
    }
    size_type new_num_buckets;
    if (!sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8))
      return false;
    clear_to_size(new_num_buckets);
    // clear_to_size zeroed num_elements; restore it from the stream.
    if (!sparsehash_internal::read_bigendian_number(fp, &num_elements, 8))
      return false;

    // Read the bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits;
      if (!sparsehash_internal::read_data(fp, &bits, sizeof(bits)))
        return false;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && (bits & (1 << bit))) {  // not empty
          if (!serializer(fp, &table[i + bit])) return false;
        }
      }
    }
    return true;
  }
1207
1208 private:
  // Wrapper that extends any allocator A with realloc_or_die(). This
  // primary template just aborts: realloc is only meaningful for the
  // libc allocator (see the specialization below).
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) {}

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr,
              "realloc_or_die is only supported for "
              "libc_allocator_with_realloc\n");
      exit(1);
      return NULL;  // unreachable; silences missing-return warnings
    }
  };
1228
  // A template specialization of alloc_impl for
  // libc_allocator_with_realloc that can handle realloc_or_die.
  // Exits the process (after a diagnostic) if reallocation fails.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A>>
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) {}

    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        fprintf(stderr,
                "sparsehash: FATAL ERROR: failed to reallocate "
                "%lu elements for ptr %p",
                static_cast<unsigned long>(n), static_cast<void*>(ptr));
        exit(1);
      }
      return retval;
    }
  };
1253
  // Package allocator with emptyval to eliminate memory needed for
  // the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  // NOTE(review): no emptyval member is visible here any more (the empty
  // key lives in KeyInfo); this class now only wraps the allocator.
  class ValInfo : public alloc_impl<value_alloc_type> {
   public:
    typedef typename alloc_impl<value_alloc_type>::value_type value_type;

    ValInfo(const alloc_impl<value_alloc_type>& a)
        : alloc_impl<value_alloc_type>(a) {}
  };
1265
  // Package functors with another class to eliminate memory needed for
  // zero-size functors. Since ExtractKey and hasher's operator() might
  // have the same function signature, they must be packaged in
  // different classes.
  // Settings carries the hasher plus resize thresholds (occupancy/empty
  // percentages converted to fractions here).
  struct Settings
      : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                   HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
                                                     size_type, HT_MIN_BUCKETS>(
              hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };
1278
  // Packages ExtractKey and SetKey functors (plus EqualKey), using
  // inheritance so empty functors add no storage. Also owns the two
  // sentinel keys (deleted and empty).
  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
        : ExtractKey(ek), SetKey(sk), EqualKey(eq) {}

    // We want to return the exact same type as ExtractKey: Key or const Key&
    template <typename V>
    typename ExtractKey::result_type get_key(V&& v) const {
      return ExtractKey::operator()(std::forward<V>(v));
    }
    // Overwrite the key stored in *v with k.
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    // Same as set_key, but the extra `true` presumably asks SetKey to
    // construct rather than assign -- confirm against SetKey's contract.
    void construct_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k, true);
    }
    template <typename K1, typename K2>
    bool equals(const K1& a, const K2& b) const {
      return EqualKey::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename std::remove_const<key_type>::type delkey;
    // Which key marks never-used (empty) buckets.
    typename std::remove_const<key_type>::type empty_key;
  };
1306
  // Utility functions to access the templated operators: thin forwarders
  // into the functors packed inside settings (hash) and key_info (the rest).
  template <typename K>
  size_type hash(const K& v) const { return settings.hash(v); }
  template <typename K1, typename K2>
  bool equals(const K1& a, const K2& b) const {
    return key_info.equals(a, b);
  }
  template <typename V>
  typename ExtractKey::result_type get_key(V&& v) const {
    return key_info.get_key(std::forward<V>(v));
  }
  void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); }
  void construct_key(pointer v, const key_type& k) const { key_info.construct_key(v, k); }
1320
 private:
  // Actual data
  Settings settings;  // hasher + resize thresholds
  KeyInfo key_info;   // ExtractKey/SetKey/EqualKey + sentinel keys

  size_type num_deleted;  // how many occupied buckets are marked deleted
  // occupied buckets, apparently *including* deleted ones (insert_at only
  // bumps this when filling an empty bucket) -- size() nets the two out.
  size_type num_elements;
  size_type num_buckets;  // array length; masking in find_position implies a power of two
  ValInfo val_info;  // holds emptyval, and also the allocator
                     // (NOTE(review): no emptyval member visible here)
  pointer table;     // the bucket array; NULL until emptyval is set
1331};
1332
// We need a global swap as well, so ADL (and std::swap-style code) finds
// the O(1) member swap instead of the generic move-based one.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& x,
                 dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) {
  x.swap(y);
}
1339
1340#undef JUMP_
1341
// Out-of-class definition of the static member constant (needed pre-C++17
// whenever ILLEGAL_BUCKET is ODR-used).
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type
    dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET;
1345
// How full we let the table get before we resize. Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// However, we go with .5, getting better performance at the cost of
// more space (a trade-off densehashtable explicitly chooses to make).
// Feel free to play around with different values, though, via
// max_load_factor() and/or set_resizing_parameters().
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 50;
1354
// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
// (0.4 * 50 = 20 percent with the default occupancy above.)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
    static_cast<int>(
        0.4 * dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
1361
1362} // namespace google
1363