| 1 | // Copyright (c) 2005, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | // --- |
| 31 | // |
| 32 | // A dense hashtable is a particular implementation of |
| 33 | // a hashtable: one that is meant to minimize memory allocation. |
| 34 | // It does this by using an array to store all the data. We |
| 35 | // steal a value from the key space to indicate "empty" array |
| 36 | // elements (ie indices where no item lives) and another to indicate |
| 37 | // "deleted" elements. |
| 38 | // |
| 39 | // (Note it is possible to change the value of the delete key |
| 40 | // on the fly; you can even remove it, though after that point |
| 41 | // the hashtable is insert_only until you set it again. The empty |
| 42 | // value however can't be changed.) |
| 43 | // |
| 44 | // To minimize allocation and pointer overhead, we use internal |
| 45 | // probing, in which the hashtable is a single table, and collisions |
| 46 | // are resolved by trying to insert again in another bucket. The |
| 47 | // most cache-efficient internal probing schemes are linear probing |
| 48 | // (which suffers, alas, from clumping) and quadratic probing, which |
| 49 | // is what we implement by default. |
| 50 | // |
| 51 | // Type requirements: value_type is required to be Copy Constructible |
| 52 | // and Default Constructible. It is not required to be (and commonly |
| 53 | // isn't) Assignable. |
| 54 | // |
| 55 | // You probably shouldn't use this code directly. Use dense_hash_map<> |
| 56 | // or dense_hash_set<> instead. |
| 57 | |
| 58 | // You can change the following below: |
| 59 | // HT_OCCUPANCY_PCT -- how full before we double size |
| 60 | // HT_EMPTY_PCT -- how empty before we halve size |
| 61 | // HT_MIN_BUCKETS -- default smallest bucket size |
| 62 | // |
| 63 | // You can also change enlarge_factor (which defaults to |
| 64 | // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to |
| 65 | // HT_EMPTY_PCT) with set_resizing_parameters(). |
| 66 | // |
| 67 | // How to decide what values to use? |
| 68 | // shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good. |
| 69 | // HT_MIN_BUCKETS is probably unnecessary since you can specify |
| 70 | // (indirectly) the starting number of buckets at construct-time. |
| 71 | // For enlarge_factor, you can use this chart to try to trade-off |
| 72 | // expected lookup time to the space taken up. By default, this |
| 73 | // code uses quadratic probing, though you can change it to linear |
| 74 | // via JUMP_ below if you really want to. |
| 75 | // |
| 76 | // From |
| 77 | // http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html |
| 78 | // NUMBER OF PROBES / LOOKUP Successful Unsuccessful |
| 79 | // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) |
// Linear collision resolution   [1+1/(1-L)]/2         [1+1/(1-L)^2]/2
| 81 | // |
| 82 | // -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 |
| 83 | // QUADRATIC COLLISION RES. |
| 84 | // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 |
| 85 | // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 |
| 86 | // LINEAR COLLISION RES. |
| 87 | // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 |
| 88 | // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 |
| 89 | |
| 90 | #pragma once |
| 91 | |
| 92 | #include <assert.h> |
| 93 | #include <stdio.h> // for FILE, fwrite, fread |
| 94 | #include <algorithm> // For swap(), eg |
| 95 | #include <iterator> // For iterator tags |
| 96 | #include <limits> // for numeric_limits |
| 97 | #include <memory> // For uninitialized_fill |
| 98 | #include <utility> // for pair |
| 99 | #include <stdexcept> // For length_error |
| 100 | #include <type_traits> |
| 101 | #include <sparsehash/internal/hashtable-common.h> |
| 102 | #include <sparsehash/internal/libc_allocator_with_realloc.h> |
| 103 | |
| 104 | namespace google { |
| 105 | |
| 106 | // The probing method |
| 107 | // Linear probing |
| 108 | // #define JUMP_(key, num_probes) ( 1 ) |
| 109 | // Quadratic probing |
| 110 | #define JUMP_(key, num_probes) (num_probes) |
| 111 | |
| 112 | // Hashtable class, used to implement the hashed associative containers |
| 113 | // hash_set and hash_map. |
| 114 | |
| 115 | // Value: what is stored in the table (each bucket is a Value). |
| 116 | // Key: something in a 1-to-1 correspondence to a Value, that can be used |
| 117 | // to search for a Value in the table (find() takes a Key). |
| 118 | // HashFcn: Takes a Key and returns an integer, the more unique the better. |
| 119 | // ExtractKey: given a Value, returns the unique Key associated with it. |
//             Must provide a result_type typedef indicating the
//             return type of operator().
| 122 | // SetKey: given a Value* and a Key, modifies the value such that |
| 123 | // ExtractKey(value) == key. We guarantee this is only called |
| 124 | // with key == deleted_key or key == empty_key. |
| 125 | // EqualKey: Given two Keys, says whether they are the same (that is, |
| 126 | // if they are both associated with the same Value). |
| 127 | // Alloc: STL allocator to use to allocate memory. |
| 128 | |
| 129 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
| 130 | class EqualKey, class Alloc> |
| 131 | class dense_hashtable; |
| 132 | |
| 133 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 134 | struct dense_hashtable_iterator; |
| 135 | |
| 136 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 137 | struct dense_hashtable_const_iterator; |
| 138 | |
| 139 | // We're just an array, but we need to skip over empty and deleted elements |
| 140 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 141 | struct dense_hashtable_iterator { |
| 142 | private: |
| 143 | using value_alloc_type = |
| 144 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
| 145 | |
| 146 | public: |
| 147 | typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
| 148 | typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
| 149 | const_iterator; |
| 150 | |
| 151 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
| 152 | typedef V value_type; |
| 153 | typedef typename value_alloc_type::difference_type difference_type; |
| 154 | typedef typename value_alloc_type::size_type size_type; |
| 155 | typedef typename value_alloc_type::reference reference; |
| 156 | typedef typename value_alloc_type::pointer pointer; |
| 157 | |
| 158 | // "Real" constructor and default constructor |
| 159 | dense_hashtable_iterator( |
| 160 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it, |
| 161 | pointer it_end, bool advance) |
| 162 | : ht(h), pos(it), end(it_end) { |
| 163 | if (advance) advance_past_empty_and_deleted(); |
| 164 | } |
| 165 | dense_hashtable_iterator() {} |
| 166 | // The default destructor is fine; we don't define one |
| 167 | // The default operator= is fine; we don't define one |
| 168 | |
| 169 | // Happy dereferencer |
| 170 | reference operator*() const { return *pos; } |
| 171 | pointer operator->() const { return &(operator*()); } |
| 172 | |
| 173 | // Arithmetic. The only hard part is making sure that |
| 174 | // we're not on an empty or marked-deleted array element |
| 175 | void advance_past_empty_and_deleted() { |
| 176 | while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) |
| 177 | ++pos; |
| 178 | } |
| 179 | iterator& operator++() { |
| 180 | assert(pos != end); |
| 181 | ++pos; |
| 182 | advance_past_empty_and_deleted(); |
| 183 | return *this; |
| 184 | } |
| 185 | iterator operator++(int) { |
| 186 | iterator tmp(*this); |
| 187 | ++*this; |
| 188 | return tmp; |
| 189 | } |
| 190 | |
| 191 | // Comparison. |
| 192 | bool operator==(const iterator& it) const { return pos == it.pos; } |
| 193 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
| 194 | |
| 195 | // The actual data |
| 196 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
| 197 | pointer pos, end; |
| 198 | }; |
| 199 | |
| 200 | // Now do it all again, but with const-ness! |
| 201 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
| 202 | struct dense_hashtable_const_iterator { |
| 203 | private: |
| 204 | using value_alloc_type = |
| 205 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
| 206 | |
| 207 | public: |
| 208 | typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
| 209 | typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
| 210 | const_iterator; |
| 211 | |
| 212 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
| 213 | typedef V value_type; |
| 214 | typedef typename value_alloc_type::difference_type difference_type; |
| 215 | typedef typename value_alloc_type::size_type size_type; |
| 216 | typedef typename value_alloc_type::const_reference reference; |
| 217 | typedef typename value_alloc_type::const_pointer pointer; |
| 218 | |
| 219 | // "Real" constructor and default constructor |
| 220 | dense_hashtable_const_iterator( |
| 221 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it, |
| 222 | pointer it_end, bool advance) |
| 223 | : ht(h), pos(it), end(it_end) { |
| 224 | if (advance) advance_past_empty_and_deleted(); |
| 225 | } |
| 226 | dense_hashtable_const_iterator() : ht(NULL), pos(pointer()), end(pointer()) {} |
| 227 | // This lets us convert regular iterators to const iterators |
| 228 | dense_hashtable_const_iterator(const iterator& it) |
| 229 | : ht(it.ht), pos(it.pos), end(it.end) {} |
| 230 | // The default destructor is fine; we don't define one |
| 231 | // The default operator= is fine; we don't define one |
| 232 | |
| 233 | // Happy dereferencer |
| 234 | reference operator*() const { return *pos; } |
| 235 | pointer operator->() const { return &(operator*()); } |
| 236 | |
| 237 | // Arithmetic. The only hard part is making sure that |
| 238 | // we're not on an empty or marked-deleted array element |
| 239 | void advance_past_empty_and_deleted() { |
| 240 | while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) |
| 241 | ++pos; |
| 242 | } |
| 243 | const_iterator& operator++() { |
| 244 | assert(pos != end); |
| 245 | ++pos; |
| 246 | advance_past_empty_and_deleted(); |
| 247 | return *this; |
| 248 | } |
| 249 | const_iterator operator++(int) { |
| 250 | const_iterator tmp(*this); |
| 251 | ++*this; |
| 252 | return tmp; |
| 253 | } |
| 254 | |
| 255 | // Comparison. |
| 256 | bool operator==(const const_iterator& it) const { return pos == it.pos; } |
| 257 | bool operator!=(const const_iterator& it) const { return pos != it.pos; } |
| 258 | |
| 259 | // The actual data |
| 260 | const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
| 261 | pointer pos, end; |
| 262 | }; |
| 263 | |
template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey,
          class EqualKey, class Alloc>
class dense_hashtable {
 private:
  // Everything is stored as Value; rebind the user's allocator accordingly.
  using value_alloc_type =
      typename std::allocator_traits<Alloc>::template rebind_alloc<Value>;

 public:
  // Standard hashed-associative-container typedefs.
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef dense_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
                                   EqualKey, Alloc> iterator;

  typedef dense_hashtable_const_iterator<
      Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;

  // These come from tr1. For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT;  // defined at the bottom of this file

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT;  // defined at the bottom of this file

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size. Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
| 314 | |
| 315 | // ITERATOR FUNCTIONS |
| 316 | iterator begin() { return iterator(this, table, table + num_buckets, true); } |
| 317 | iterator end() { |
| 318 | return iterator(this, table + num_buckets, table + num_buckets, true); |
| 319 | } |
| 320 | const_iterator begin() const { |
| 321 | return const_iterator(this, table, table + num_buckets, true); |
| 322 | } |
| 323 | const_iterator end() const { |
| 324 | return const_iterator(this, table + num_buckets, table + num_buckets, true); |
| 325 | } |
| 326 | |
| 327 | // These come from tr1 unordered_map. They iterate over 'bucket' n. |
| 328 | // We'll just consider bucket n to be the n-th element of the table. |
| 329 | local_iterator begin(size_type i) { |
| 330 | return local_iterator(this, table + i, table + i + 1, false); |
| 331 | } |
| 332 | local_iterator end(size_type i) { |
| 333 | local_iterator it = begin(i); |
| 334 | if (!test_empty(i) && !test_deleted(i)) ++it; |
| 335 | return it; |
| 336 | } |
| 337 | const_local_iterator begin(size_type i) const { |
| 338 | return const_local_iterator(this, table + i, table + i + 1, false); |
| 339 | } |
| 340 | const_local_iterator end(size_type i) const { |
| 341 | const_local_iterator it = begin(i); |
| 342 | if (!test_empty(i) && !test_deleted(i)) ++it; |
| 343 | return it; |
| 344 | } |
| 345 | |
  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  // NOTE(review): these return by value. 'settings' and 'key_info'
  // presumably derive from hasher / key_equal respectively, so the
  // returns below slice off just the functor part -- confirm against
  // the Settings/KeyInfo declarations.
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const { return allocator_type(val_info); }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }
| 353 | |
| 354 | private: |
| 355 | // Annoyingly, we can't copy values around, because they might have |
| 356 | // const components (they're probably pair<const X, Y>). We use |
| 357 | // explicit destructor invocation and placement new to get around |
| 358 | // this. Arg. |
| 359 | template <typename... Args> |
| 360 | void set_value(pointer dst, Args&&... args) { |
| 361 | dst->~value_type(); // delete the old value, if any |
| 362 | new (dst) value_type(std::forward<Args>(args)...); |
| 363 | } |
| 364 | |
| 365 | void destroy_buckets(size_type first, size_type last) { |
| 366 | for (; first != last; ++first) table[first].~value_type(); |
| 367 | } |
| 368 | |
| 369 | // DELETE HELPER FUNCTIONS |
| 370 | // This lets the user describe a key that will indicate deleted |
| 371 | // table entries. This key should be an "impossible" entry -- |
| 372 | // if you try to insert it for real, you won't be able to retrieve it! |
| 373 | // (NB: while you pass in an entire value, only the key part is looked |
| 374 | // at. This is just because I don't know how to assign just a key.) |
| 375 | private: |
| 376 | void squash_deleted() { // gets rid of any deleted entries we have |
| 377 | if (num_deleted) { // get rid of deleted before writing |
| 378 | size_type resize_to = settings.min_buckets( |
| 379 | num_elements, bucket_count()); |
| 380 | dense_hashtable tmp(std::move(*this), resize_to); // copying will get rid of deleted |
| 381 | swap(tmp); // now we are tmp |
| 382 | } |
| 383 | assert(num_deleted == 0); |
| 384 | } |
| 385 | |
| 386 | // Test if the given key is the deleted indicator. Requires |
| 387 | // num_deleted > 0, for correctness of read(), and because that |
| 388 | // guarantees that key_info.delkey is valid. |
| 389 | bool test_deleted_key(const key_type& key) const { |
| 390 | assert(num_deleted > 0); |
| 391 | return equals(key_info.delkey, key); |
| 392 | } |
| 393 | |
| 394 | public: |
| 395 | void set_deleted_key(const key_type& key) { |
| 396 | // the empty indicator (if specified) and the deleted indicator |
| 397 | // must be different |
| 398 | assert( |
| 399 | (!settings.use_empty() || !equals(key, key_info.empty_key)) && |
| 400 | "Passed the empty-key to set_deleted_key" ); |
| 401 | // It's only safe to change what "deleted" means if we purge deleted guys |
| 402 | squash_deleted(); |
| 403 | settings.set_use_deleted(true); |
| 404 | key_info.delkey = key; |
| 405 | } |
| 406 | void clear_deleted_key() { |
| 407 | squash_deleted(); |
| 408 | settings.set_use_deleted(false); |
| 409 | } |
| 410 | key_type deleted_key() const { |
| 411 | assert(settings.use_deleted() && |
| 412 | "Must set deleted key before calling deleted_key" ); |
| 413 | return key_info.delkey; |
| 414 | } |
| 415 | |
| 416 | // These are public so the iterators can use them |
| 417 | // True if the item at position bucknum is "deleted" marker |
| 418 | bool test_deleted(size_type bucknum) const { |
| 419 | // Invariant: !use_deleted() implies num_deleted is 0. |
| 420 | assert(settings.use_deleted() || num_deleted == 0); |
| 421 | return num_deleted > 0 && test_deleted_key(get_key(table[bucknum])); |
| 422 | } |
| 423 | bool test_deleted(const iterator& it) const { |
| 424 | // Invariant: !use_deleted() implies num_deleted is 0. |
| 425 | assert(settings.use_deleted() || num_deleted == 0); |
| 426 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
| 427 | } |
| 428 | bool test_deleted(const const_iterator& it) const { |
| 429 | // Invariant: !use_deleted() implies num_deleted is 0. |
| 430 | assert(settings.use_deleted() || num_deleted == 0); |
| 431 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
| 432 | } |
| 433 | |
| 434 | private: |
| 435 | void check_use_deleted(const char* caller) { |
| 436 | (void)caller; // could log it if the assert failed |
| 437 | assert(settings.use_deleted()); |
| 438 | } |
| 439 | |
| 440 | // Set it so test_deleted is true. true if object didn't used to be deleted. |
| 441 | bool set_deleted(iterator& it) { |
| 442 | check_use_deleted("set_deleted()" ); |
| 443 | bool retval = !test_deleted(it); |
| 444 | // &* converts from iterator to value-type. |
| 445 | set_key(&(*it), key_info.delkey); |
| 446 | return retval; |
| 447 | } |
| 448 | // Set it so test_deleted is false. true if object used to be deleted. |
| 449 | bool clear_deleted(iterator& it) { |
| 450 | check_use_deleted("clear_deleted()" ); |
| 451 | // Happens automatically when we assign something else in its place. |
| 452 | return test_deleted(it); |
| 453 | } |
| 454 | |
| 455 | // We also allow to set/clear the deleted bit on a const iterator. |
| 456 | // We allow a const_iterator for the same reason you can delete a |
| 457 | // const pointer: it's convenient, and semantically you can't use |
| 458 | // 'it' after it's been deleted anyway, so its const-ness doesn't |
| 459 | // really matter. |
| 460 | bool set_deleted(const_iterator& it) { |
| 461 | check_use_deleted("set_deleted()" ); |
| 462 | bool retval = !test_deleted(it); |
| 463 | set_key(const_cast<pointer>(&(*it)), key_info.delkey); |
| 464 | return retval; |
| 465 | } |
| 466 | // Set it so test_deleted is false. true if object used to be deleted. |
| 467 | bool clear_deleted(const_iterator& it) { |
| 468 | check_use_deleted("clear_deleted()" ); |
| 469 | return test_deleted(it); |
| 470 | } |
| 471 | |
| 472 | // EMPTY HELPER FUNCTIONS |
| 473 | // This lets the user describe a key that will indicate empty (unused) |
| 474 | // table entries. This key should be an "impossible" entry -- |
| 475 | // if you try to insert it for real, you won't be able to retrieve it! |
| 476 | // (NB: while you pass in an entire value, only the key part is looked |
| 477 | // at. This is just because I don't know how to assign just a key.) |
| 478 | public: |
| 479 | // These are public so the iterators can use them |
| 480 | // True if the item at position bucknum is "empty" marker |
| 481 | bool test_empty(size_type bucknum) const { |
| 482 | assert(settings.use_empty()); // we always need to know what's empty! |
| 483 | return equals(key_info.empty_key, get_key(table[bucknum])); |
| 484 | } |
| 485 | bool test_empty(const iterator& it) const { |
| 486 | assert(settings.use_empty()); // we always need to know what's empty! |
| 487 | return equals(key_info.empty_key, get_key(*it)); |
| 488 | } |
| 489 | bool test_empty(const const_iterator& it) const { |
| 490 | assert(settings.use_empty()); // we always need to know what's empty! |
| 491 | return equals(key_info.empty_key, get_key(*it)); |
| 492 | } |
| 493 | |
| 494 | private: |
| 495 | void fill_range_with_empty(pointer table_start, size_type count) { |
| 496 | for (size_type i = 0; i < count; ++i) |
| 497 | { |
| 498 | construct_key(&table_start[i], key_info.empty_key); |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | public: |
| 503 | void set_empty_key(const key_type& key) { |
| 504 | // Once you set the empty key, you can't change it |
| 505 | assert(!settings.use_empty() && "Calling set_empty_key multiple times" ); |
| 506 | // The deleted indicator (if specified) and the empty indicator |
| 507 | // must be different. |
| 508 | assert( |
| 509 | (!settings.use_deleted() || !equals(key, key_info.delkey)) && |
| 510 | "Setting the empty key the same as the deleted key" ); |
| 511 | settings.set_use_empty(true); |
| 512 | key_info.empty_key = key; |
| 513 | |
| 514 | assert(!table); // must set before first use |
| 515 | // num_buckets was set in constructor even though table was NULL |
| 516 | table = val_info.allocate(num_buckets); |
| 517 | assert(table); |
| 518 | fill_range_with_empty(table, num_buckets); |
| 519 | } |
| 520 | key_type empty_key() const { |
| 521 | assert(settings.use_empty()); |
| 522 | return key_info.empty_key; |
| 523 | } |
| 524 | |
| 525 | // FUNCTIONS CONCERNING SIZE |
| 526 | public: |
| 527 | size_type size() const { return num_elements - num_deleted; } |
| 528 | size_type max_size() const { return val_info.max_size(); } |
| 529 | bool empty() const { return size() == 0; } |
| 530 | size_type bucket_count() const { return num_buckets; } |
| 531 | size_type max_bucket_count() const { return max_size(); } |
| 532 | size_type nonempty_bucket_count() const { return num_elements; } |
| 533 | // These are tr1 methods. Their idea of 'bucket' doesn't map well to |
| 534 | // what we do. We just say every bucket has 0 or 1 items in it. |
| 535 | size_type bucket_size(size_type i) const { |
| 536 | return begin(i) == end(i) ? 0 : 1; |
| 537 | } |
| 538 | |
 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes. Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking. Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = num_elements - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;  // find how much we should shrink
      // Keep halving while we stay above the default floor and the live
      // elements still fall below the shrink fraction of the new size.
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < sz * shrink_factor) {
        sz /= 2;  // stay a power of 2
      }
      // Move-rehash into a table of sz buckets, then adopt it.
      dense_hashtable tmp(std::move(*this), sz);  // Do the actual resizing
      swap(tmp);                                  // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);  // because we just considered it
    return retval;
  }
| 574 | |
  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if (settings.consider_shrink()) {  // see if lots of deletes happened
      if (maybe_shrink()) did_resize = true;
    }
    // Guard against size_type overflow in num_elements + delta below.
    if (num_elements >= (std::numeric_limits<size_type>::max)() - delta) {
      throw std::length_error("resize overflow");
    }
    if (bucket_count() >= HT_MIN_BUCKETS &&
        (num_elements + delta) <= settings.enlarge_threshold())
      return did_resize;  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable. So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room). But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    size_type needed_size = settings.min_buckets(num_elements + delta, 0);
    if (needed_size <= bucket_count())  // we have enough buckets
      return did_resize;

    size_type resize_to = settings.min_buckets(
        num_elements - num_deleted + delta, bucket_count());

    // When num_deleted is large, we may still grow but we do not want to
    // over expand. So we reduce needed_size by a portion of num_deleted
    // (the exact portion does not matter). This is especially helpful
    // when min_load_factor is zero (no shrink at all) to avoid doubling
    // the bucket count to infinity. See also test ResizeWithoutShrink.
    needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0);

    if (resize_to < needed_size &&  // may double resize_to
        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow. But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert. Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to * 2));
      if (num_elements - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }
    // Move-rehash into a table of resize_to buckets (this also purges
    // all deleted entries), then adopt it.
    dense_hashtable tmp(std::move(*this), resize_to);
    swap(tmp);  // now we are tmp
    return true;
  }
| 630 | |
  // We require table be not-NULL and empty before calling this.
  // Tag-dispatched on whether the allocator supports realloc-style
  // resizing (see the call sites for how the tag is computed).
  void resize_table(size_type /*old_size*/, size_type new_size,
                    std::true_type) {
    // realloc path: resize the existing block in place if possible.
    table = val_info.realloc_or_die(table, new_size);
  }

  void resize_table(size_type old_size, size_type new_size, std::false_type) {
    // No realloc support: release the old block and allocate a fresh one.
    val_info.deallocate(table, old_size);
    table = val_info.allocate(new_size);
  }
| 641 | |
  // Used to actually do the rehashing when we grow/shrink a hashtable.
  // Re-inserts every live element of ht into this table, sized to hold at
  // least min_buckets_wanted.  Moves elements when ht is an rvalue,
  // copies when it is an lvalue.
  template <typename Hashtable>
  void copy_or_move_from(Hashtable&& ht, size_type min_buckets_wanted) {
    clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
    for (auto&& value : ht) {
      size_type num_probes = 0;  // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe (quadratic by default; see JUMP_) for the first empty
      // bucket.  NOTE: the 'key' argument to JUMP_ is never expanded --
      // both macro definitions ignore it -- so the undeclared name is
      // harmless.
      for (bucknum = hash(get_key(value)) & bucket_count_minus_one;
           !test_empty(bucknum);  // not empty
           bucknum =
               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count() &&
               "Hashtable is full: an error in key_equal<> or hash<>" );
      }

      // Forward as an rvalue only when the source table itself was an
      // rvalue; otherwise copy.
      using will_move = std::is_rvalue_reference<Hashtable&&>;
      using value_t = typename std::conditional<will_move::value, value_type&&, const_reference>::type;

      set_value(&table[bucknum], std::forward<value_t>(value));
      num_elements++;
    }
    settings.inc_num_ht_copies();
  }
| 672 | |
  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements. As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {  // resize to this or larger
    // Shrink first if an earlier erase flagged it (or caller asked via
    // req_elements == 0), then grow to fit the requested element count.
    if (settings.consider_shrink() || req_elements == 0) maybe_shrink();
    if (req_elements > num_elements) resize_delta(req_elements - num_elements);
  }
| 682 | |
  // Get and change the value of shrink_factor and enlarge_factor. The
  // description at the beginning of this file explains how to choose
  // the values. Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    // The cached thresholds are derived from the factors, so they must
    // be recomputed whenever the factors change.
    settings.reset_thresholds(bucket_count());
  }
| 695 | |
  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor. We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  // Note: bucket-array allocation is deferred; `table` stays NULL
  // until the empty key is set (see the comment in the body).
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey(),
                           const SetKey& set = SetKey(),
                           const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        num_elements(0),
        num_buckets(expected_max_items_in_table == 0
                        ? HT_DEFAULT_STARTING_BUCKETS
                        : settings.min_buckets(expected_max_items_in_table, 0)),
        val_info(alloc_impl<value_alloc_type>(alloc)),
        table(NULL) {
    // table is NULL until emptyval is set. However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    settings.reset_thresholds(bucket_count());
  }
| 719 | |
  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // The allocator and functors are copied from ht.
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(ht.val_info),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_from will crash, so we do our own copying.
      // ht must be empty in that case, so only sizes need to carry over.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(ht, min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
| 741 | |
  // Move constructor: default-construct an empty table, then steal
  // ht's storage via swap.  ht is left as a valid, default-sized table.
  dense_hashtable(dense_hashtable&& ht)
      : dense_hashtable() {
    swap(ht);
  }
| 746 | |
  // Move-and-resize constructor (used by the resizing machinery):
  // like the copy counterpart above, but elements are moved out of ht.
  // Note the allocator (val_info) is moved, not copied.
  dense_hashtable(dense_hashtable&& ht,
                  size_type min_buckets_wanted)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(std::move(ht.val_info)),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_or_move_from will crash, so we do our own copying.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_or_move_from(std::move(ht), min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
  }
| 766 | |
  // Copy assignment.  Deliberately does NOT copy the allocator (it may
  // not be copyable); everything else is brought over from ht.
  dense_hashtable& operator=(const dense_hashtable& ht) {
    if (&ht == this) return *this;  // don't copy onto ourselves
    if (!ht.settings.use_empty()) {
      // Source has no empty key set; it must be empty, so an
      // empty table carrying ht's thresholds is the correct result.
      assert(ht.empty());
      dense_hashtable empty_table(ht);  // empty table with ht's thresholds
      this->swap(empty_table);
      return *this;
    }
    settings = ht.settings;
    key_info = ht.key_info;
    // copy_or_move_from() calls clear and sets num_deleted to 0 too
    copy_or_move_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }
| 782 | |
| 783 | dense_hashtable& operator=(dense_hashtable&& ht) { |
| 784 | assert(&ht != this); // this should not happen |
| 785 | swap(ht); |
| 786 | return *this; |
| 787 | } |
| 788 | |
  // Destroy every bucket's value, then return the bucket array to the
  // allocator.  `table` may still be NULL if no empty key was ever set.
  ~dense_hashtable() {
    if (table) {
      destroy_buckets(0, num_buckets);
      val_info.deallocate(table, num_buckets);
    }
  }
| 795 | |
  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    std::swap(settings, ht.settings);
    std::swap(key_info, ht.key_info);
    std::swap(num_deleted, ht.num_deleted);
    std::swap(num_elements, ht.num_elements);
    std::swap(num_buckets, ht.num_buckets);
    std::swap(table, ht.table);
    // Both tables changed size, so both threshold caches are stale.
    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }
| 808 | |
 private:
  // Destroy all elements and re-establish an all-empty table of
  // new_num_buckets buckets, allocating (or reallocating) as needed.
  // Resets num_elements/num_deleted and the resize thresholds.
  void clear_to_size(size_type new_num_buckets) {
    if (!table) {
      table = val_info.allocate(new_num_buckets);
    } else {
      destroy_buckets(0, num_buckets);
      if (new_num_buckets != num_buckets) {  // resize, if necessary
        // realloc is only usable with the default
        // libc_allocator_with_realloc; dispatch at compile time.
        typedef std::integral_constant<
            bool, std::is_same<value_alloc_type,
                               libc_allocator_with_realloc<value_type>>::value>
            realloc_ok;
        resize_table(num_buckets, new_num_buckets, realloc_ok());
      }
    }
    assert(table);
    fill_range_with_empty(table, new_num_buckets);
    num_elements = 0;
    num_deleted = 0;
    num_buckets = new_num_buckets;  // our new size
    settings.reset_thresholds(bucket_count());
  }
| 830 | |
| 831 | public: |
| 832 | // It's always nice to be able to clear a table without deallocating it |
| 833 | void clear() { |
| 834 | // If the table is already empty, and the number of buckets is |
| 835 | // already as we desire, there's nothing to do. |
| 836 | const size_type new_num_buckets = settings.min_buckets(0, 0); |
| 837 | if (num_elements == 0 && new_num_buckets == num_buckets) { |
| 838 | return; |
| 839 | } |
| 840 | clear_to_size(new_num_buckets); |
| 841 | } |
| 842 | |
  // Clear the table without resizing it.
  // Mimicks the stl_hashtable's behaviour when clear()-ing in that it
  // does not modify the bucket count
  void clear_no_resize() {
    if (num_elements > 0) {
      assert(table);
      destroy_buckets(0, num_buckets);
      fill_range_with_empty(table, num_buckets);
    }
    // don't consider to shrink before another erase()
    settings.reset_thresholds(bucket_count());
    num_elements = 0;
    num_deleted = 0;
  }
| 857 | |
  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  template <typename K>
  std::pair<size_type, size_type> find_position(const K& key) const {
    size_type num_probes = 0;  // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    // Masking works because bucket_count() is always a power of two.
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    while (1) {  // probe until something happens
      if (test_empty(bucknum)) {  // bucket is empty
        // An empty bucket terminates the probe chain: the key cannot
        // be anywhere further along it.
        if (insert_pos == ILLEGAL_BUCKET)  // found no prior place to insert
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if (test_deleted(bucknum)) {  // keep searching, but mark to insert
        if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum;

      } else if (equals(key, get_key(table[bucknum]))) {
        return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;  // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count() &&
             "Hashtable is full: an error in key_equal<> or hash<>" );
    }
  }
| 890 | |
| 891 | public: |
| 892 | template <typename K> |
| 893 | iterator find(const K& key) { |
| 894 | if (size() == 0) return end(); |
| 895 | std::pair<size_type, size_type> pos = find_position(key); |
| 896 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
| 897 | return end(); |
| 898 | else |
| 899 | return iterator(this, table + pos.first, table + num_buckets, false); |
| 900 | } |
| 901 | |
| 902 | template <typename K> |
| 903 | const_iterator find(const K& key) const { |
| 904 | if (size() == 0) return end(); |
| 905 | std::pair<size_type, size_type> pos = find_position(key); |
| 906 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
| 907 | return end(); |
| 908 | else |
| 909 | return const_iterator(this, table + pos.first, table + num_buckets, |
| 910 | false); |
| 911 | } |
| 912 | |
| 913 | // This is a tr1 method: the bucket a given key is in, or what bucket |
| 914 | // it would be put in, if it were to be inserted. Shrug. |
| 915 | size_type bucket(const key_type& key) const { |
| 916 | std::pair<size_type, size_type> pos = find_position(key); |
| 917 | return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; |
| 918 | } |
| 919 | |
| 920 | // Counts how many elements have key key. For maps, it's either 0 or 1. |
| 921 | template <typename K> |
| 922 | size_type count(const K& key) const { |
| 923 | std::pair<size_type, size_type> pos = find_position(key); |
| 924 | return pos.first == ILLEGAL_BUCKET ? 0 : 1; |
| 925 | } |
| 926 | |
| 927 | // Likewise, equal_range doesn't really make sense for us. Oh well. |
| 928 | template <typename K> |
| 929 | std::pair<iterator, iterator> equal_range(const K& key) { |
| 930 | iterator pos = find(key); // either an iterator or end |
| 931 | if (pos == end()) { |
| 932 | return std::pair<iterator, iterator>(pos, pos); |
| 933 | } else { |
| 934 | const iterator startpos = pos++; |
| 935 | return std::pair<iterator, iterator>(startpos, pos); |
| 936 | } |
| 937 | } |
| 938 | template <typename K> |
| 939 | std::pair<const_iterator, const_iterator> equal_range( |
| 940 | const K& key) const { |
| 941 | const_iterator pos = find(key); // either an iterator or end |
| 942 | if (pos == end()) { |
| 943 | return std::pair<const_iterator, const_iterator>(pos, pos); |
| 944 | } else { |
| 945 | const const_iterator startpos = pos++; |
| 946 | return std::pair<const_iterator, const_iterator>(startpos, pos); |
| 947 | } |
| 948 | } |
| 949 | |
  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  // Constructs a value from args... directly into bucket `pos`, which
  // must be empty or deleted (as reported by find_position).
  template <typename... Args>
  iterator insert_at(size_type pos, Args&&... args) {
    if (size() >= max_size()) {
      throw std::length_error("insert overflow" );
    }
    if (test_deleted(pos)) {  // just replace if it's been del.
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert(num_deleted > 0);
      --num_deleted;  // used to be, now it isn't
    } else {
      ++num_elements;  // replacing an empty bucket
    }
    set_value(&table[pos], std::forward<Args>(args)...);
    return iterator(this, table + pos, table + num_buckets, false);
  }
| 970 | |
| 971 | // If you know *this is big enough to hold obj, use this routine |
| 972 | template <typename K, typename... Args> |
| 973 | std::pair<iterator, bool> insert_noresize(K&& key, Args&&... args) { |
| 974 | // First, double-check we're not inserting delkey or emptyval |
| 975 | assert(settings.use_empty() && "Inserting without empty key" ); |
| 976 | assert(!equals(std::forward<K>(key), key_info.empty_key) && "Inserting the empty key" ); |
| 977 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && "Inserting the deleted key" ); |
| 978 | |
| 979 | const std::pair<size_type, size_type> pos = find_position(key); |
| 980 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
| 981 | return std::pair<iterator, bool>( |
| 982 | iterator(this, table + pos.first, table + num_buckets, false), |
| 983 | false); // false: we didn't insert |
| 984 | } else { // pos.second says where to put it |
| 985 | return std::pair<iterator, bool>(insert_at(pos.second, std::forward<Args>(args)...), true); |
| 986 | } |
| 987 | } |
| 988 | |
  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
    // Multi-pass iterators let us count the range up front and grow
    // the table once instead of per element.
    size_t dist = std::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)()) {
      throw std::length_error("insert-range overflow" );
    }
    resize_delta(static_cast<size_type>(dist));
    for (; dist > 0; --dist, ++f) {
      insert_noresize(get_key(*f), *f);
    }
  }
| 1002 | |
| 1003 | // (2) Arbitrary iterator, can't tell how much to resize |
| 1004 | template <class InputIterator> |
| 1005 | void insert(InputIterator f, InputIterator l, std::input_iterator_tag) { |
| 1006 | for (; f != l; ++f) insert(*f); |
| 1007 | } |
| 1008 | |
| 1009 | public: |
| 1010 | // This is the normal insert routine, used by the outside world |
| 1011 | template <typename Arg> |
| 1012 | std::pair<iterator, bool> insert(Arg&& obj) { |
| 1013 | resize_delta(1); // adding an object, grow if need be |
| 1014 | return insert_noresize(get_key(std::forward<Arg>(obj)), std::forward<Arg>(obj)); |
| 1015 | } |
| 1016 | |
  // Emplace: `key` is used both for lookup and as the first constructor
  // argument of the stored value.
  template <typename K, typename... Args>
  std::pair<iterator, bool> emplace(K&& key, Args&&... args) {
    resize_delta(1);
    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
    // NOTE(review): both forwards are mere casts at this call site, and
    // insert_noresize reads the key parameter before constructing the
    // value, so the same object being bound twice appears safe -- but
    // it relies on that ordering; confirm before reordering callees.
    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
  }
| 1023 | |
| 1024 | template <typename K, typename... Args> |
| 1025 | std::pair<iterator, bool> emplace_hint(const_iterator hint, K&& key, Args&&... args) { |
| 1026 | resize_delta(1); |
| 1027 | |
| 1028 | if (equals(key, hint->first)) { |
| 1029 | return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end), false), false}; |
| 1030 | } |
| 1031 | |
| 1032 | // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself |
| 1033 | return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...); |
| 1034 | } |
| 1035 | |
| 1036 | // When inserting a lot at a time, we specialize on the type of iterator |
| 1037 | template <class InputIterator> |
| 1038 | void insert(InputIterator f, InputIterator l) { |
| 1039 | // specializes on iterator type |
| 1040 | insert(f, l, |
| 1041 | typename std::iterator_traits<InputIterator>::iterator_category()); |
| 1042 | } |
| 1043 | |
  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  // NOTE(review): despite the comment above, T here is constructed via
  // T() and passed as a constructor argument -- presumably the
  // default-value type; confirm against callers.
  template <class T, class K>
  value_type& find_or_insert(K&& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert(
        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
        "Inserting the empty key" );
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Inserting the deleted key" );
    const std::pair<size_type, size_type> pos = find_position(key);
    if (pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {  // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(std::forward<K>(key), std::forward<K>(key), T()).first;
    } else {  // no need to rehash, insert right here
      return *insert_at(pos.second, std::forward<K>(key), T());
    }
  }
| 1064 | |
| 1065 | // DELETION ROUTINES |
| 1066 | size_type erase(const key_type& key) { |
| 1067 | // First, double-check we're not trying to erase delkey or emptyval. |
| 1068 | assert( |
| 1069 | (!settings.use_empty() || !equals(key, key_info.empty_key)) && |
| 1070 | "Erasing the empty key" ); |
| 1071 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && |
| 1072 | "Erasing the deleted key" ); |
| 1073 | const_iterator pos = find(key); // shrug: shouldn't need to be const |
| 1074 | if (pos != end()) { |
| 1075 | assert(!test_deleted(pos)); // or find() shouldn't have returned it |
| 1076 | set_deleted(pos); |
| 1077 | ++num_deleted; |
| 1078 | settings.set_consider_shrink( |
| 1079 | true); // will think about shrink after next insert |
| 1080 | return 1; // because we deleted one thing |
| 1081 | } else { |
| 1082 | return 0; // because we deleted nothing |
| 1083 | } |
| 1084 | } |
| 1085 | |
  // We return the iterator past the deleted item.
  iterator erase(const_iterator pos) {
    if (pos == end()) return end();  // sanity check
    if (set_deleted(pos)) {  // true if object has been newly deleted
      ++num_deleted;
      settings.set_consider_shrink(
          true);  // will think about shrink after next insert
    }
    // Last arg `true`: the returned iterator starts at pos and is
    // advanced past deleted buckets, i.e. to the next live element.
    return iterator(this, const_cast<pointer>(pos.pos), const_cast<pointer>(pos.end), true);
  }
| 1096 | |
  // Erase the half-open range [f, l); returns an iterator at l's
  // position.  Deletion is lazy (buckets are only marked).
  iterator erase(const_iterator f, const_iterator l) {
    for (; f != l; ++f) {
      if (set_deleted(f))  // should always be true
        ++num_deleted;
    }
    settings.set_consider_shrink(
        true);  // will think about shrink after next insert
    // f == l here; last arg `false` means no advance past deleted.
    return iterator(this, const_cast<pointer>(f.pos), const_cast<pointer>(f.end), false);
  }
| 1106 | |
| 1107 | // COMPARISON |
| 1108 | bool operator==(const dense_hashtable& ht) const { |
| 1109 | if (size() != ht.size()) { |
| 1110 | return false; |
| 1111 | } else if (this == &ht) { |
| 1112 | return true; |
| 1113 | } else { |
| 1114 | // Iterate through the elements in "this" and see if the |
| 1115 | // corresponding element is in ht |
| 1116 | for (const_iterator it = begin(); it != end(); ++it) { |
| 1117 | const_iterator it2 = ht.find(get_key(*it)); |
| 1118 | if ((it2 == ht.end()) || (*it != *it2)) { |
| 1119 | return false; |
| 1120 | } |
| 1121 | } |
| 1122 | return true; |
| 1123 | } |
| 1124 | } |
| 1125 | bool operator!=(const dense_hashtable& ht) const { return !(*this == ht); } |
| 1126 | |
  // I/O
  // We support reading and writing hashtables to disk. Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same. We compact before writing.
 private:
  // Every time the disk format changes, this should probably change too
  // (only the low 4 bytes are written -- see serialize()).
  typedef unsigned long MagicNumberType;
  static const MagicNumberType MAGIC_NUMBER = 0x13578642;
| 1135 | |
 public:
  // I/O -- this is an add-on for writing hash table to disk
  //
  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
  // (istream, ostream, etc) *or* a class providing
  // Read(void*, size_t) and Write(const void*, size_t)
  // (respectively), which writes a buffer into a stream
  // (which the INPUT/OUTPUT instance presumably owns).

  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;

  // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    squash_deleted();  // so we don't have to worry about delkey
    // Header: 4-byte magic number, then 8-byte bucket count and 8-byte
    // element count, all written big-endian.
    if (!sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_buckets, 8))
      return false;
    if (!sparsehash_internal::write_bigendian_number(fp, num_elements, 8))
      return false;
    // Now write a bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits = 0;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && !test_empty(i + bit)) bits |= (1 << bit);
      }
      if (!sparsehash_internal::write_data(fp, &bits, sizeof(bits)))
        return false;
      // Each set bit is immediately followed by that bucket's value.
      for (int bit = 0; bit < 8; ++bit) {
        if (bits & (1 << bit)) {
          if (!serializer(fp, table[i + bit])) return false;
        }
      }
    }
    return true;
  }
| 1173 | |
  // INPUT: anything we've written an overload of read_data() for.
  // ValueSerializer: a functor. operator()(INPUT*, value_type*)
  // Reads the format produced by serialize() above; returns false on
  // any read error or on a magic-number mismatch.
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    assert(settings.use_empty() && "empty_key not set for read" );

    clear();  // just to be consistent
    MagicNumberType magic_read;
    if (!sparsehash_internal::read_bigendian_number(fp, &magic_read, 4))
      return false;
    if (magic_read != MAGIC_NUMBER) {
      return false;
    }
    size_type new_num_buckets;
    if (!sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8))
      return false;
    // Re-establish an all-empty table of the serialized size before
    // reading elements into it.
    clear_to_size(new_num_buckets);
    if (!sparsehash_internal::read_bigendian_number(fp, &num_elements, 8))
      return false;

    // Read the bitmap of non-empty buckets.
    for (size_type i = 0; i < num_buckets; i += 8) {
      unsigned char bits;
      if (!sparsehash_internal::read_data(fp, &bits, sizeof(bits)))
        return false;
      for (int bit = 0; bit < 8; ++bit) {
        if (i + bit < num_buckets && (bits & (1 << bit))) {  // not empty
          if (!serializer(fp, &table[i + bit])) return false;
        }
      }
    }
    return true;
  }
| 1207 | |
 private:
  // Wraps an arbitrary allocator and adds a realloc_or_die() hook.
  // This generic version cannot realloc, so it aborts if called; the
  // specialization below provides the real implementation.
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) {}

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr,
              "realloc_or_die is only supported for "
              "libc_allocator_with_realloc\n" );
      exit(1);
      return NULL;  // unreachable; silences compilers that want a return
    }
  };
| 1228 | |
  // A template specialization of alloc_impl for
  // libc_allocator_with_realloc that can handle realloc_or_die.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A>>
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) {}

    // Grow/shrink `ptr` to hold n elements, aborting the process when
    // the underlying realloc fails (out of memory is unrecoverable here).
    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        fprintf(stderr,
                "sparsehash: FATAL ERROR: failed to reallocate "
                "%lu elements for ptr %p" ,
                static_cast<unsigned long>(n), static_cast<void*>(ptr));
        exit(1);
      }
      return retval;
    }
  };
| 1253 | |
  // Package allocator with emptyval to eliminate memory needed for
  // the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  class ValInfo : public alloc_impl<value_alloc_type> {
   public:
    typedef typename alloc_impl<value_alloc_type>::value_type value_type;

    ValInfo(const alloc_impl<value_alloc_type>& a)
        : alloc_impl<value_alloc_type>(a) {}
  };
| 1265 | |
  // Package functors with another class to eliminate memory needed for
  // zero-size functors. Since ExtractKey and hasher's operator() might
  // have the same function signature, they must be packaged in
  // different classes.
  struct Settings
      : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                   HT_MIN_BUCKETS> {
    // Seeds the shared settings with the default occupancy/empty
    // percentages converted to [0,1] factors.
    explicit Settings(const hasher& hf)
        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
                                                     size_type, HT_MIN_BUCKETS>(
              hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };
| 1278 | |
  // Packages ExtractKey and SetKey functors.
  // Inherits from all three functors (empty-base optimization) and also
  // stores the sentinel keys used to mark deleted and empty buckets.
  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
        : ExtractKey(ek), SetKey(sk), EqualKey(eq) {}

    // We want to return the exact same type as ExtractKey: Key or const Key&
    template <typename V>
    typename ExtractKey::result_type get_key(V&& v) const {
      return ExtractKey::operator()(std::forward<V>(v));
    }
    // Overwrite the key portion of *v with k.
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    // Like set_key, but the extra `true` flag selects construction
    // rather than assignment in the SetKey functor.
    void construct_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k, true);
    }
    template <typename K1, typename K2>
    bool equals(const K1& a, const K2& b) const {
      return EqualKey::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename std::remove_const<key_type>::type delkey;
    // Which key marks never-used (empty) buckets.
    typename std::remove_const<key_type>::type empty_key;
  };
| 1306 | |
  // Utility functions to access the templated operators
  // (thin forwards to the packaged functors above).
  template <typename K>
  size_type hash(const K& v) const { return settings.hash(v); }
  template <typename K1, typename K2>
  bool equals(const K1& a, const K2& b) const {
    return key_info.equals(a, b);
  }
  template <typename V>
  typename ExtractKey::result_type get_key(V&& v) const {
    return key_info.get_key(std::forward<V>(v));
  }
  void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); }
  void construct_key(pointer v, const key_type& k) const { key_info.construct_key(v, k); }
| 1320 | |
 private:
  // Actual data
  Settings settings;  // hasher plus cached resize thresholds/factors
  KeyInfo key_info;   // key functors plus empty/deleted sentinel keys

  size_type num_deleted;  // how many occupied buckets are marked deleted
  size_type num_elements; // live elements (includes deleted-marked ones until purged)
  size_type num_buckets;  // size of the `table` array; always a power of two
  ValInfo val_info;  // holds emptyval, and also the allocator
  pointer table;     // the bucket array; NULL until first allocation
| 1331 | }; |
| 1332 | |
// We need a global swap as well
// (forwards to the member swap so ADL-based generic code finds it)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& x,
                 dense_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) {
  x.swap(y);
}
| 1339 | |
#undef JUMP_

// Out-of-line definition of the class-scope constant (required when it
// is odr-used, pre-C++17).
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type
    dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET;
| 1345 | |
// How full we let the table get before we resize. Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// However, we go with .5, getting better performance at the cost of
// more space (a trade-off densehashtable explicitly chooses to make).
// Feel free to play around with different values, though, via
// max_load_factor() and/or set_resizing_parameters().
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 50;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
// (0.4 * 50 = 20%, which satisfies that constraint.)
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
    static_cast<int>(
        0.4 * dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
| 1361 | |
| 1362 | } // namespace google |
| 1363 | |