1 | // Copyright (c) 2005, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | // --- |
31 | // |
32 | // A sparse hashtable is a particular implementation of |
33 | // a hashtable: one that is meant to minimize memory use. |
34 | // It does this by using a *sparse table* (cf sparsetable.h), |
// which uses between 1 and 2 bits per empty bucket
36 | // (we may need another bit for hashtables that support deletion). |
37 | // |
38 | // When empty buckets are so cheap, an appealing hashtable |
39 | // implementation is internal probing, in which the hashtable |
40 | // is a single table, and collisions are resolved by trying |
41 | // to insert again in another bucket. The most cache-efficient |
42 | // internal probing schemes are linear probing (which suffers, |
43 | // alas, from clumping) and quadratic probing, which is what |
44 | // we implement by default. |
45 | // |
46 | // Deleted buckets are a bit of a pain. We have to somehow mark |
47 | // deleted buckets (the probing must distinguish them from empty |
48 | // buckets). The most principled way is to have another bitmap, |
49 | // but that's annoying and takes up space. Instead we let the |
50 | // user specify an "impossible" key. We set deleted buckets |
51 | // to have the impossible key. |
52 | // |
// Note it is possible to change the value of the deleted key
// on the fly; you can even remove it, though after that point
// the hashtable is insert-only until you set it again.
56 | // |
57 | // You probably shouldn't use this code directly. Use |
58 | // sparse_hash_map<> or sparse_hash_set<> instead. |
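//
// For example, a minimal sketch of typical use via the wrapper
// (assumes <sparsehash/sparse_hash_map> is on the include path):
//
//    #include <sparsehash/sparse_hash_map>
//
//    google::sparse_hash_map<int, int> ages;
//    ages.set_deleted_key(-1);  // -1 must never occur as a real key
//    ages[1] = 30;              // insert
//    ages.erase(1);             // marks the bucket with the key -1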
59 | // |
60 | // You can modify the following, below: |
61 | // HT_OCCUPANCY_PCT -- how full before we double size |
62 | // HT_EMPTY_PCT -- how empty before we halve size |
63 | // HT_MIN_BUCKETS -- smallest bucket size |
64 | // HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time |
65 | // |
66 | // You can also change enlarge_factor (which defaults to |
67 | // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to |
68 | // HT_EMPTY_PCT) with set_resizing_parameters(). |
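//
// For example (a sketch; 'ht' stands for any instance of this class):
//
//    float shrink, grow;
//    ht.get_resizing_parameters(&shrink, &grow);
//    ht.set_resizing_parameters(0.0f, 0.8f);  // never shrink; grow at 80%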
69 | // |
70 | // How to decide what values to use? |
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
72 | // HT_MIN_BUCKETS is probably unnecessary since you can specify |
73 | // (indirectly) the starting number of buckets at construct-time. |
// For enlarge_factor, you can use the chart below to trade off
// expected lookup time against the space taken up. By default, this
// code uses quadratic probing, though you can change it to linear
// probing via JUMP_ below if you really want to.
78 | // |
79 | // From |
80 | // http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html |
81 | // NUMBER OF PROBES / LOOKUP Successful Unsuccessful |
82 | // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) |
// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)^2]/2
84 | // |
85 | // -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 |
86 | // QUADRATIC COLLISION RES. |
87 | // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 |
88 | // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 |
89 | // LINEAR COLLISION RES. |
90 | // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 |
91 | // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 |
92 | // |
93 | // The value type is required to be copy constructible and default |
94 | // constructible, but it need not be (and commonly isn't) assignable. |
95 | |
96 | #pragma once |
97 | |
98 | #include <assert.h> |
#include <algorithm> // for swap(), e.g.
100 | #include <iterator> // for iterator tags |
101 | #include <limits> // for numeric_limits |
102 | #include <utility> // for pair |
#include <type_traits> // for remove_const
#include <cstdint> // for uint16_t
104 | #include <sparsehash/internal/hashtable-common.h> |
105 | #include <sparsehash/sparsetable> // IWYU pragma: export |
106 | #include <stdexcept> // For length_error |
107 | |
108 | namespace google { |
109 | |
110 | #ifndef SPARSEHASH_STAT_UPDATE |
111 | #define SPARSEHASH_STAT_UPDATE(x) ((void)0) |
112 | #endif |
113 | |
114 | // The probing method |
115 | // Linear probing |
116 | // #define JUMP_(key, num_probes) ( 1 ) |
117 | // Quadratic probing |
118 | #define JUMP_(key, num_probes) (num_probes) |
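// With quadratic probing, num_probes is incremented before each jump, so
// successive probes land at offsets 1, 3, 6, 10, ... (the triangular
// numbers) from the home bucket. For a table of size S (a power of two)
// and home bucket h, the probe sequence is:
//   h,  (h+1) & (S-1),  (h+1+2) & (S-1),  (h+1+2+3) & (S-1),  ...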
119 | |
120 | // The smaller this is, the faster lookup is (because the group bitmap is |
121 | // smaller) and the faster insert is, because there's less to move. |
122 | // On the other hand, there are more groups. Since group::size_type is |
123 | // a short, this number should be of the form 32*x + 16 to avoid waste. |
124 | static const uint16_t DEFAULT_GROUP_SIZE = 48; // fits in 1.5 words |
125 | |
126 | // Hashtable class, used to implement the hashed associative containers |
127 | // hash_set and hash_map. |
128 | // |
129 | // Value: what is stored in the table (each bucket is a Value). |
130 | // Key: something in a 1-to-1 correspondence to a Value, that can be used |
131 | // to search for a Value in the table (find() takes a Key). |
132 | // HashFcn: Takes a Key and returns an integer, the more unique the better. |
133 | // ExtractKey: given a Value, returns the unique Key associated with it. |
134 | // Must inherit from unary_function, or at least have a |
135 | // result_type enum indicating the return type of operator(). |
136 | // SetKey: given a Value* and a Key, modifies the value such that |
137 | // ExtractKey(value) == key. We guarantee this is only called |
138 | // with key == deleted_key. |
139 | // EqualKey: Given two Keys, says whether they are the same (that is, |
140 | // if they are both associated with the same Value). |
141 | // Alloc: STL allocator to use to allocate memory. |
142 | |
143 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
144 | class EqualKey, class Alloc> |
145 | class sparse_hashtable; |
146 | |
147 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
148 | struct sparse_hashtable_iterator; |
149 | |
150 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
151 | struct sparse_hashtable_const_iterator; |
152 | |
// As far as iteration goes, we're basically just a sparsetable
154 | // that skips over deleted elements. |
155 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
156 | struct sparse_hashtable_iterator { |
157 | private: |
158 | using value_alloc_type = |
159 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
160 | |
161 | public: |
162 | typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
163 | typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
164 | const_iterator; |
165 | typedef typename sparsetable<V, DEFAULT_GROUP_SIZE, |
166 | value_alloc_type>::nonempty_iterator st_iterator; |
167 | |
168 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
169 | typedef V value_type; |
170 | typedef typename value_alloc_type::difference_type difference_type; |
171 | typedef typename value_alloc_type::size_type size_type; |
172 | typedef typename value_alloc_type::reference reference; |
173 | typedef typename value_alloc_type::pointer pointer; |
174 | |
175 | // "Real" constructor and default constructor |
176 | sparse_hashtable_iterator( |
177 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it, |
178 | st_iterator it_end) |
179 | : ht(h), pos(it), end(it_end) { |
180 | advance_past_deleted(); |
181 | } |
  sparse_hashtable_iterator() {}  // never used internally
183 | // The default destructor is fine; we don't define one |
184 | // The default operator= is fine; we don't define one |
185 | |
186 | // Happy dereferencer |
187 | reference operator*() const { return *pos; } |
188 | pointer operator->() const { return &(operator*()); } |
189 | |
190 | // Arithmetic. The only hard part is making sure that |
191 | // we're not on a marked-deleted array element |
192 | void advance_past_deleted() { |
193 | while (pos != end && ht->test_deleted(*this)) ++pos; |
194 | } |
195 | iterator& operator++() { |
196 | assert(pos != end); |
197 | ++pos; |
198 | advance_past_deleted(); |
199 | return *this; |
200 | } |
201 | iterator operator++(int) { |
202 | iterator tmp(*this); |
203 | ++*this; |
204 | return tmp; |
205 | } |
206 | |
207 | // Comparison. |
208 | bool operator==(const iterator& it) const { return pos == it.pos; } |
209 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
210 | |
211 | // The actual data |
212 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
213 | st_iterator pos, end; |
214 | }; |
215 | |
216 | // Now do it all again, but with const-ness! |
217 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
218 | struct sparse_hashtable_const_iterator { |
219 | private: |
220 | using value_alloc_type = |
221 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
222 | |
223 | public: |
224 | typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator; |
225 | typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> |
226 | const_iterator; |
227 | typedef typename sparsetable<V, DEFAULT_GROUP_SIZE, |
228 | value_alloc_type>::const_nonempty_iterator |
229 | st_iterator; |
230 | |
231 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
232 | typedef V value_type; |
233 | typedef typename value_alloc_type::difference_type difference_type; |
234 | typedef typename value_alloc_type::size_type size_type; |
235 | typedef typename value_alloc_type::const_reference reference; |
236 | typedef typename value_alloc_type::const_pointer pointer; |
237 | |
238 | // "Real" constructor and default constructor |
239 | sparse_hashtable_const_iterator( |
240 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it, |
241 | st_iterator it_end) |
242 | : ht(h), pos(it), end(it_end) { |
243 | advance_past_deleted(); |
244 | } |
  sparse_hashtable_const_iterator() {}  // never used internally
  // This lets us convert regular iterators to const iterators
  sparse_hashtable_const_iterator(const iterator& it)
      : ht(it.ht), pos(it.pos), end(it.end) {}
249 | // The default destructor is fine; we don't define one |
250 | // The default operator= is fine; we don't define one |
251 | |
252 | // Happy dereferencer |
253 | reference operator*() const { return *pos; } |
254 | pointer operator->() const { return &(operator*()); } |
255 | |
256 | // Arithmetic. The only hard part is making sure that |
257 | // we're not on a marked-deleted array element |
258 | void advance_past_deleted() { |
259 | while (pos != end && ht->test_deleted(*this)) ++pos; |
260 | } |
261 | const_iterator& operator++() { |
262 | assert(pos != end); |
263 | ++pos; |
264 | advance_past_deleted(); |
265 | return *this; |
266 | } |
267 | const_iterator operator++(int) { |
268 | const_iterator tmp(*this); |
269 | ++*this; |
270 | return tmp; |
271 | } |
272 | |
273 | // Comparison. |
274 | bool operator==(const const_iterator& it) const { return pos == it.pos; } |
275 | bool operator!=(const const_iterator& it) const { return pos != it.pos; } |
276 | |
277 | // The actual data |
278 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
279 | st_iterator pos, end; |
280 | }; |
281 | |
282 | // And once again, but this time freeing up memory as we iterate |
283 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
284 | struct sparse_hashtable_destructive_iterator { |
285 | private: |
286 | using value_alloc_type = |
287 | typename std::allocator_traits<A>::template rebind_alloc<V>; |
288 | |
289 | public: |
290 | typedef sparse_hashtable_destructive_iterator<V, K, HF, ExK, SetK, EqK, A> |
291 | iterator; |
292 | typedef |
293 | typename sparsetable<V, DEFAULT_GROUP_SIZE, |
294 | value_alloc_type>::destructive_iterator st_iterator; |
295 | |
296 | typedef std::forward_iterator_tag iterator_category; // very little defined! |
297 | typedef V value_type; |
298 | typedef typename value_alloc_type::difference_type difference_type; |
299 | typedef typename value_alloc_type::size_type size_type; |
300 | typedef typename value_alloc_type::reference reference; |
301 | typedef typename value_alloc_type::pointer pointer; |
302 | |
303 | // "Real" constructor and default constructor |
304 | sparse_hashtable_destructive_iterator( |
305 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it, |
306 | st_iterator it_end) |
307 | : ht(h), pos(it), end(it_end) { |
308 | advance_past_deleted(); |
309 | } |
310 | sparse_hashtable_destructive_iterator() {} // never used internally |
311 | // The default destructor is fine; we don't define one |
312 | // The default operator= is fine; we don't define one |
313 | |
314 | // Happy dereferencer |
315 | reference operator*() const { return *pos; } |
316 | pointer operator->() const { return &(operator*()); } |
317 | |
318 | // Arithmetic. The only hard part is making sure that |
319 | // we're not on a marked-deleted array element |
320 | void advance_past_deleted() { |
321 | while (pos != end && ht->test_deleted(*this)) ++pos; |
322 | } |
323 | iterator& operator++() { |
324 | assert(pos != end); |
325 | ++pos; |
326 | advance_past_deleted(); |
327 | return *this; |
328 | } |
329 | iterator operator++(int) { |
330 | iterator tmp(*this); |
331 | ++*this; |
332 | return tmp; |
333 | } |
334 | |
335 | // Comparison. |
336 | bool operator==(const iterator& it) const { return pos == it.pos; } |
337 | bool operator!=(const iterator& it) const { return pos != it.pos; } |
338 | |
339 | // The actual data |
340 | const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* ht; |
341 | st_iterator pos, end; |
342 | }; |
343 | |
344 | template <class Value, class Key, class HashFcn, class ExtractKey, class SetKey, |
345 | class EqualKey, class Alloc> |
346 | class sparse_hashtable { |
347 | private: |
348 | using value_alloc_type = |
349 | typename std::allocator_traits<Alloc>::template rebind_alloc<Value>; |
350 | |
351 | public: |
352 | typedef Key key_type; |
353 | typedef Value value_type; |
354 | typedef HashFcn hasher; |
355 | typedef EqualKey key_equal; |
356 | typedef Alloc allocator_type; |
357 | |
358 | typedef typename value_alloc_type::size_type size_type; |
359 | typedef typename value_alloc_type::difference_type difference_type; |
360 | typedef typename value_alloc_type::reference reference; |
361 | typedef typename value_alloc_type::const_reference const_reference; |
362 | typedef typename value_alloc_type::pointer pointer; |
363 | typedef typename value_alloc_type::const_pointer const_pointer; |
364 | typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey, |
365 | EqualKey, Alloc> iterator; |
366 | |
367 | typedef sparse_hashtable_const_iterator< |
368 | Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator; |
369 | |
370 | typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey, |
371 | SetKey, EqualKey, |
372 | Alloc> destructive_iterator; |
373 | |
374 | // These come from tr1. For us they're the same as regular iterators. |
375 | typedef iterator local_iterator; |
376 | typedef const_iterator const_local_iterator; |
377 | |
378 | // How full we let the table get before we resize, by default. |
379 | // Knuth says .8 is good -- higher causes us to probe too much, |
380 | // though it saves memory. |
381 | static const int HT_OCCUPANCY_PCT; // = 80 (out of 100); |
382 | |
383 | // How empty we let the table get before we resize lower, by default. |
384 | // (0.0 means never resize lower.) |
385 | // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing |
386 | static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; |
387 | |
388 | // Minimum size we're willing to let hashtables be. |
389 | // Must be a power of two, and at least 4. |
390 | // Note, however, that for a given hashtable, the initial size is a |
391 | // function of the first constructor arg, and may be >HT_MIN_BUCKETS. |
392 | static const size_type HT_MIN_BUCKETS = 4; |
393 | |
394 | // By default, if you don't specify a hashtable size at |
395 | // construction-time, we use this size. Must be a power of two, and |
396 | // at least HT_MIN_BUCKETS. |
397 | static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; |
398 | |
399 | // ITERATOR FUNCTIONS |
400 | iterator begin() { |
401 | return iterator(this, table.nonempty_begin(), table.nonempty_end()); |
402 | } |
403 | iterator end() { |
404 | return iterator(this, table.nonempty_end(), table.nonempty_end()); |
405 | } |
406 | const_iterator begin() const { |
407 | return const_iterator(this, table.nonempty_begin(), table.nonempty_end()); |
408 | } |
409 | const_iterator end() const { |
410 | return const_iterator(this, table.nonempty_end(), table.nonempty_end()); |
411 | } |
412 | |
413 | // These come from tr1 unordered_map. They iterate over 'bucket' n. |
414 | // For sparsehashtable, we could consider each 'group' to be a bucket, |
415 | // I guess, but I don't really see the point. We'll just consider |
416 | // bucket n to be the n-th element of the sparsetable, if it's occupied, |
417 | // or some empty element, otherwise. |
418 | local_iterator begin(size_type i) { |
419 | if (table.test(i)) |
420 | return local_iterator(this, table.get_iter(i), table.nonempty_end()); |
421 | else |
422 | return local_iterator(this, table.nonempty_end(), table.nonempty_end()); |
423 | } |
424 | local_iterator end(size_type i) { |
425 | local_iterator it = begin(i); |
426 | if (table.test(i) && !test_deleted(i)) ++it; |
427 | return it; |
428 | } |
429 | const_local_iterator begin(size_type i) const { |
430 | if (table.test(i)) |
431 | return const_local_iterator(this, table.get_iter(i), |
432 | table.nonempty_end()); |
433 | else |
434 | return const_local_iterator(this, table.nonempty_end(), |
435 | table.nonempty_end()); |
436 | } |
437 | const_local_iterator end(size_type i) const { |
438 | const_local_iterator it = begin(i); |
439 | if (table.test(i) && !test_deleted(i)) ++it; |
440 | return it; |
441 | } |
442 | |
443 | // This is used when resizing |
444 | destructive_iterator destructive_begin() { |
445 | return destructive_iterator(this, table.destructive_begin(), |
446 | table.destructive_end()); |
447 | } |
448 | destructive_iterator destructive_end() { |
449 | return destructive_iterator(this, table.destructive_end(), |
450 | table.destructive_end()); |
451 | } |
452 | |
453 | // ACCESSOR FUNCTIONS for the things we templatize on, basically |
454 | hasher hash_funct() const { return settings; } |
455 | key_equal key_eq() const { return key_info; } |
456 | allocator_type get_allocator() const { return table.get_allocator(); } |
457 | |
458 | // Accessor function for statistics gathering. |
459 | int num_table_copies() const { return settings.num_ht_copies(); } |
460 | |
461 | private: |
462 | // We need to copy values when we set the special marker for deleted |
463 | // elements, but, annoyingly, we can't just use the copy assignment |
464 | // operator because value_type might not be assignable (it's often |
465 | // pair<const X, Y>). We use explicit destructor invocation and |
466 | // placement new to get around this. Arg. |
467 | void set_value(pointer dst, const_reference src) { |
468 | dst->~value_type(); // delete the old value, if any |
469 | new (dst) value_type(src); |
470 | } |
471 | |
  // This is used as a tag for the copy constructor, saying to destroy its
  // arg. We have two ways of destructively copying: with potentially
  // growing the hashtable as we copy, and without. To make sure the
  // outside world can't do a destructive copy, we make the type private.
476 | enum MoveDontCopyT { MoveDontCopy, MoveDontGrow }; |
477 | |
  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries. This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
484 | private: |
485 | void squash_deleted() { // gets rid of any deleted entries we have |
486 | if (num_deleted) { // get rid of deleted before writing |
487 | sparse_hashtable tmp(MoveDontGrow, *this); |
488 | swap(tmp); // now we are tmp |
489 | } |
490 | assert(num_deleted == 0); |
491 | } |
492 | |
  // Test if the given key is the deleted indicator. Requires
  // num_deleted > 0: that is what guarantees key_info.delkey is valid
  // (e.g. right after read_metadata(), delkey is unset, but
  // num_deleted == 0 so we never get here).
496 | bool test_deleted_key(const key_type& key) const { |
497 | assert(num_deleted > 0); |
498 | return equals(key_info.delkey, key); |
499 | } |
500 | |
501 | public: |
502 | void set_deleted_key(const key_type& key) { |
503 | // It's only safe to change what "deleted" means if we purge deleted |
504 | // guys |
505 | squash_deleted(); |
506 | settings.set_use_deleted(true); |
507 | key_info.delkey = key; |
508 | } |
509 | void clear_deleted_key() { |
510 | squash_deleted(); |
511 | settings.set_use_deleted(false); |
512 | } |
513 | key_type deleted_key() const { |
514 | assert(settings.use_deleted() && |
515 | "Must set deleted key before calling deleted_key" ); |
516 | return key_info.delkey; |
517 | } |
518 | |
519 | // These are public so the iterators can use them |
  // True if the item at position bucknum is the "deleted" marker
521 | bool test_deleted(size_type bucknum) const { |
522 | // Invariant: !use_deleted() implies num_deleted is 0. |
523 | assert(settings.use_deleted() || num_deleted == 0); |
524 | return num_deleted > 0 && table.test(bucknum) && |
525 | test_deleted_key(get_key(table.unsafe_get(bucknum))); |
526 | } |
527 | bool test_deleted(const iterator& it) const { |
528 | // Invariant: !use_deleted() implies num_deleted is 0. |
529 | assert(settings.use_deleted() || num_deleted == 0); |
530 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
531 | } |
532 | bool test_deleted(const const_iterator& it) const { |
533 | // Invariant: !use_deleted() implies num_deleted is 0. |
534 | assert(settings.use_deleted() || num_deleted == 0); |
535 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
536 | } |
537 | bool test_deleted(const destructive_iterator& it) const { |
538 | // Invariant: !use_deleted() implies num_deleted is 0. |
539 | assert(settings.use_deleted() || num_deleted == 0); |
540 | return num_deleted > 0 && test_deleted_key(get_key(*it)); |
541 | } |
542 | |
543 | private: |
544 | void check_use_deleted(const char* caller) { |
545 | (void)caller; // could log it if the assert failed |
546 | assert(settings.use_deleted()); |
547 | } |
548 | |
  // Set it so test_deleted is true. Returns true if the object was not
  // already deleted.
551 | // TODO(csilvers): make these private (also in densehashtable.h) |
552 | bool set_deleted(iterator& it) { |
553 | check_use_deleted("set_deleted()" ); |
554 | bool retval = !test_deleted(it); |
555 | // &* converts from iterator to value-type. |
556 | set_key(&(*it), key_info.delkey); |
557 | return retval; |
558 | } |
  // Set it so test_deleted is false. Returns true if the object was
  // previously deleted.
560 | bool clear_deleted(iterator& it) { |
561 | check_use_deleted("clear_deleted()" ); |
562 | // Happens automatically when we assign something else in its place. |
563 | return test_deleted(it); |
564 | } |
565 | |
  // We also allow setting/clearing the deleted bit on a const iterator.
567 | // We allow a const_iterator for the same reason you can delete a |
568 | // const pointer: it's convenient, and semantically you can't use |
569 | // 'it' after it's been deleted anyway, so its const-ness doesn't |
570 | // really matter. |
571 | bool set_deleted(const_iterator& it) { |
572 | check_use_deleted("set_deleted()" ); |
573 | bool retval = !test_deleted(it); |
574 | set_key(const_cast<pointer>(&(*it)), key_info.delkey); |
575 | return retval; |
576 | } |
  // Set it so test_deleted is false. Returns true if the object was
  // previously deleted.
578 | bool clear_deleted(const_iterator& it) { |
579 | check_use_deleted("clear_deleted()" ); |
580 | return test_deleted(it); |
581 | } |
582 | |
583 | // FUNCTIONS CONCERNING SIZE |
584 | public: |
585 | size_type size() const { return table.num_nonempty() - num_deleted; } |
586 | size_type max_size() const { return table.max_size(); } |
587 | bool empty() const { return size() == 0; } |
588 | size_type bucket_count() const { return table.size(); } |
589 | size_type max_bucket_count() const { return max_size(); } |
590 | // These are tr1 methods. Their idea of 'bucket' doesn't map well to |
591 | // what we do. We just say every bucket has 0 or 1 items in it. |
592 | size_type bucket_size(size_type i) const { |
593 | return begin(i) == end(i) ? 0 : 1; |
594 | } |
595 | |
596 | private: |
597 | // Because of the above, size_type(-1) is never legal; use it for errors |
598 | static const size_type ILLEGAL_BUCKET = size_type(-1); |
599 | |
  // Used after a string of deletes. Returns true if we actually shrank.
601 | // TODO(csilvers): take a delta so we can take into account inserts |
602 | // done after shrinking. Maybe make part of the Settings class? |
603 | bool maybe_shrink() { |
604 | assert(table.num_nonempty() >= num_deleted); |
605 | assert((bucket_count() & (bucket_count() - 1)) == 0); // is a power of two |
606 | assert(bucket_count() >= HT_MIN_BUCKETS); |
607 | bool retval = false; |
608 | |
609 | // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, |
610 | // we'll never shrink until you get relatively big, and we'll never |
611 | // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something |
612 | // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will |
613 | // shrink us down to HT_MIN_BUCKETS buckets, which is too small. |
614 | const size_type num_remain = table.num_nonempty() - num_deleted; |
615 | const size_type shrink_threshold = settings.shrink_threshold(); |
616 | if (shrink_threshold > 0 && num_remain < shrink_threshold && |
617 | bucket_count() > HT_DEFAULT_STARTING_BUCKETS) { |
618 | const float shrink_factor = settings.shrink_factor(); |
619 | size_type sz = bucket_count() / 2; // find how much we should shrink |
620 | while (sz > HT_DEFAULT_STARTING_BUCKETS && |
621 | num_remain < static_cast<size_type>(sz * shrink_factor)) { |
622 | sz /= 2; // stay a power of 2 |
623 | } |
624 | sparse_hashtable tmp(MoveDontCopy, *this, sz); |
625 | swap(tmp); // now we are tmp |
626 | retval = true; |
627 | } |
628 | settings.set_consider_shrink(false); // because we just considered it |
629 | return retval; |
630 | } |
631 | |
632 | // We'll let you resize a hashtable -- though this makes us copy all! |
633 | // When you resize, you say, "make it big enough for this many more |
634 | // elements" |
635 | // Returns true if we actually resized, false if size was already ok. |
636 | bool resize_delta(size_type delta) { |
637 | bool did_resize = false; |
638 | if (settings.consider_shrink()) { // see if lots of deletes happened |
639 | if (maybe_shrink()) did_resize = true; |
640 | } |
641 | if (table.num_nonempty() >= |
642 | (std::numeric_limits<size_type>::max)() - delta) { |
643 | throw std::length_error("resize overflow" ); |
644 | } |
645 | if (bucket_count() >= HT_MIN_BUCKETS && |
646 | (table.num_nonempty() + delta) <= settings.enlarge_threshold()) |
647 | return did_resize; // we're ok as we are |
648 | |
649 | // Sometimes, we need to resize just to get rid of all the |
650 | // "deleted" buckets that are clogging up the hashtable. So when |
651 | // deciding whether to resize, count the deleted buckets (which |
652 | // are currently taking up room). But later, when we decide what |
653 | // size to resize to, *don't* count deleted buckets, since they |
654 | // get discarded during the resize. |
655 | const size_type needed_size = |
656 | settings.min_buckets(table.num_nonempty() + delta, 0); |
657 | if (needed_size <= bucket_count()) // we have enough buckets |
658 | return did_resize; |
659 | |
660 | size_type resize_to = settings.min_buckets( |
661 | table.num_nonempty() - num_deleted + delta, bucket_count()); |
662 | if (resize_to < needed_size && // may double resize_to |
663 | resize_to < (std::numeric_limits<size_type>::max)() / 2) { |
664 | // This situation means that we have enough deleted elements, |
665 | // that once we purge them, we won't actually have needed to |
666 | // grow. But we may want to grow anyway: if we just purge one |
667 | // element, say, we'll have to grow anyway next time we |
668 | // insert. Might as well grow now, since we're already going |
669 | // through the trouble of copying (in order to purge the |
670 | // deleted elements). |
671 | const size_type target = |
672 | static_cast<size_type>(settings.shrink_size(resize_to * 2)); |
673 | if (table.num_nonempty() - num_deleted + delta >= target) { |
        // Good, we won't be below the shrink threshold even if we
675 | // double. |
676 | resize_to *= 2; |
677 | } |
678 | } |
679 | |
680 | sparse_hashtable tmp(MoveDontCopy, *this, resize_to); |
681 | swap(tmp); // now we are tmp |
682 | return true; |
683 | } |
684 | |
685 | // Used to actually do the rehashing when we grow/shrink a hashtable |
686 | void copy_from(const sparse_hashtable& ht, size_type min_buckets_wanted) { |
687 | clear(); // clear table, set num_deleted to 0 |
688 | |
689 | // If we need to change the size of our table, do it now |
690 | const size_type resize_to = |
691 | settings.min_buckets(ht.size(), min_buckets_wanted); |
692 | if (resize_to > bucket_count()) { // we don't have enough buckets |
693 | table.resize(resize_to); // sets the number of buckets |
694 | settings.reset_thresholds(bucket_count()); |
695 | } |
696 | |
    // We use a normal iterator to get non-deleted buckets from ht
698 | // We could use insert() here, but since we know there are |
699 | // no duplicates and no deleted items, we can be more efficient |
700 | assert((bucket_count() & (bucket_count() - 1)) == 0); // a power of two |
701 | for (const_iterator it = ht.begin(); it != ht.end(); ++it) { |
702 | size_type num_probes = 0; // how many times we've probed |
703 | size_type bucknum; |
704 | const size_type bucket_count_minus_one = bucket_count() - 1; |
705 | for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; |
706 | table.test(bucknum); // not empty |
           bucknum = (bucknum + JUMP_(get_key(*it), num_probes)) &
                     bucket_count_minus_one) {
709 | ++num_probes; |
710 | assert(num_probes < bucket_count() && |
711 | "Hashtable is full: an error in key_equal<> or hash<>" ); |
712 | } |
713 | table.set(bucknum, *it); // copies the value to here |
714 | } |
715 | settings.inc_num_ht_copies(); |
716 | } |
717 | |
718 | // Implementation is like copy_from, but it destroys the table of the |
719 | // "from" guy by freeing sparsetable memory as we iterate. This is |
720 | // useful in resizing, since we're throwing away the "from" guy anyway. |
721 | void move_from(MoveDontCopyT mover, sparse_hashtable& ht, |
722 | size_type min_buckets_wanted) { |
723 | clear(); // clear table, set num_deleted to 0 |
724 | |
725 | // If we need to change the size of our table, do it now |
726 | size_type resize_to; |
727 | if (mover == MoveDontGrow) |
728 | resize_to = ht.bucket_count(); // keep same size as old ht |
729 | else // MoveDontCopy |
730 | resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); |
731 | if (resize_to > bucket_count()) { // we don't have enough buckets |
732 | table.resize(resize_to); // sets the number of buckets |
733 | settings.reset_thresholds(bucket_count()); |
734 | } |
735 | |
    // We use a normal iterator to get non-deleted buckets from ht
737 | // We could use insert() here, but since we know there are |
738 | // no duplicates and no deleted items, we can be more efficient |
739 | assert((bucket_count() & (bucket_count() - 1)) == 0); // a power of two |
740 | // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): |
741 | for (destructive_iterator it = ht.destructive_begin(); |
742 | it != ht.destructive_end(); ++it) { |
743 | size_type num_probes = 0; // how many times we've probed |
744 | size_type bucknum; |
745 | for (bucknum = hash(get_key(*it)) & (bucket_count() - 1); // h % buck_cnt |
746 | table.test(bucknum); // not empty |
           bucknum = (bucknum + JUMP_(get_key(*it), num_probes)) &
                     (bucket_count() - 1)) {
749 | ++num_probes; |
750 | assert(num_probes < bucket_count() && |
751 | "Hashtable is full: an error in key_equal<> or hash<>" ); |
752 | } |
753 | table.set(bucknum, *it); // copies the value to here |
754 | } |
755 | settings.inc_num_ht_copies(); |
756 | } |
757 | |
758 | // Required by the spec for hashed associative container |
759 | public: |
760 | // Though the docs say this should be num_buckets, I think it's much |
761 | // more useful as num_elements. As a special feature, calling with |
762 | // req_elements==0 will cause us to shrink if we can, saving space. |
763 | void resize(size_type req_elements) { // resize to this or larger |
764 | if (settings.consider_shrink() || req_elements == 0) maybe_shrink(); |
765 | if (req_elements > table.num_nonempty()) // we only grow |
766 | resize_delta(req_elements - table.num_nonempty()); |
767 | } |
768 | |
769 | // Get and change the value of shrink_factor and enlarge_factor. The |
770 | // description at the beginning of this file explains how to choose |
771 | // the values. Setting the shrink parameter to 0.0 ensures that the |
772 | // table never shrinks. |
773 | void get_resizing_parameters(float* shrink, float* grow) const { |
774 | *shrink = settings.shrink_factor(); |
775 | *grow = settings.enlarge_factor(); |
776 | } |
777 | void set_resizing_parameters(float shrink, float grow) { |
778 | settings.set_resizing_parameters(shrink, grow); |
779 | settings.reset_thresholds(bucket_count()); |
780 | } |
781 | |
782 | // CONSTRUCTORS -- as required by the specs, we take a size, |
783 | // but also let you specify a hashfunction, key comparator, |
784 | // and key extractor. We also define a copy constructor and =. |
785 | // DESTRUCTOR -- the default is fine, surprisingly. |
786 | explicit sparse_hashtable(size_type expected_max_items_in_table = 0, |
787 | const HashFcn& hf = HashFcn(), |
788 | const EqualKey& eql = EqualKey(), |
789 | const ExtractKey& ext = ExtractKey(), |
790 | const SetKey& set = SetKey(), |
791 | const Alloc& alloc = Alloc()) |
792 | : settings(hf), |
793 | key_info(ext, set, eql), |
794 | num_deleted(0), |
795 | table((expected_max_items_in_table == 0 |
796 | ? HT_DEFAULT_STARTING_BUCKETS |
797 | : settings.min_buckets(expected_max_items_in_table, 0)), |
798 | alloc) { |
799 | settings.reset_thresholds(bucket_count()); |
800 | } |
801 | |
802 | // As a convenience for resize(), we allow an optional second argument |
803 | // which lets you make this new hashtable a different size than ht. |
804 | // We also provide a mechanism of saying you want to "move" the ht argument |
805 | // into us instead of copying. |
806 | sparse_hashtable(const sparse_hashtable& ht, |
807 | size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) |
808 | : settings(ht.settings), |
809 | key_info(ht.key_info), |
810 | num_deleted(0), |
811 | table(0, ht.get_allocator()) { |
812 | settings.reset_thresholds(bucket_count()); |
813 | copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries |
814 | } |
815 | sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht, |
816 | size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) |
817 | : settings(ht.settings), |
818 | key_info(ht.key_info), |
819 | num_deleted(0), |
820 | table(0, ht.get_allocator()) { |
821 | settings.reset_thresholds(bucket_count()); |
822 | move_from(mover, ht, min_buckets_wanted); // ignores deleted entries |
823 | } |
824 | |
825 | sparse_hashtable& operator=(const sparse_hashtable& ht) { |
826 | if (&ht == this) return *this; // don't copy onto ourselves |
827 | settings = ht.settings; |
828 | key_info = ht.key_info; |
829 | num_deleted = ht.num_deleted; |
830 | // copy_from() calls clear and sets num_deleted to 0 too |
831 | copy_from(ht, HT_MIN_BUCKETS); |
832 | // we purposefully don't copy the allocator, which may not be copyable |
833 | return *this; |
834 | } |
835 | |
836 | // Many STL algorithms use swap instead of copy constructors |
837 | void swap(sparse_hashtable& ht) { |
838 | std::swap(settings, ht.settings); |
839 | std::swap(key_info, ht.key_info); |
840 | std::swap(num_deleted, ht.num_deleted); |
841 | table.swap(ht.table); |
842 | settings.reset_thresholds(bucket_count()); // also resets consider_shrink |
843 | ht.settings.reset_thresholds(ht.bucket_count()); |
844 | // we purposefully don't swap the allocator, which may not be swap-able |
845 | } |
846 | |
847 | // It's always nice to be able to clear a table without deallocating it |
848 | void clear() { |
849 | if (!empty() || (num_deleted != 0)) { |
850 | table.clear(); |
851 | } |
852 | settings.reset_thresholds(bucket_count()); |
853 | num_deleted = 0; |
854 | } |
855 | |
856 | // LOOKUP ROUTINES |
857 | private: |
858 | // Returns a pair of positions: 1st where the object is, 2nd where |
859 | // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET |
860 | // if object is not found; 2nd is ILLEGAL_BUCKET if it is. |
  // Note: because of deletions, where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later.
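  //
  // For example (hypothetical values): a return of {ILLEGAL_BUCKET, 7}
  // means the key is absent and would go in bucket 7, while a return of
  // {7, ILLEGAL_BUCKET} means the key currently lives in bucket 7.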
863 | template <typename K> |
864 | std::pair<size_type, size_type> find_position(const K& key) const { |
865 | size_type num_probes = 0; // how many times we've probed |
866 | const size_type bucket_count_minus_one = bucket_count() - 1; |
867 | size_type bucknum = hash(key) & bucket_count_minus_one; |
868 | size_type insert_pos = ILLEGAL_BUCKET; // where we would insert |
869 | SPARSEHASH_STAT_UPDATE(total_lookups += 1); |
870 | while (1) { // probe until something happens |
871 | if (!table.test(bucknum)) { // bucket is empty |
872 | SPARSEHASH_STAT_UPDATE(total_probes += num_probes); |
873 | if (insert_pos == ILLEGAL_BUCKET) // found no prior place to insert |
874 | return std::pair<size_type, size_type>(ILLEGAL_BUCKET, bucknum); |
875 | else |
876 | return std::pair<size_type, size_type>(ILLEGAL_BUCKET, insert_pos); |
877 | } else if (test_deleted(bucknum)) { // keep searching, but mark to insert |
878 | if (insert_pos == ILLEGAL_BUCKET) insert_pos = bucknum; |
879 | } else if (equals(key, get_key(table.unsafe_get(bucknum)))) { |
880 | SPARSEHASH_STAT_UPDATE(total_probes += num_probes); |
881 | return std::pair<size_type, size_type>(bucknum, ILLEGAL_BUCKET); |
882 | } |
883 | ++num_probes; // we're doing another probe |
884 | bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; |
885 | assert(num_probes < bucket_count() && |
886 | "Hashtable is full: an error in key_equal<> or hash<>" ); |
887 | } |
888 | } |
889 | |
890 | public: |
891 | template <typename K> |
892 | iterator find(const K& key) { |
893 | if (size() == 0) return end(); |
894 | std::pair<size_type, size_type> pos = find_position(key); |
895 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
896 | return end(); |
897 | else |
898 | return iterator(this, table.get_iter(pos.first), table.nonempty_end()); |
899 | } |
900 | |
901 | template <typename K> |
902 | const_iterator find(const K& key) const { |
903 | if (size() == 0) return end(); |
904 | std::pair<size_type, size_type> pos = find_position(key); |
905 | if (pos.first == ILLEGAL_BUCKET) // alas, not there |
906 | return end(); |
907 | else |
908 | return const_iterator(this, table.get_iter(pos.first), |
909 | table.nonempty_end()); |
910 | } |
911 | |
912 | // This is a tr1 method: the bucket a given key is in, or what bucket |
913 | // it would be put in, if it were to be inserted. Shrug. |
914 | size_type bucket(const key_type& key) const { |
915 | std::pair<size_type, size_type> pos = find_position(key); |
916 | return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; |
917 | } |
918 | |
  // Counts how many elements have the given key. For maps, it's either
  // 0 or 1.
920 | template <typename K> |
921 | size_type count(const K& key) const { |
922 | std::pair<size_type, size_type> pos = find_position(key); |
923 | return pos.first == ILLEGAL_BUCKET ? 0 : 1; |
924 | } |
925 | |
926 | // Likewise, equal_range doesn't really make sense for us. Oh well. |
927 | template <typename K> |
928 | std::pair<iterator, iterator> equal_range(const K& key) { |
929 | iterator pos = find(key); // either an iterator or end |
930 | if (pos == end()) { |
931 | return std::pair<iterator, iterator>(pos, pos); |
932 | } else { |
933 | const iterator startpos = pos++; |
934 | return std::pair<iterator, iterator>(startpos, pos); |
935 | } |
936 | } |
937 | template <typename K> |
938 | std::pair<const_iterator, const_iterator> equal_range( |
939 | const K& key) const { |
940 | const_iterator pos = find(key); // either an iterator or end |
941 | if (pos == end()) { |
942 | return std::pair<const_iterator, const_iterator>(pos, pos); |
943 | } else { |
944 | const const_iterator startpos = pos++; |
945 | return std::pair<const_iterator, const_iterator>(startpos, pos); |
946 | } |
947 | } |
948 | |
949 | // INSERTION ROUTINES |
950 | private: |
951 | // Private method used by insert_noresize and find_or_insert. |
952 | iterator insert_at(const_reference obj, size_type pos) { |
953 | if (size() >= max_size()) { |
954 | throw std::length_error("insert overflow" ); |
955 | } |
956 | if (test_deleted(pos)) { // just replace if it's been deleted |
957 | // The set() below will undelete this object. We just worry about |
958 | // stats |
959 | assert(num_deleted > 0); |
960 | --num_deleted; // used to be, now it isn't |
961 | } |
962 | table.set(pos, obj); |
963 | return iterator(this, table.get_iter(pos), table.nonempty_end()); |
964 | } |
965 | |
966 | // If you know *this is big enough to hold obj, use this routine |
967 | std::pair<iterator, bool> insert_noresize(const_reference obj) { |
968 | // First, double-check we're not inserting delkey |
969 | assert( |
970 | (!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) && |
971 | "Inserting the deleted key" ); |
972 | const std::pair<size_type, size_type> pos = find_position(get_key(obj)); |
973 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
974 | return std::pair<iterator, bool>( |
975 | iterator(this, table.get_iter(pos.first), table.nonempty_end()), |
976 | false); // false: we didn't insert |
977 | } else { // pos.second says where to put it |
978 | return std::pair<iterator, bool>(insert_at(obj, pos.second), true); |
979 | } |
980 | } |
981 | |
982 | // Specializations of insert(it, it) depending on the power of the iterator: |
  // (1) Forward iterators: we can compute the distance, so resize
  //     before inserting
984 | template <class ForwardIterator> |
985 | void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) { |
986 | size_t dist = std::distance(f, l); |
987 | if (dist >= (std::numeric_limits<size_type>::max)()) { |
988 | throw std::length_error("insert-range overflow" ); |
989 | } |
990 | resize_delta(static_cast<size_type>(dist)); |
991 | for (; dist > 0; --dist, ++f) { |
992 | insert_noresize(*f); |
993 | } |
994 | } |
995 | |
996 | // (2) Arbitrary iterator, can't tell how much to resize |
997 | template <class InputIterator> |
998 | void insert(InputIterator f, InputIterator l, std::input_iterator_tag) { |
999 | for (; f != l; ++f) insert(*f); |
1000 | } |
1001 | |
1002 | public: |
1003 | // This is the normal insert routine, used by the outside world |
1004 | std::pair<iterator, bool> insert(const_reference obj) { |
1005 | resize_delta(1); // adding an object, grow if need be |
1006 | return insert_noresize(obj); |
1007 | } |
1008 | |
1009 | // When inserting a lot at a time, we specialize on the type of iterator |
1010 | template <class InputIterator> |
1011 | void insert(InputIterator f, InputIterator l) { |
1012 | // specializes on iterator type |
1013 | insert(f, l, |
1014 | typename std::iterator_traits<InputIterator>::iterator_category()); |
1015 | } |
1016 | |
1017 | // DefaultValue is a functor that takes a key and returns a value_type |
1018 | // representing the default value to be inserted if none is found. |
1019 | template <class DefaultValue> |
1020 | value_type& find_or_insert(const key_type& key) { |
1021 | // First, double-check we're not inserting delkey |
1022 | assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && |
1023 | "Inserting the deleted key" ); |
1024 | const std::pair<size_type, size_type> pos = find_position(key); |
1025 | DefaultValue default_value; |
1026 | if (pos.first != ILLEGAL_BUCKET) { // object was already there |
1027 | return *table.get_iter(pos.first); |
1028 | } else if (resize_delta(1)) { // needed to rehash to make room |
1029 | // Since we resized, we can't use pos, so recalculate where to |
1030 | // insert. |
1031 | return *insert_noresize(default_value(key)).first; |
1032 | } else { // no need to rehash, insert right here |
1033 | return *insert_at(default_value(key), pos.second); |
1034 | } |
1035 | } |
1036 | |
1037 | // DELETION ROUTINES |
1038 | size_type erase(const key_type& key) { |
    // First, double-check we're not erasing the deleted key.
    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
           "Erasing the deleted key" );
1043 | const_iterator pos = find(key); // shrug: shouldn't need to be const |
1044 | if (pos != end()) { |
1045 | assert(!test_deleted(pos)); // or find() shouldn't have returned it |
1046 | set_deleted(pos); |
1047 | ++num_deleted; |
1048 | // will think about shrink after next insert |
1049 | settings.set_consider_shrink(true); |
1050 | return 1; // because we deleted one thing |
1051 | } else { |
1052 | return 0; // because we deleted nothing |
1053 | } |
1054 | } |
1055 | |
  // Marks the item at pos as deleted; no iterator is returned.
1057 | void erase(iterator pos) { |
1058 | if (pos == end()) return; // sanity check |
1059 | if (set_deleted(pos)) { // true if object has been newly deleted |
1060 | ++num_deleted; |
1061 | // will think about shrink after next insert |
1062 | settings.set_consider_shrink(true); |
1063 | } |
1064 | } |
1065 | |
1066 | void erase(iterator f, iterator l) { |
1067 | for (; f != l; ++f) { |
1068 | if (set_deleted(f)) // should always be true |
1069 | ++num_deleted; |
1070 | } |
1071 | // will think about shrink after next insert |
1072 | settings.set_consider_shrink(true); |
1073 | } |
1074 | |
1075 | // We allow you to erase a const_iterator just like we allow you to |
1076 | // erase an iterator. This is in parallel to 'delete': you can delete |
1077 | // a const pointer just like a non-const pointer. The logic is that |
1078 | // you can't use the object after it's erased anyway, so it doesn't matter |
1079 | // if it's const or not. |
1080 | void erase(const_iterator pos) { |
1081 | if (pos == end()) return; // sanity check |
1082 | if (set_deleted(pos)) { // true if object has been newly deleted |
1083 | ++num_deleted; |
1084 | // will think about shrink after next insert |
1085 | settings.set_consider_shrink(true); |
1086 | } |
1087 | } |
1088 | void erase(const_iterator f, const_iterator l) { |
1089 | for (; f != l; ++f) { |
1090 | if (set_deleted(f)) // should always be true |
1091 | ++num_deleted; |
1092 | } |
1093 | // will think about shrink after next insert |
1094 | settings.set_consider_shrink(true); |
1095 | } |
1096 | |
1097 | // COMPARISON |
1098 | bool operator==(const sparse_hashtable& ht) const { |
1099 | if (size() != ht.size()) { |
1100 | return false; |
1101 | } else if (this == &ht) { |
1102 | return true; |
1103 | } else { |
1104 | // Iterate through the elements in "this" and see if the |
1105 | // corresponding element is in ht |
1106 | for (const_iterator it = begin(); it != end(); ++it) { |
1107 | const_iterator it2 = ht.find(get_key(*it)); |
1108 | if ((it2 == ht.end()) || (*it != *it2)) { |
1109 | return false; |
1110 | } |
1111 | } |
1112 | return true; |
1113 | } |
1114 | } |
1115 | bool operator!=(const sparse_hashtable& ht) const { return !(*this == ht); } |
1116 | |
1117 | // I/O |
1118 | // We support reading and writing hashtables to disk. NOTE that |
1119 | // this only stores the hashtable metadata, not the stuff you've |
1120 | // actually put in the hashtable! Alas, since I don't know how to |
1121 | // write a hasher or key_equal, you have to make sure everything |
1122 | // but the table is the same. We compact before writing. |
1123 | // |
1124 | // The OUTPUT type needs to support a Write() operation. File and |
1125 | // OutputBuffer are appropriate types to pass in. |
1126 | // |
1127 | // The INPUT type needs to support a Read() operation. File and |
1128 | // InputBuffer are appropriate types to pass in. |
1129 | template <typename OUTPUT> |
1130 | bool write_metadata(OUTPUT* fp) { |
1131 | squash_deleted(); // so we don't have to worry about delkey |
1132 | return table.write_metadata(fp); |
1133 | } |
1134 | |
1135 | template <typename INPUT> |
1136 | bool read_metadata(INPUT* fp) { |
    num_deleted = 0;  // since we got rid of them before writing
1138 | const bool result = table.read_metadata(fp); |
1139 | settings.reset_thresholds(bucket_count()); |
1140 | return result; |
1141 | } |
1142 | |
1143 | // Only meaningful if value_type is a POD. |
1144 | template <typename OUTPUT> |
1145 | bool write_nopointer_data(OUTPUT* fp) { |
1146 | return table.write_nopointer_data(fp); |
1147 | } |
1148 | |
1149 | // Only meaningful if value_type is a POD. |
1150 | template <typename INPUT> |
1151 | bool read_nopointer_data(INPUT* fp) { |
1152 | return table.read_nopointer_data(fp); |
1153 | } |
1154 | |
1155 | // INPUT and OUTPUT must be either a FILE, *or* a C++ stream |
1156 | // (istream, ostream, etc) *or* a class providing |
1157 | // Read(void*, size_t) and Write(const void*, size_t) |
1158 | // (respectively), which writes a buffer into a stream |
1159 | // (which the INPUT/OUTPUT instance presumably owns). |
1160 | |
1161 | typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer; |
1162 | |
1163 | // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) |
1164 | template <typename ValueSerializer, typename OUTPUT> |
1165 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { |
1166 | squash_deleted(); // so we don't have to worry about delkey |
1167 | return table.serialize(serializer, fp); |
1168 | } |
1169 | |
1170 | // ValueSerializer: a functor. operator()(INPUT*, value_type*) |
1171 | template <typename ValueSerializer, typename INPUT> |
1172 | bool unserialize(ValueSerializer serializer, INPUT* fp) { |
    num_deleted = 0;  // since we got rid of them before writing
1174 | const bool result = table.unserialize(serializer, fp); |
1175 | settings.reset_thresholds(bucket_count()); |
1176 | return result; |
1177 | } |
1178 | |
1179 | private: |
1180 | // Table is the main storage class. |
1181 | typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table; |
1182 | |
1183 | // Package templated functors with the other types to eliminate memory |
1184 | // needed for storing these zero-size operators. Since ExtractKey and |
1185 | // hasher's operator() might have the same function signature, they |
1186 | // must be packaged in different classes. |
1187 | struct Settings |
1188 | : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type, |
1189 | HT_MIN_BUCKETS> { |
1190 | explicit Settings(const hasher& hf) |
1191 | : sparsehash_internal::sh_hashtable_settings<key_type, hasher, |
1192 | size_type, HT_MIN_BUCKETS>( |
1193 | hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} |
1194 | }; |
1195 | |
  // KeyInfo stores the deleted key and packages the zero-size functors:
  // ExtractKey, SetKey, and EqualKey.
1198 | class KeyInfo : public ExtractKey, public SetKey, public EqualKey { |
1199 | public: |
1200 | KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) |
1201 | : ExtractKey(ek), SetKey(sk), EqualKey(eq) {} |
1202 | // We want to return the exact same type as ExtractKey: Key or const |
1203 | // Key& |
1204 | typename ExtractKey::result_type get_key(const_reference v) const { |
1205 | return ExtractKey::operator()(v); |
1206 | } |
1207 | void set_key(pointer v, const key_type& k) const { |
1208 | SetKey::operator()(v, k); |
1209 | } |
1210 | template <typename K1, typename K2> |
1211 | bool equals(const K1& a, const K2& b) const { |
1212 | return EqualKey::operator()(a, b); |
1213 | } |
1214 | |
1215 | // Which key marks deleted entries. |
1216 | // TODO(csilvers): make a pointer, and get rid of use_deleted |
1217 | // (benchmark!) |
1218 | typename std::remove_const<key_type>::type delkey; |
1219 | }; |
1220 | |
1221 | // Utility functions to access the templated operators |
1222 | template <typename K> |
1223 | size_type hash(const K& v) const { return settings.hash(v); } |
1224 | template <typename K1, typename K2> |
1225 | bool equals(const K1& a, const K2& b) const { |
1226 | return key_info.equals(a, b); |
1227 | } |
1228 | typename ExtractKey::result_type get_key(const_reference v) const { |
1229 | return key_info.get_key(v); |
1230 | } |
1231 | void set_key(pointer v, const key_type& k) const { key_info.set_key(v, k); } |
1232 | |
1233 | private: |
1234 | // Actual data |
1235 | Settings settings; |
1236 | KeyInfo key_info; |
1237 | size_type num_deleted; // how many occupied buckets are marked deleted |
1238 | Table table; // holds num_buckets and num_elements too |
1239 | }; |
1240 | |
1241 | // We need a global swap as well |
1242 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1243 | inline void swap(sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& x, |
1244 | sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>& y) { |
1245 | x.swap(y); |
1246 | } |
1247 | |
1248 | #undef JUMP_ |
1249 | |
1250 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1251 | const typename sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::size_type |
1252 | sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::ILLEGAL_BUCKET; |
1253 | |
1254 | // How full we let the table get before we resize. Knuth says .8 is |
1255 | // good -- higher causes us to probe too much, though saves memory |
1256 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1257 | const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 80; |
1258 | |
1259 | // How empty we let the table get before we resize lower. |
1260 | // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing |
1261 | template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> |
1262 | const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT = |
1263 | static_cast<int>( |
1264 | 0.4 * sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT); |
1265 | } |
1266 | |