sparse_hash_map source code [ClickHouse/contrib/sparsehash-c11/sparsehash/sparse_hash_map]

1	// Copyright (c) 2005, Google Inc.
2	// All rights reserved.
3	//
4	// Redistribution and use in source and binary forms, with or without
5	// modification, are permitted provided that the following conditions are
6	// met:
7	//
8	// Redistributions of source code must retain the above copyright*
9	// notice, this list of conditions and the following disclaimer.
10	// Redistributions in binary form must reproduce the above*
11	// copyright notice, this list of conditions and the following disclaimer
12	// in the documentation and/or other materials provided with the
13	// distribution.
14	// Neither the name of Google Inc. nor the names of its*
15	// contributors may be used to endorse or promote products derived from
16	// this software without specific prior written permission.
17	//
18	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30	// ---
31	//
32	// This is just a very thin wrapper over sparsehashtable.h, just
33	// like sgi stl's stl_hash_map is a very thin wrapper over
34	// stl_hashtable. The major thing we define is operator[], because
35	// we have a concept of a data_type which stl_hashtable doesn't
36	// (it only has a key and a value).
37	//
38	// We adhere mostly to the STL semantics for hash-map. One important
39	// exception is that insert() may invalidate iterators entirely -- STL
40	// semantics are that insert() may reorder iterators, but they all
41	// still refer to something valid in the hashtable. Not so for us.
42	// Likewise, insert() may invalidate pointers into the hashtable.
43	// (Whether insert invalidates iterators and pointers depends on
44	// whether it results in a hashtable resize). On the plus side,
45	// delete() doesn't invalidate iterators or pointers at all, or even
46	// change the ordering of elements.
47	//
48	// Here are a few "power user" tips:
49	//
50	// 1) set_deleted_key():
51	// Unlike STL's hash_map, if you want to use erase() you
52	// must* call set_deleted_key() after construction.*
53	//
54	// 2) resize(0):
55	// When an item is deleted, its memory isn't freed right
56	// away. This is what allows you to iterate over a hashtable
57	// and call erase() without invalidating the iterator.
58	// To force the memory to be freed, call resize(0).
59	// For tr1 compatibility, this can also be called as rehash(0).
60	//
61	// 3) min_load_factor(0.0)
62	// Setting the minimum load factor to 0.0 guarantees that
63	// the hash table will never shrink.
64	//
65	// Roughly speaking:
66	// (1) dense_hash_map: fastest, uses the most memory unless entries are small
67	// (2) sparse_hash_map: slowest, uses the least memory
68	// (3) hash_map / unordered_map (STL): in the middle
69	//
70	// Typically I use sparse_hash_map when I care about space and/or when
71	// I need to save the hashtable on disk. I use hash_map otherwise. I
72	// don't personally use dense_hash_map ever; some people use it for
73	// small maps with lots of lookups.
74	//
75	// - dense_hash_map has, typically, about 78% memory overhead (if your
76	// data takes up X bytes, the hash_map uses .78X more bytes in overhead).
77	// - sparse_hash_map has about 4 bits overhead per entry.
78	// - sparse_hash_map can be 3-7 times slower than the others for lookup and,
79	// especially, inserts. See time_hash_map.cc for details.
80	//
81	// See /usr/(local/)?doc/sparsehash-/sparse_hash_map.html*
82	// for information about how to use this class.
83
84	#pragma once
85
86	#include <algorithm> // needed by stl_alloc
87	#include <functional> // for equal_to<>, select1st<>, etc
88	#include <memory> // for alloc
89	#include <utility> // for pair<>
90	#include <sparsehash/internal/libc_allocator_with_realloc.h>
91	#include <sparsehash/internal/sparsehashtable.h> // IWYU pragma: export
92
93	namespace google {
94
95	template <class Key, class T, class HashFcn = std::hash<Key>,
96	class EqualKey = std::equal_to<Key>,
97	class Alloc = libc_allocator_with_realloc<std::pair<const Key, T>>>
98	class sparse_hash_map {
99	private:
100	// Apparently select1st is not stl-standard, so we define our own
101	struct SelectKey {
102	typedef const Key& result_type;
103	const Key& operator()(const std::pair<const Key, T>& p) const {
104	return p.first;
105	}
106	};
107	struct SetKey {
108	void operator()(std::pair<const Key, T>* value, const Key& new_key) const {
109	*const_cast<Key*>(&value->first) = new_key;
110	// It would be nice to clear the rest of value here as well, in
111	// case it's taking up a lot of memory. We do this by clearing
112	// the value. This assumes T has a zero-arg constructor!
113	value->second = T();
114	}
115	};
116	// For operator[].
117	struct DefaultValue {
118	std::pair<const Key, T> operator()(const Key& key) {
119	return std::make_pair(key, T());
120	}
121	};
122
123	// The actual data
124	typedef typename sparsehash_internal::key_equal_chosen<HashFcn, EqualKey>::type EqualKeyChosen;
125	typedef sparse_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey,
126	SetKey, EqualKeyChosen, Alloc> ht;
127	ht rep;
128
129	static_assert(!sparsehash_internal::has_transparent_key_equal<HashFcn>::value
130	\|\| std::is_same<EqualKey, std::equal_to<Key>>::value
131	\|\| std::is_same<EqualKey, EqualKeyChosen>::value,
132	"Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash");
133
134	public:
135	typedef typename ht::key_type key_type;
136	typedef T data_type;
137	typedef T mapped_type;
138	typedef typename ht::value_type value_type;
139	typedef typename ht::hasher hasher;
140	typedef typename ht::key_equal key_equal;
141	typedef Alloc allocator_type;
142
143	typedef typename ht::size_type size_type;
144	typedef typename ht::difference_type difference_type;
145	typedef typename ht::pointer pointer;
146	typedef typename ht::const_pointer const_pointer;
147	typedef typename ht::reference reference;
148	typedef typename ht::const_reference const_reference;
149
150	typedef typename ht::iterator iterator;
151	typedef typename ht::const_iterator const_iterator;
152	typedef typename ht::local_iterator local_iterator;
153	typedef typename ht::const_local_iterator const_local_iterator;
154
155	// Iterator functions
156	iterator begin() { return rep.begin(); }
157	iterator end() { return rep.end(); }
158	const_iterator begin() const { return rep.begin(); }
159	const_iterator end() const { return rep.end(); }
160
161	// These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
162	local_iterator begin(size_type i) { return rep.begin(i); }
163	local_iterator end(size_type i) { return rep.end(i); }
164	const_local_iterator begin(size_type i) const { return rep.begin(i); }
165	const_local_iterator end(size_type i) const { return rep.end(i); }
166
167	// Accessor functions
168	allocator_type get_allocator() const { return rep.get_allocator(); }
169	hasher hash_funct() const { return rep.hash_funct(); }
170	hasher hash_function() const { return hash_funct(); }
171	key_equal key_eq() const { return rep.key_eq(); }
172
173	// Constructors
174	explicit sparse_hash_map(size_type expected_max_items_in_table = `0`,
175	const hasher& hf = hasher(),
176	const key_equal& eql = key_equal(),
177	const allocator_type& alloc = allocator_type())
178	: rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(),
179	alloc) {}
180
181	template <class InputIterator>
182	sparse_hash_map(InputIterator f, InputIterator l,
183	size_type expected_max_items_in_table = `0`,
184	const hasher& hf = hasher(),
185	const key_equal& eql = key_equal(),
186	const allocator_type& alloc = allocator_type())
187	: rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(),
188	alloc) {
189	rep.insert(f, l);
190	}
191	// We use the default copy constructor
192	// We use the default operator=()
193	// We use the default destructor
194
195	void clear() { rep.clear(); }
196	void swap(sparse_hash_map& hs) { rep.swap(hs.rep); }
197
198	// Functions concerning size
199	size_type size() const { return rep.size(); }
200	size_type max_size() const { return rep.max_size(); }
201	bool empty() const { return rep.empty(); }
202	size_type bucket_count() const { return rep.bucket_count(); }
203	size_type max_bucket_count() const { return rep.max_bucket_count(); }
204
205	// These are tr1 methods. bucket() is the bucket the key is or would be in.
206	size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
207	size_type bucket(const key_type& key) const { return rep.bucket(key); }
208	float load_factor() const { return size() * `1.0f` / bucket_count(); }
209	float max_load_factor() const {
210	float shrink, grow;
211	rep.get_resizing_parameters(&shrink, &grow);
212	return grow;
213	}
214	void max_load_factor(float new_grow) {
215	float shrink, grow;
216	rep.get_resizing_parameters(&shrink, &grow);
217	rep.set_resizing_parameters(shrink, new_grow);
218	}
219	// These aren't tr1 methods but perhaps ought to be.
220	float min_load_factor() const {
221	float shrink, grow;
222	rep.get_resizing_parameters(&shrink, &grow);
223	return shrink;
224	}
225	void min_load_factor(float new_shrink) {
226	float shrink, grow;
227	rep.get_resizing_parameters(&shrink, &grow);
228	rep.set_resizing_parameters(new_shrink, grow);
229	}
230	// Deprecated; use min_load_factor() or max_load_factor() instead.
231	void set_resizing_parameters(float shrink, float grow) {
232	rep.set_resizing_parameters(shrink, grow);
233	}
234
235	void resize(size_type hint) { rep.resize(hint); }
236	void rehash(size_type hint) { resize(hint); } // the tr1 name
237
238	// Lookup routines
239	iterator find(const key_type& key) { return rep.find(key); }
240	const_iterator find(const key_type& key) const { return rep.find(key); }
241
242	template <typename K>
243	typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, iterator>::type
244	find(const K& key) { return rep.find(key); }
245	template <typename K>
246	typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, const_iterator>::type
247	find(const K& key) const { return rep.find(key); }
248
249	data_type& operator[](const key_type& key) { // This is our value-add!
250	// If key is in the hashtable, returns find(key)->second,
251	// otherwise returns insert(value_type(key, T()).first->second.
252	// Note it does not create an empty T unless the find fails.
253	return rep.template find_or_insert<DefaultValue>(key).second;
254	}
255
256	size_type count(const key_type& key) const { return rep.count(key); }
257
258	template <typename K>
259	typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, size_type>::type
260	count(const K& key) const { return rep.count(key); }
261
262	std::pair<iterator, iterator> equal_range(const key_type& key) {
263	return rep.equal_range(key);
264	}
265	std::pair<const_iterator, const_iterator> equal_range(
266	const key_type& key) const {
267	return rep.equal_range(key);
268	}
269
270	template<typename K>
271	typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<iterator, iterator>>::type
272	equal_range(const K& key) {
273	return rep.equal_range(key);
274	}
275	template<typename K>
276	typename std::enable_if<sparsehash_internal::has_transparent_key_equal<hasher, K>::value, std::pair<const_iterator, const_iterator>>::type
277	equal_range(const K& key) const {
278	return rep.equal_range(key);
279	}
280
281	// Insertion routines
282	std::pair<iterator, bool> insert(const value_type& obj) {
283	return rep.insert(obj);
284	}
285	template <class InputIterator>
286	void insert(InputIterator f, InputIterator l) {
287	rep.insert(f, l);
288	}
289	void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
290	// Required for std::insert_iterator; the passed-in iterator is ignored.
291	iterator insert(iterator, const value_type& obj) { return insert(obj).first; }
292
293	// Deletion routines
294	// THESE ARE NON-STANDARD! I make you specify an "impossible" key
295	// value to identify deleted buckets. You can change the key as
296	// time goes on, or get rid of it entirely to be insert-only.
297	void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
298	void clear_deleted_key() { rep.clear_deleted_key(); }
299	key_type deleted_key() const { return rep.deleted_key(); }
300
301	// These are standard
302	size_type erase(const key_type& key) { return rep.erase(key); }
303	void erase(iterator it) { rep.erase(it); }
304	void erase(iterator f, iterator l) { rep.erase(f, l); }
305
306	// Comparison
307	bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; }
308	bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; }
309
310	// I/O -- this is an add-on for writing metainformation to disk
311	//
312	// For maximum flexibility, this does not assume a particular
313	// file type (though it will probably be a FILE ). We just pass*
314	// the fp through to rep.
315
316	// If your keys and values are simple enough, you can pass this
317	// serializer to serialize()/unserialize(). "Simple enough" means
318	// value_type is a POD type that contains no pointers. Note,
319	// however, we don't try to normalize endianness.
320	typedef typename ht::NopointerSerializer NopointerSerializer;
321
322	// serializer: a class providing operator()(OUTPUT, const value_type&)*
323	// (writing value_type to OUTPUT). You can specify a
324	// NopointerSerializer object if appropriate (see above).
325	// fp: either a FILE, OR an ostream/subclass_of_ostream, OR a*
326	// pointer to a class providing size_t Write(const void, size_t),*
327	// which writes a buffer into a stream (which fp presumably
328	// owns) and returns the number of bytes successfully written.
329	// Note basic_ostream<not_char> is not currently supported.
330	template <typename ValueSerializer, typename OUTPUT>
331	bool serialize(ValueSerializer serializer, OUTPUT* fp) {
332	return rep.serialize(serializer, fp);
333	}
334
335	// serializer: a functor providing operator()(INPUT, value_type)
336	// (reading from INPUT and into value_type). You can specify a
337	// NopointerSerializer object if appropriate (see above).
338	// fp: either a FILE, OR an istream/subclass_of_istream, OR a*
339	// pointer to a class providing size_t Read(void, size_t),*
340	// which reads into a buffer from a stream (which fp presumably
341	// owns) and returns the number of bytes successfully read.
342	// Note basic_istream<not_char> is not currently supported.
343	// NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
344	// may need to do a const cast in order to fill in the key.
345	// NOTE: if Key or T are not POD types, the serializer MUST use
346	// placement-new to initialize their values, rather than a normal
347	// equals-assignment or similar. (The value_type passed into the*
348	// serializer points to garbage memory.)
349	template <typename ValueSerializer, typename INPUT>
350	bool unserialize(ValueSerializer serializer, INPUT* fp) {
351	return rep.unserialize(serializer, fp);
352	}
353
354	// The four methods below are DEPRECATED.
355	// Use serialize() and unserialize() for new code.
356	template <typename OUTPUT>
357	bool write_metadata(OUTPUT* fp) {
358	return rep.write_metadata(fp);
359	}
360
361	template <typename INPUT>
362	bool read_metadata(INPUT* fp) {
363	return rep.read_metadata(fp);
364	}
365
366	template <typename OUTPUT>
367	bool write_nopointer_data(OUTPUT* fp) {
368	return rep.write_nopointer_data(fp);
369	}
370
371	template <typename INPUT>
372	bool read_nopointer_data(INPUT* fp) {
373	return rep.read_nopointer_data(fp);
374	}
375	};
376
377	// We need a global swap as well
378	template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
379	inline void swap(sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
380	sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) {
381	hm1.swap(hm2);
382	}
383
384	} // namespace google
385

Browse the source code of ClickHouse/contrib/sparsehash-c11/sparsehash/sparse_hash_map