1 | // |
2 | // HashTable.h |
3 | // |
4 | // Library: Foundation |
5 | // Package: Hashing |
6 | // Module: HashTable |
7 | // |
8 | // Definition of the HashTable class. |
9 | // |
10 | // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef Foundation_HashTable_INCLUDED |
18 | #define Foundation_HashTable_INCLUDED |
19 | |
20 | |
21 | #include "Poco/Foundation.h" |
22 | #include "Poco/Exception.h" |
23 | #include "Poco/HashFunction.h" |
24 | #include "Poco/HashStatistic.h" |
25 | #include <vector> |
26 | #include <map> |
27 | #include <cstddef> |
28 | #include <cstring> |
29 | |
30 | |
31 | namespace Poco { |
32 | |
33 | |
34 | //@ deprecated |
35 | template <class Key, class Value, class KeyHashFunction = HashFunction<Key> > |
36 | class HashTable |
37 | /// A HashTable stores a key value pair that can be looked up via a hashed key. |
38 | /// |
39 | /// Collision handling is done via overflow maps(!). With small hash tables performance of this |
40 | /// data struct will be closer to that a map than a hash table, i.e. slower. On the plus side, |
41 | /// this class offers remove operations. Also HashTable full errors are not possible. If a fast |
42 | /// HashTable implementation is needed and the remove operation is not required, use SimpleHashTable |
43 | /// instead. |
44 | /// |
45 | /// This class is NOT thread safe. |
46 | { |
47 | public: |
48 | typedef std::map<Key, Value> HashEntryMap; |
49 | typedef HashEntryMap** HashTableVector; |
50 | |
51 | typedef typename HashEntryMap::const_iterator ConstIterator; |
52 | typedef typename HashEntryMap::iterator Iterator; |
53 | |
54 | HashTable(UInt32 initialSize = 251): |
55 | _entries(0), |
56 | _size(0), |
57 | _maxCapacity(initialSize) |
58 | /// Creates the HashTable. |
59 | { |
60 | _entries = new HashEntryMap*[initialSize]; |
61 | memset(_entries, '\0', sizeof(HashEntryMap*)*initialSize); |
62 | } |
63 | |
64 | HashTable(const HashTable& ht): |
65 | _entries(new HashEntryMap*[ht._maxCapacity]), |
66 | _size(ht._size), |
67 | _maxCapacity(ht._maxCapacity) |
68 | { |
69 | for (UInt32 i = 0; i < _maxCapacity; ++i) |
70 | { |
71 | if (ht._entries[i]) |
72 | _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end()); |
73 | else |
74 | _entries[i] = 0; |
75 | } |
76 | } |
77 | |
78 | ~HashTable() |
79 | /// Destroys the HashTable. |
80 | { |
81 | clear(); |
82 | } |
83 | |
84 | HashTable& operator = (const HashTable& ht) |
85 | { |
86 | if (this != &ht) |
87 | { |
88 | clear(); |
89 | _maxCapacity = ht._maxCapacity; |
90 | poco_assert_dbg (_entries == 0); |
91 | _entries = new HashEntryMap*[_maxCapacity]; |
92 | _size = ht._size; |
93 | |
94 | for (UInt32 i = 0; i < _maxCapacity; ++i) |
95 | { |
96 | if (ht._entries[i]) |
97 | _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end()); |
98 | else |
99 | _entries[i] = 0; |
100 | } |
101 | } |
102 | return *this; |
103 | } |
104 | |
105 | void clear() |
106 | { |
107 | if (!_entries) |
108 | return; |
109 | for (UInt32 i = 0; i < _maxCapacity; ++i) |
110 | { |
111 | delete _entries[i]; |
112 | } |
113 | delete[] _entries; |
114 | _entries = 0; |
115 | _size = 0; |
116 | _maxCapacity = 0; |
117 | } |
118 | |
119 | UInt32 insert(const Key& key, const Value& value) |
120 | /// Returns the hash value of the inserted item. |
121 | /// Throws an exception if the entry was already inserted |
122 | { |
123 | UInt32 hsh = hash(key); |
124 | insertRaw(key, hsh, value); |
125 | return hsh; |
126 | } |
127 | |
128 | Value& insertRaw(const Key& key, UInt32 hsh, const Value& value) |
129 | /// Returns the hash value of the inserted item. |
130 | /// Throws an exception if the entry was already inserted |
131 | { |
132 | if (!_entries[hsh]) |
133 | _entries[hsh] = new HashEntryMap(); |
134 | std::pair<typename HashEntryMap::iterator, bool> res(_entries[hsh]->insert(std::make_pair(key, value))); |
135 | if (!res.second) |
136 | throw InvalidArgumentException("HashTable::insert, key already exists." ); |
137 | _size++; |
138 | return res.first->second; |
139 | } |
140 | |
141 | UInt32 update(const Key& key, const Value& value) |
142 | /// Returns the hash value of the inserted item. |
143 | /// Replaces an existing entry if it finds one |
144 | { |
145 | UInt32 hsh = hash(key); |
146 | updateRaw(key, hsh, value); |
147 | return hsh; |
148 | } |
149 | |
150 | void updateRaw(const Key& key, UInt32 hsh, const Value& value) |
151 | /// Returns the hash value of the inserted item. |
152 | /// Replaces an existing entry if it finds one |
153 | { |
154 | if (!_entries[hsh]) |
155 | _entries[hsh] = new HashEntryMap(); |
156 | std::pair<Iterator, bool> res = _entries[hsh]->insert(std::make_pair(key, value)); |
157 | if (res.second == false) |
158 | res.first->second = value; |
159 | else |
160 | _size++; |
161 | } |
162 | |
163 | void remove(const Key& key) |
164 | { |
165 | UInt32 hsh = hash(key); |
166 | removeRaw(key, hsh); |
167 | } |
168 | |
169 | void removeRaw(const Key& key, UInt32 hsh) |
170 | /// Performance version, allows to specify the hash value |
171 | { |
172 | if (_entries[hsh]) |
173 | { |
174 | _size -= _entries[hsh]->erase(key); |
175 | } |
176 | } |
177 | |
178 | UInt32 hash(const Key& key) const |
179 | { |
180 | return _hash(key, _maxCapacity); |
181 | } |
182 | |
183 | const Value& get(const Key& key) const |
184 | /// Throws an exception if the value does not exist |
185 | { |
186 | UInt32 hsh = hash(key); |
187 | return getRaw(key, hsh); |
188 | } |
189 | |
190 | const Value& getRaw(const Key& key, UInt32 hsh) const |
191 | /// Throws an exception if the value does not exist |
192 | { |
193 | if (!_entries[hsh]) |
194 | throw InvalidArgumentException("key not found" ); |
195 | |
196 | ConstIterator it = _entries[hsh]->find(key); |
197 | if (it == _entries[hsh]->end()) |
198 | throw InvalidArgumentException("key not found" ); |
199 | |
200 | return it->second; |
201 | } |
202 | |
203 | Value& get(const Key& key) |
204 | /// Throws an exception if the value does not exist |
205 | { |
206 | UInt32 hsh = hash(key); |
207 | return const_cast<Value&>(getRaw(key, hsh)); |
208 | } |
209 | |
210 | const Value& operator [] (const Key& key) const |
211 | { |
212 | return get(key); |
213 | } |
214 | |
215 | Value& operator [] (const Key& key) |
216 | { |
217 | UInt32 hsh = hash(key); |
218 | |
219 | if (!_entries[hsh]) |
220 | return insertRaw(key, hsh, Value()); |
221 | |
222 | ConstIterator it = _entries[hsh]->find(key); |
223 | if (it == _entries[hsh]->end()) |
224 | return insertRaw(key, hsh, Value()); |
225 | |
226 | return it->second; |
227 | } |
228 | |
229 | const Key& getKeyRaw(const Key& key, UInt32 hsh) |
230 | /// Throws an exception if the key does not exist. returns a reference to the internally |
231 | /// stored key. Useful when someone does an insert and wants for performance reason only to store |
232 | /// a pointer to the key in another collection |
233 | { |
234 | if (!_entries[hsh]) |
235 | throw InvalidArgumentException("key not found" ); |
236 | ConstIterator it = _entries[hsh]->find(key); |
237 | if (it == _entries[hsh]->end()) |
238 | throw InvalidArgumentException("key not found" ); |
239 | return it->first; |
240 | } |
241 | |
242 | bool get(const Key& key, Value& v) const |
243 | /// Sets v to the found value, returns false if no value was found |
244 | { |
245 | UInt32 hsh = hash(key); |
246 | return getRaw(key, hsh, v); |
247 | } |
248 | |
249 | bool getRaw(const Key& key, UInt32 hsh, Value& v) const |
250 | /// Sets v to the found value, returns false if no value was found |
251 | { |
252 | if (!_entries[hsh]) |
253 | return false; |
254 | |
255 | ConstIterator it = _entries[hsh]->find(key); |
256 | if (it == _entries[hsh]->end()) |
257 | return false; |
258 | |
259 | v = it->second; |
260 | return true; |
261 | } |
262 | |
263 | bool exists(const Key& key) |
264 | { |
265 | UInt32 hsh = hash(key); |
266 | return existsRaw(key, hsh); |
267 | } |
268 | |
269 | bool existsRaw(const Key& key, UInt32 hsh) |
270 | { |
271 | return _entries[hsh] && (_entries[hsh]->end() != _entries[hsh]->find(key)); |
272 | } |
273 | |
274 | std::size_t size() const |
275 | /// Returns the number of elements already inserted into the HashTable |
276 | { |
277 | return _size; |
278 | } |
279 | |
280 | UInt32 maxCapacity() const |
281 | { |
282 | return _maxCapacity; |
283 | } |
284 | |
285 | void resize(UInt32 newSize) |
286 | /// Resizes the hashtable, rehashes all existing entries. Expensive! |
287 | { |
288 | if (_maxCapacity != newSize) |
289 | { |
290 | HashTableVector cpy = _entries; |
291 | _entries = 0; |
292 | UInt32 oldSize = _maxCapacity; |
293 | _maxCapacity = newSize; |
294 | _entries = new HashEntryMap*[_maxCapacity]; |
295 | memset(_entries, '\0', sizeof(HashEntryMap*)*_maxCapacity); |
296 | |
297 | if (_size == 0) |
298 | { |
299 | // no data was yet inserted |
300 | delete[] cpy; |
301 | return; |
302 | } |
303 | _size = 0; |
304 | for (UInt32 i = 0; i < oldSize; ++i) |
305 | { |
306 | if (cpy[i]) |
307 | { |
308 | ConstIterator it = cpy[i]->begin(); |
309 | ConstIterator itEnd = cpy[i]->end(); |
310 | for (; it != itEnd; ++it) |
311 | { |
312 | insert(it->first, it->second); |
313 | } |
314 | delete cpy[i]; |
315 | } |
316 | } |
317 | delete[] cpy; |
318 | } |
319 | } |
320 | |
321 | HashStatistic currentState(bool details = false) const |
322 | /// Returns the current internal state |
323 | { |
324 | UInt32 numberOfEntries = (UInt32)_size; |
325 | UInt32 numZeroEntries = 0; |
326 | UInt32 maxEntriesPerHash = 0; |
327 | std::vector<UInt32> detailedEntriesPerHash; |
328 | #ifdef _DEBUG |
329 | UInt32 totalSize = 0; |
330 | #endif |
331 | for (UInt32 i = 0; i < _maxCapacity; ++i) |
332 | { |
333 | if (_entries[i]) |
334 | { |
335 | UInt32 size = (UInt32)_entries[i]->size(); |
336 | poco_assert_dbg(size != 0); |
337 | if (size > maxEntriesPerHash) |
338 | maxEntriesPerHash = size; |
339 | if (details) |
340 | detailedEntriesPerHash.push_back(size); |
341 | #ifdef _DEBUG |
342 | totalSize += size; |
343 | #endif |
344 | } |
345 | else |
346 | { |
347 | numZeroEntries++; |
348 | if (details) |
349 | detailedEntriesPerHash.push_back(0); |
350 | } |
351 | } |
352 | #ifdef _DEBUG |
353 | poco_assert_dbg(totalSize == numberOfEntries); |
354 | #endif |
355 | return HashStatistic(_maxCapacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash); |
356 | } |
357 | |
358 | private: |
359 | HashTableVector _entries; |
360 | std::size_t _size; |
361 | UInt32 _maxCapacity; |
362 | KeyHashFunction _hash; |
363 | }; |
364 | |
365 | |
366 | } // namespace Poco |
367 | |
368 | |
369 | #endif // Foundation_HashTable_INCLUDED |
370 | |