1 | // |
2 | // SimpleHashTable.h |
3 | // |
4 | // Library: Foundation |
5 | // Package: Hashing |
6 | // Module: SimpleHashTable |
7 | // |
8 | // Definition of the SimpleHashTable class. |
9 | // |
10 | // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef Foundation_SimpleHashTable_INCLUDED |
18 | #define Foundation_SimpleHashTable_INCLUDED |
19 | |
20 | |
21 | #include "Poco/Foundation.h" |
22 | #include "Poco/Exception.h" |
23 | #include "Poco/HashFunction.h" |
24 | #include "Poco/HashStatistic.h" |
25 | #include <vector> |
26 | #include <map> |
27 | #include <cstddef> |
28 | #include <algorithm> |
29 | |
30 | |
31 | namespace Poco { |
32 | |
33 | |
34 | //@ deprecated |
35 | template <class Key, class Value, class KeyHashFunction = HashFunction<Key> > |
36 | class SimpleHashTable |
37 | /// A SimpleHashTable stores a key value pair that can be looked up via a hashed key. |
38 | /// |
39 | /// In comparison to a HashTable, this class handles collisions by sequentially searching the next |
40 | /// free location. This also means that the maximum size of this table is limited, i.e. if the hash table |
41 | /// is full, it will throw an exception and that this class does not support remove operations. |
42 | /// On the plus side it is faster than the HashTable. |
43 | /// |
44 | /// This class is NOT thread safe. |
45 | { |
46 | public: |
47 | class HashEntry |
48 | { |
49 | public: |
50 | Key key; |
51 | Value value; |
52 | HashEntry(const Key k, const Value v): key(k), value(v) |
53 | { |
54 | } |
55 | }; |
56 | |
57 | typedef std::vector<HashEntry*> HashTableVector; |
58 | |
59 | SimpleHashTable(UInt32 capacity = 251): _entries(capacity, 0), _size(0), _capacity(capacity) |
60 | /// Creates the SimpleHashTable. |
61 | { |
62 | } |
63 | |
64 | SimpleHashTable(const SimpleHashTable& ht): |
65 | _size(ht._size), |
66 | _capacity(ht._capacity) |
67 | { |
68 | _entries.reserve(ht._capacity); |
69 | for (typename HashTableVector::iterator it = ht._entries.begin(); it != ht._entries.end(); ++it) |
70 | { |
71 | if (*it) |
72 | _entries.push_back(new HashEntry(*it)); |
73 | else |
74 | _entries.push_back(0); |
75 | } |
76 | } |
77 | |
78 | ~SimpleHashTable() |
79 | /// Destroys the SimpleHashTable. |
80 | { |
81 | clear(); |
82 | } |
83 | |
84 | SimpleHashTable& operator = (const SimpleHashTable& ht) |
85 | { |
86 | if (this != &ht) |
87 | { |
88 | SimpleHashTable tmp(ht); |
89 | swap(tmp); |
90 | } |
91 | return *this; |
92 | } |
93 | |
94 | void swap(SimpleHashTable& ht) |
95 | { |
96 | using std::swap; |
97 | swap(_entries, ht._entries); |
98 | swap(_size, ht._size); |
99 | swap(_capacity, ht._capacity); |
100 | } |
101 | |
102 | void clear() |
103 | { |
104 | for (typename HashTableVector::iterator it = _entries.begin(); it != _entries.end(); ++it) |
105 | { |
106 | delete *it; |
107 | *it = 0; |
108 | } |
109 | _size = 0; |
110 | } |
111 | |
112 | UInt32 insert(const Key& key, const Value& value) |
113 | /// Returns the hash value of the inserted item. |
114 | /// Throws an exception if the entry was already inserted |
115 | { |
116 | UInt32 hsh = hash(key); |
117 | insertRaw(key, hsh, value); |
118 | return hsh; |
119 | } |
120 | |
121 | Value& insertRaw(const Key& key, UInt32 hsh, const Value& value) |
122 | /// Returns the hash value of the inserted item. |
123 | /// Throws an exception if the entry was already inserted |
124 | { |
125 | UInt32 pos = hsh; |
126 | if (!_entries[pos]) |
127 | _entries[pos] = new HashEntry(key, value); |
128 | else |
129 | { |
130 | UInt32 origHash = hsh; |
131 | while (_entries[hsh % _capacity]) |
132 | { |
133 | if (_entries[hsh % _capacity]->key == key) |
134 | throw ExistsException(); |
135 | if (hsh - origHash > _capacity) |
136 | throw PoolOverflowException("SimpleHashTable full" ); |
137 | hsh++; |
138 | } |
139 | pos = hsh % _capacity; |
140 | _entries[pos] = new HashEntry(key, value); |
141 | } |
142 | _size++; |
143 | return _entries[pos]->value; |
144 | } |
145 | |
146 | UInt32 update(const Key& key, const Value& value) |
147 | /// Returns the hash value of the inserted item. |
148 | /// Replaces an existing entry if it finds one |
149 | { |
150 | UInt32 hsh = hash(key); |
151 | updateRaw(key, hsh, value); |
152 | return hsh; |
153 | } |
154 | |
155 | void updateRaw(const Key& key, UInt32 hsh, const Value& value) |
156 | /// Returns the hash value of the inserted item. |
157 | /// Replaces an existing entry if it finds one |
158 | { |
159 | if (!_entries[hsh]) |
160 | _entries[hsh] = new HashEntry(key, value); |
161 | else |
162 | { |
163 | UInt32 origHash = hsh; |
164 | while (_entries[hsh % _capacity]) |
165 | { |
166 | if (_entries[hsh % _capacity]->key == key) |
167 | { |
168 | _entries[hsh % _capacity]->value = value; |
169 | return; |
170 | } |
171 | if (hsh - origHash > _capacity) |
172 | throw PoolOverflowException("SimpleHashTable full" ); |
173 | hsh++; |
174 | } |
175 | _entries[hsh % _capacity] = new HashEntry(key, value); |
176 | } |
177 | _size++; |
178 | } |
179 | |
180 | UInt32 hash(const Key& key) const |
181 | { |
182 | return _hash(key, _capacity); |
183 | } |
184 | |
185 | const Value& get(const Key& key) const |
186 | /// Throws an exception if the value does not exist |
187 | { |
188 | UInt32 hsh = hash(key); |
189 | return getRaw(key, hsh); |
190 | } |
191 | |
192 | const Value& getRaw(const Key& key, UInt32 hsh) const |
193 | /// Throws an exception if the value does not exist |
194 | { |
195 | UInt32 origHash = hsh; |
196 | while (true) |
197 | { |
198 | if (_entries[hsh % _capacity]) |
199 | { |
200 | if (_entries[hsh % _capacity]->key == key) |
201 | { |
202 | return _entries[hsh % _capacity]->value; |
203 | } |
204 | } |
205 | else |
206 | throw InvalidArgumentException("value not found" ); |
207 | if (hsh - origHash > _capacity) |
208 | throw InvalidArgumentException("value not found" ); |
209 | hsh++; |
210 | } |
211 | } |
212 | |
213 | Value& get(const Key& key) |
214 | /// Throws an exception if the value does not exist |
215 | { |
216 | UInt32 hsh = hash(key); |
217 | return const_cast<Value&>(getRaw(key, hsh)); |
218 | } |
219 | |
220 | const Value& operator [] (const Key& key) const |
221 | { |
222 | return get(key); |
223 | } |
224 | |
225 | Value& operator [] (const Key& key) |
226 | { |
227 | UInt32 hsh = hash(key); |
228 | UInt32 origHash = hsh; |
229 | while (true) |
230 | { |
231 | if (_entries[hsh % _capacity]) |
232 | { |
233 | if (_entries[hsh % _capacity]->key == key) |
234 | { |
235 | return _entries[hsh % _capacity]->value; |
236 | } |
237 | } |
238 | else return insertRaw(key, hsh, Value()); |
239 | if (hsh - origHash > _capacity) |
240 | return insertRaw(key, hsh, Value()); |
241 | hsh++; |
242 | } |
243 | } |
244 | |
245 | const Key& getKeyRaw(const Key& key, UInt32 hsh) |
246 | /// Throws an exception if the key does not exist. returns a reference to the internally |
247 | /// stored key. Useful when someone does an insert and wants for performance reason only to store |
248 | /// a pointer to the key in another collection |
249 | { |
250 | UInt32 origHash = hsh; |
251 | while (true) |
252 | { |
253 | if (_entries[hsh % _capacity]) |
254 | { |
255 | if (_entries[hsh % _capacity]->key == key) |
256 | { |
257 | return _entries[hsh % _capacity]->key; |
258 | } |
259 | } |
260 | else |
261 | throw InvalidArgumentException("key not found" ); |
262 | |
263 | if (hsh - origHash > _capacity) |
264 | throw InvalidArgumentException("key not found" ); |
265 | hsh++; |
266 | } |
267 | } |
268 | |
269 | bool get(const Key& key, Value& v) const |
270 | /// Sets v to the found value, returns false if no value was found |
271 | { |
272 | UInt32 hsh = hash(key); |
273 | return getRaw(key, hsh, v); |
274 | } |
275 | |
276 | bool getRaw(const Key& key, UInt32 hsh, Value& v) const |
277 | /// Sets v to the found value, returns false if no value was found |
278 | { |
279 | UInt32 origHash = hsh; |
280 | while (true) |
281 | { |
282 | if (_entries[hsh % _capacity]) |
283 | { |
284 | if (_entries[hsh % _capacity]->key == key) |
285 | { |
286 | v = _entries[hsh % _capacity]->value; |
287 | return true; |
288 | } |
289 | } |
290 | else |
291 | return false; |
292 | if (hsh - origHash > _capacity) |
293 | return false; |
294 | hsh++; |
295 | } |
296 | } |
297 | |
298 | bool exists(const Key& key) const |
299 | { |
300 | UInt32 hsh = hash(key); |
301 | return existsRaw(key, hsh); |
302 | } |
303 | |
304 | bool existsRaw(const Key& key, UInt32 hsh) const |
305 | { |
306 | UInt32 origHash = hsh; |
307 | while (true) |
308 | { |
309 | if (_entries[hsh % _capacity]) |
310 | { |
311 | if (_entries[hsh % _capacity]->key == key) |
312 | { |
313 | return true; |
314 | } |
315 | } |
316 | else |
317 | return false; |
318 | if (hsh - origHash > _capacity) |
319 | return false; |
320 | hsh++; |
321 | } |
322 | } |
323 | |
324 | std::size_t size() const |
325 | /// Returns the number of elements already inserted into the SimpleHashTable |
326 | { |
327 | return _size; |
328 | } |
329 | |
330 | UInt32 capacity() const |
331 | { |
332 | return _capacity; |
333 | } |
334 | |
335 | void resize(UInt32 newSize) |
336 | /// Resizes the hashtable, rehashes all existing entries. Expensive! |
337 | { |
338 | if (_capacity != newSize) |
339 | { |
340 | SimpleHashTable tmp(newSize); |
341 | swap(tmp); |
342 | for (typename HashTableVector::const_iterator it = tmp._entries.begin(); it != tmp._entries.end(); ++it) |
343 | { |
344 | if (*it) |
345 | { |
346 | insertRaw((*it)->key, hash((*it)->key), (*it)->value); |
347 | } |
348 | } |
349 | } |
350 | } |
351 | |
352 | HashStatistic currentState(bool details = false) const |
353 | /// Returns the current internal state |
354 | { |
355 | UInt32 numberOfEntries = (UInt32)_size; |
356 | UInt32 numZeroEntries = 0; |
357 | UInt32 maxEntriesPerHash = 0; |
358 | std::vector<UInt32> detailedEntriesPerHash; |
359 | #ifdef _DEBUG |
360 | UInt32 totalSize = 0; |
361 | #endif |
362 | for (int i=0; i < _capacity; ++i) |
363 | { |
364 | if (_entries[i]) |
365 | { |
366 | maxEntriesPerHash = 1; |
367 | UInt32 size = 1; |
368 | if (details) |
369 | detailedEntriesPerHash.push_back(size); |
370 | #ifdef _DEBUG |
371 | totalSize += size; |
372 | #endif |
373 | } |
374 | else |
375 | { |
376 | numZeroEntries++; |
377 | if (details) |
378 | detailedEntriesPerHash.push_back(0); |
379 | } |
380 | } |
381 | #ifdef _DEBUG |
382 | poco_assert_dbg(totalSize == numberOfEntries); |
383 | #endif |
384 | return HashStatistic(_capacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash); |
385 | } |
386 | |
387 | private: |
388 | HashTableVector _entries; |
389 | std::size_t _size; |
390 | UInt32 _capacity; |
391 | KeyHashFunction _hash; |
392 | }; |
393 | |
394 | |
395 | } // namespace Poco |
396 | |
397 | |
398 | #endif // Foundation_HashTable_INCLUDED |
399 | |