1 | // |
2 | // LinearHashTable.h |
3 | // |
4 | // Library: Foundation |
5 | // Package: Hashing |
6 | // Module: LinearHashTable |
7 | // |
8 | // Definition of the LinearHashTable class. |
9 | // |
10 | // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef Foundation_LinearHashTable_INCLUDED |
18 | #define Foundation_LinearHashTable_INCLUDED |
19 | |
20 | |
21 | #include "Poco/Foundation.h" |
22 | #include "Poco/Hash.h" |
23 | #include <functional> |
24 | #include <algorithm> |
25 | #include <vector> |
26 | #include <utility> |
27 | #include <cstddef> |
28 | |
29 | |
30 | namespace Poco { |
31 | |
32 | |
33 | template <class Value, class HashFunc = Hash<Value> > |
34 | class LinearHashTable |
35 | /// This class implements a linear hash table. |
36 | /// |
37 | /// In a linear hash table, the available address space |
38 | /// grows or shrinks dynamically. A linear hash table thus |
39 | /// supports any number of insertions or deletions without |
40 | /// lookup or insertion performance deterioration. |
41 | /// |
42 | /// Linear hashing was discovered by Witold Litwin in 1980 |
43 | /// and described in the paper LINEAR HASHING: A NEW TOOL FOR FILE AND TABLE ADDRESSING. |
44 | /// |
45 | /// For more information on linear hashing, see <http://en.wikipedia.org/wiki/Linear_hash>. |
46 | /// |
47 | /// The LinearHashTable is not thread safe. |
48 | /// |
49 | /// Value must support comparison for equality. |
50 | /// |
51 | /// Find, insert and delete operations are basically O(1) with regard |
52 | /// to the total number of elements in the table, and O(N) with regard |
53 | /// to the number of elements in the bucket where the element is stored. |
54 | /// On average, every bucket stores one element; the exact number depends |
55 | /// on the quality of the hash function. In most cases, the maximum number of |
56 | /// elements in a bucket should not exceed 3. |
57 | { |
58 | public: |
59 | typedef Value ValueType; |
60 | typedef Value& Reference; |
61 | typedef const Value& ConstReference; |
62 | typedef Value* Pointer; |
63 | typedef const Value* ConstPointer; |
64 | typedef HashFunc Hash; |
65 | typedef std::vector<Value> Bucket; |
66 | typedef std::vector<Bucket> BucketVec; |
67 | typedef typename Bucket::iterator BucketIterator; |
68 | typedef typename BucketVec::iterator BucketVecIterator; |
69 | |
70 | class ConstIterator: public std::iterator<std::forward_iterator_tag, Value> |
71 | { |
72 | public: |
73 | ConstIterator(): _initialized(false) |
74 | { |
75 | } |
76 | |
77 | ConstIterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt): |
78 | _vecIt(vecIt), |
79 | _endIt(endIt), |
80 | _buckIt(buckIt), |
81 | _initialized(true) |
82 | { |
83 | } |
84 | |
85 | ConstIterator(const ConstIterator& it): |
86 | _vecIt(it._vecIt), |
87 | _endIt(it._endIt), |
88 | _buckIt(it._buckIt), |
89 | _initialized(it._initialized) |
90 | |
91 | { |
92 | } |
93 | |
94 | ConstIterator& operator = (const ConstIterator& it) |
95 | { |
96 | ConstIterator tmp(it); |
97 | swap(tmp); |
98 | return *this; |
99 | } |
100 | |
101 | void swap(ConstIterator& it) |
102 | { |
103 | using std::swap; |
104 | // uninitialized iterators crash when swapped |
105 | if (_initialized) |
106 | { |
107 | swap(_vecIt, it._vecIt); |
108 | swap(_endIt, it._endIt); |
109 | swap(_buckIt, it._buckIt); |
110 | swap(_initialized, it._initialized); |
111 | } |
112 | else |
113 | { |
114 | _vecIt = it._vecIt; |
115 | _endIt = it._endIt; |
116 | _buckIt = it._buckIt; |
117 | _initialized = it._initialized; |
118 | } |
119 | } |
120 | |
121 | bool operator == (const ConstIterator& it) const |
122 | { |
123 | return _vecIt == it._vecIt && (_vecIt == _endIt || _buckIt == it._buckIt); |
124 | } |
125 | |
126 | bool operator != (const ConstIterator& it) const |
127 | { |
128 | return _vecIt != it._vecIt || (_vecIt != _endIt && _buckIt != it._buckIt); |
129 | } |
130 | |
131 | const typename Bucket::value_type& operator * () const |
132 | { |
133 | return *_buckIt; |
134 | } |
135 | |
136 | const typename Bucket::value_type* operator -> () const |
137 | { |
138 | return &*_buckIt; |
139 | } |
140 | |
141 | ConstIterator& operator ++ () // prefix |
142 | { |
143 | if (_vecIt != _endIt) |
144 | { |
145 | ++_buckIt; |
146 | while (_vecIt != _endIt && _buckIt == _vecIt->end()) |
147 | { |
148 | ++_vecIt; |
149 | if (_vecIt != _endIt) _buckIt = _vecIt->begin(); |
150 | } |
151 | } |
152 | return *this; |
153 | } |
154 | |
155 | ConstIterator operator ++ (int) // postfix |
156 | { |
157 | ConstIterator tmp(*this); |
158 | ++*this; |
159 | return tmp; |
160 | } |
161 | |
162 | protected: |
163 | BucketVecIterator _vecIt; |
164 | BucketVecIterator _endIt; |
165 | BucketIterator _buckIt; |
166 | bool _initialized; |
167 | |
168 | friend class LinearHashTable; |
169 | }; |
170 | |
171 | class Iterator: public ConstIterator |
172 | { |
173 | public: |
174 | Iterator() |
175 | { |
176 | } |
177 | |
178 | Iterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt): |
179 | ConstIterator(vecIt, endIt, buckIt) |
180 | { |
181 | } |
182 | |
183 | Iterator(const Iterator& it): |
184 | ConstIterator(it) |
185 | { |
186 | } |
187 | |
188 | Iterator& operator = (const Iterator& it) |
189 | { |
190 | Iterator tmp(it); |
191 | ConstIterator::swap(tmp); |
192 | return *this; |
193 | } |
194 | |
195 | void swap(Iterator& it) |
196 | { |
197 | ConstIterator::swap(it); |
198 | } |
199 | |
200 | typename Bucket::value_type& operator * () |
201 | { |
202 | return *this->_buckIt; |
203 | } |
204 | |
205 | const typename Bucket::value_type& operator * () const |
206 | { |
207 | return *this->_buckIt; |
208 | } |
209 | |
210 | typename Bucket::value_type* operator -> () |
211 | { |
212 | return &*this->_buckIt; |
213 | } |
214 | |
215 | const typename Bucket::value_type* operator -> () const |
216 | { |
217 | return &*this->_buckIt; |
218 | } |
219 | |
220 | Iterator& operator ++ () // prefix |
221 | { |
222 | ConstIterator::operator ++ (); |
223 | return *this; |
224 | } |
225 | |
226 | Iterator operator ++ (int) // postfix |
227 | { |
228 | Iterator tmp(*this); |
229 | ++*this; |
230 | return tmp; |
231 | } |
232 | |
233 | friend class LinearHashTable; |
234 | }; |
235 | |
236 | LinearHashTable(std::size_t initialReserve = 64): |
237 | _split(0), |
238 | _front(1), |
239 | _size(0) |
240 | /// Creates the LinearHashTable, using the given initialReserve. |
241 | { |
242 | _buckets.reserve(calcSize(initialReserve)); |
243 | _buckets.push_back(Bucket()); |
244 | } |
245 | |
246 | LinearHashTable(const LinearHashTable& table): |
247 | _buckets(table._buckets), |
248 | _split(table._split), |
249 | _front(table._front), |
250 | _size(table._size) |
251 | /// Creates the LinearHashTable by copying another one. |
252 | { |
253 | } |
254 | |
255 | ~LinearHashTable() |
256 | /// Destroys the LinearHashTable. |
257 | { |
258 | } |
259 | |
260 | LinearHashTable& operator = (const LinearHashTable& table) |
261 | /// Assigns another LinearHashTable. |
262 | { |
263 | LinearHashTable tmp(table); |
264 | swap(tmp); |
265 | return *this; |
266 | } |
267 | |
268 | void swap(LinearHashTable& table) |
269 | /// Swaps the LinearHashTable with another one. |
270 | { |
271 | using std::swap; |
272 | swap(_buckets, table._buckets); |
273 | swap(_split, table._split); |
274 | swap(_front, table._front); |
275 | swap(_size, table._size); |
276 | } |
277 | |
278 | ConstIterator begin() const |
279 | /// Returns an iterator pointing to the first entry, if one exists. |
280 | { |
281 | BucketVecIterator it(_buckets.begin()); |
282 | BucketVecIterator itEnd(_buckets.end()); |
283 | while (it != itEnd && it->empty()) |
284 | { |
285 | ++it; |
286 | } |
287 | if (it == itEnd) |
288 | return end(); |
289 | else |
290 | return ConstIterator(it, itEnd, it->begin()); |
291 | } |
292 | |
293 | ConstIterator end() const |
294 | /// Returns an iterator pointing to the end of the table. |
295 | { |
296 | return ConstIterator(_buckets.end(), _buckets.end(), _buckets.front().end()); |
297 | } |
298 | |
299 | Iterator begin() |
300 | /// Returns an iterator pointing to the first entry, if one exists. |
301 | { |
302 | BucketVecIterator it(_buckets.begin()); |
303 | BucketVecIterator itEnd(_buckets.end()); |
304 | while (it != itEnd && it->empty()) |
305 | { |
306 | ++it; |
307 | } |
308 | if (it == itEnd) |
309 | return end(); |
310 | else |
311 | return Iterator(it, itEnd, it->begin()); |
312 | } |
313 | |
314 | Iterator end() |
315 | /// Returns an iterator pointing to the end of the table. |
316 | { |
317 | return Iterator(_buckets.end(), _buckets.end(), _buckets.front().end()); |
318 | } |
319 | |
320 | ConstIterator find(const Value& value) const |
321 | /// Finds an entry in the table. |
322 | { |
323 | std::size_t addr = bucketAddress(value); |
324 | BucketVecIterator it(_buckets.begin() + addr); |
325 | BucketIterator buckIt(std::find(it->begin(), it->end(), value)); |
326 | if (buckIt != it->end()) |
327 | return ConstIterator(it, _buckets.end(), buckIt); |
328 | else |
329 | return end(); |
330 | } |
331 | |
332 | Iterator find(const Value& value) |
333 | /// Finds an entry in the table. |
334 | { |
335 | std::size_t addr = bucketAddress(value); |
336 | BucketVecIterator it(_buckets.begin() + addr); |
337 | BucketIterator buckIt(std::find(it->begin(), it->end(), value)); |
338 | if (buckIt != it->end()) |
339 | return Iterator(it, _buckets.end(), buckIt); |
340 | else |
341 | return end(); |
342 | } |
343 | |
344 | std::size_t count(const Value& value) const |
345 | /// Returns the number of elements with the given |
346 | /// value, with is either 1 or 0. |
347 | { |
348 | return find(value) != end() ? 1 : 0; |
349 | } |
350 | |
351 | std::pair<Iterator, bool> insert(const Value& value) |
352 | /// Inserts an element into the table. |
353 | /// |
354 | /// If the element already exists in the table, |
355 | /// a pair(iterator, false) with iterator pointing to the |
356 | /// existing element is returned. |
357 | /// Otherwise, the element is inserted an a |
358 | /// pair(iterator, true) with iterator |
359 | /// pointing to the new element is returned. |
360 | { |
361 | std::size_t hash = _hash(value); |
362 | std::size_t addr = bucketAddressForHash(hash); |
363 | BucketVecIterator it(_buckets.begin() + addr); |
364 | BucketIterator buckIt(std::find(it->begin(), it->end(), value)); |
365 | if (buckIt == it->end()) |
366 | { |
367 | split(); |
368 | addr = bucketAddressForHash(hash); |
369 | it = _buckets.begin() + addr; |
370 | buckIt = it->insert(it->end(), value); |
371 | ++_size; |
372 | return std::make_pair(Iterator(it, _buckets.end(), buckIt), true); |
373 | } |
374 | else |
375 | { |
376 | return std::make_pair(Iterator(it, _buckets.end(), buckIt), false); |
377 | } |
378 | } |
379 | |
380 | void erase(Iterator it) |
381 | /// Erases the element pointed to by it. |
382 | { |
383 | if (it != end()) |
384 | { |
385 | it._vecIt->erase(it._buckIt); |
386 | --_size; |
387 | merge(); |
388 | } |
389 | } |
390 | |
391 | void erase(const Value& value) |
392 | /// Erases the element with the given value, if it exists. |
393 | { |
394 | Iterator it = find(value); |
395 | erase(it); |
396 | } |
397 | |
398 | void clear() |
399 | /// Erases all elements. |
400 | { |
401 | LinearHashTable emptyTable; |
402 | swap(emptyTable); |
403 | } |
404 | |
405 | std::size_t size() const |
406 | /// Returns the number of elements in the table. |
407 | { |
408 | return _size; |
409 | } |
410 | |
411 | bool empty() const |
412 | /// Returns true iff the table is empty. |
413 | { |
414 | return _size == 0; |
415 | } |
416 | |
417 | std::size_t buckets() const |
418 | /// Returns the number of allocated buckets. |
419 | { |
420 | return _buckets.size(); |
421 | } |
422 | |
423 | protected: |
424 | std::size_t bucketAddress(const Value& value) const |
425 | { |
426 | std::size_t n = _hash(value); |
427 | if (n % _front >= _split) |
428 | return n % _front; |
429 | else |
430 | return n % (2*_front); |
431 | } |
432 | |
433 | std::size_t bucketAddressForHash(std::size_t hash) |
434 | { |
435 | if (hash % _front >= _split) |
436 | return hash % _front; |
437 | else |
438 | return hash % (2*_front); |
439 | } |
440 | |
441 | void split() |
442 | { |
443 | if (_split == _front) |
444 | { |
445 | _split = 0; |
446 | _front *= 2; |
447 | _buckets.reserve(_front*2); |
448 | } |
449 | Bucket tmp; |
450 | _buckets.push_back(tmp); |
451 | _buckets[_split].swap(tmp); |
452 | ++_split; |
453 | for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) |
454 | { |
455 | using std::swap; |
456 | std::size_t addr = bucketAddress(*it); |
457 | _buckets[addr].push_back(Value()); |
458 | swap(*it, _buckets[addr].back()); |
459 | } |
460 | } |
461 | |
462 | void merge() |
463 | { |
464 | if (_split == 0) |
465 | { |
466 | _front /= 2; |
467 | _split = _front; |
468 | } |
469 | --_split; |
470 | Bucket tmp; |
471 | tmp.swap(_buckets.back()); |
472 | _buckets.pop_back(); |
473 | for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) |
474 | { |
475 | using std::swap; |
476 | std::size_t addr = bucketAddress(*it); |
477 | _buckets[addr].push_back(Value()); |
478 | swap(*it, _buckets[addr].back()); |
479 | } |
480 | } |
481 | |
482 | static std::size_t calcSize(std::size_t initialSize) |
483 | { |
484 | std::size_t size = 32; |
485 | while (size < initialSize) size *= 2; |
486 | return size; |
487 | } |
488 | |
489 | private: |
490 | // Evil hack: _buckets must be mutable because both ConstIterator and Iterator hold |
491 | // ordinary iterator's (not const_iterator's). |
492 | mutable BucketVec _buckets; |
493 | std::size_t _split; |
494 | std::size_t _front; |
495 | std::size_t _size; |
496 | HashFunc _hash; |
497 | }; |
498 | |
499 | |
500 | } // namespace Poco |
501 | |
502 | |
503 | #endif // Foundation_LinearHashTable_INCLUDED |
504 | |