1//
2// LinearHashTable.h
3//
4// Library: Foundation
5// Package: Hashing
6// Module: LinearHashTable
7//
8// Definition of the LinearHashTable class.
9//
10// Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
11// and Contributors.
12//
13// SPDX-License-Identifier: BSL-1.0
14//
15
16
17#ifndef Foundation_LinearHashTable_INCLUDED
18#define Foundation_LinearHashTable_INCLUDED
19
20
21#include "Poco/Foundation.h"
22#include "Poco/Hash.h"
23#include <functional>
24#include <algorithm>
25#include <vector>
26#include <utility>
27#include <cstddef>
28
29
30namespace Poco {
31
32
33template <class Value, class HashFunc = Hash<Value> >
34class LinearHashTable
35 /// This class implements a linear hash table.
36 ///
37 /// In a linear hash table, the available address space
38 /// grows or shrinks dynamically. A linear hash table thus
39 /// supports any number of insertions or deletions without
40 /// lookup or insertion performance deterioration.
41 ///
42 /// Linear hashing was discovered by Witold Litwin in 1980
43 /// and described in the paper LINEAR HASHING: A NEW TOOL FOR FILE AND TABLE ADDRESSING.
44 ///
45 /// For more information on linear hashing, see <http://en.wikipedia.org/wiki/Linear_hash>.
46 ///
47 /// The LinearHashTable is not thread safe.
48 ///
49 /// Value must support comparison for equality.
50 ///
51 /// Find, insert and delete operations are basically O(1) with regard
52 /// to the total number of elements in the table, and O(N) with regard
53 /// to the number of elements in the bucket where the element is stored.
54 /// On average, every bucket stores one element; the exact number depends
55 /// on the quality of the hash function. In most cases, the maximum number of
56 /// elements in a bucket should not exceed 3.
57{
58public:
59 typedef Value ValueType;
60 typedef Value& Reference;
61 typedef const Value& ConstReference;
62 typedef Value* Pointer;
63 typedef const Value* ConstPointer;
64 typedef HashFunc Hash;
65 typedef std::vector<Value> Bucket;
66 typedef std::vector<Bucket> BucketVec;
67 typedef typename Bucket::iterator BucketIterator;
68 typedef typename BucketVec::iterator BucketVecIterator;
69
70 class ConstIterator: public std::iterator<std::forward_iterator_tag, Value>
71 {
72 public:
73 ConstIterator(): _initialized(false)
74 {
75 }
76
77 ConstIterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt):
78 _vecIt(vecIt),
79 _endIt(endIt),
80 _buckIt(buckIt),
81 _initialized(true)
82 {
83 }
84
85 ConstIterator(const ConstIterator& it):
86 _vecIt(it._vecIt),
87 _endIt(it._endIt),
88 _buckIt(it._buckIt),
89 _initialized(it._initialized)
90
91 {
92 }
93
94 ConstIterator& operator = (const ConstIterator& it)
95 {
96 ConstIterator tmp(it);
97 swap(tmp);
98 return *this;
99 }
100
101 void swap(ConstIterator& it)
102 {
103 using std::swap;
104 // uninitialized iterators crash when swapped
105 if (_initialized)
106 {
107 swap(_vecIt, it._vecIt);
108 swap(_endIt, it._endIt);
109 swap(_buckIt, it._buckIt);
110 swap(_initialized, it._initialized);
111 }
112 else
113 {
114 _vecIt = it._vecIt;
115 _endIt = it._endIt;
116 _buckIt = it._buckIt;
117 _initialized = it._initialized;
118 }
119 }
120
121 bool operator == (const ConstIterator& it) const
122 {
123 return _vecIt == it._vecIt && (_vecIt == _endIt || _buckIt == it._buckIt);
124 }
125
126 bool operator != (const ConstIterator& it) const
127 {
128 return _vecIt != it._vecIt || (_vecIt != _endIt && _buckIt != it._buckIt);
129 }
130
131 const typename Bucket::value_type& operator * () const
132 {
133 return *_buckIt;
134 }
135
136 const typename Bucket::value_type* operator -> () const
137 {
138 return &*_buckIt;
139 }
140
141 ConstIterator& operator ++ () // prefix
142 {
143 if (_vecIt != _endIt)
144 {
145 ++_buckIt;
146 while (_vecIt != _endIt && _buckIt == _vecIt->end())
147 {
148 ++_vecIt;
149 if (_vecIt != _endIt) _buckIt = _vecIt->begin();
150 }
151 }
152 return *this;
153 }
154
155 ConstIterator operator ++ (int) // postfix
156 {
157 ConstIterator tmp(*this);
158 ++*this;
159 return tmp;
160 }
161
162 protected:
163 BucketVecIterator _vecIt;
164 BucketVecIterator _endIt;
165 BucketIterator _buckIt;
166 bool _initialized;
167
168 friend class LinearHashTable;
169 };
170
171 class Iterator: public ConstIterator
172 {
173 public:
174 Iterator()
175 {
176 }
177
178 Iterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt):
179 ConstIterator(vecIt, endIt, buckIt)
180 {
181 }
182
183 Iterator(const Iterator& it):
184 ConstIterator(it)
185 {
186 }
187
188 Iterator& operator = (const Iterator& it)
189 {
190 Iterator tmp(it);
191 ConstIterator::swap(tmp);
192 return *this;
193 }
194
195 void swap(Iterator& it)
196 {
197 ConstIterator::swap(it);
198 }
199
200 typename Bucket::value_type& operator * ()
201 {
202 return *this->_buckIt;
203 }
204
205 const typename Bucket::value_type& operator * () const
206 {
207 return *this->_buckIt;
208 }
209
210 typename Bucket::value_type* operator -> ()
211 {
212 return &*this->_buckIt;
213 }
214
215 const typename Bucket::value_type* operator -> () const
216 {
217 return &*this->_buckIt;
218 }
219
220 Iterator& operator ++ () // prefix
221 {
222 ConstIterator::operator ++ ();
223 return *this;
224 }
225
226 Iterator operator ++ (int) // postfix
227 {
228 Iterator tmp(*this);
229 ++*this;
230 return tmp;
231 }
232
233 friend class LinearHashTable;
234 };
235
236 LinearHashTable(std::size_t initialReserve = 64):
237 _split(0),
238 _front(1),
239 _size(0)
240 /// Creates the LinearHashTable, using the given initialReserve.
241 {
242 _buckets.reserve(calcSize(initialReserve));
243 _buckets.push_back(Bucket());
244 }
245
246 LinearHashTable(const LinearHashTable& table):
247 _buckets(table._buckets),
248 _split(table._split),
249 _front(table._front),
250 _size(table._size)
251 /// Creates the LinearHashTable by copying another one.
252 {
253 }
254
255 ~LinearHashTable()
256 /// Destroys the LinearHashTable.
257 {
258 }
259
260 LinearHashTable& operator = (const LinearHashTable& table)
261 /// Assigns another LinearHashTable.
262 {
263 LinearHashTable tmp(table);
264 swap(tmp);
265 return *this;
266 }
267
268 void swap(LinearHashTable& table)
269 /// Swaps the LinearHashTable with another one.
270 {
271 using std::swap;
272 swap(_buckets, table._buckets);
273 swap(_split, table._split);
274 swap(_front, table._front);
275 swap(_size, table._size);
276 }
277
278 ConstIterator begin() const
279 /// Returns an iterator pointing to the first entry, if one exists.
280 {
281 BucketVecIterator it(_buckets.begin());
282 BucketVecIterator itEnd(_buckets.end());
283 while (it != itEnd && it->empty())
284 {
285 ++it;
286 }
287 if (it == itEnd)
288 return end();
289 else
290 return ConstIterator(it, itEnd, it->begin());
291 }
292
293 ConstIterator end() const
294 /// Returns an iterator pointing to the end of the table.
295 {
296 return ConstIterator(_buckets.end(), _buckets.end(), _buckets.front().end());
297 }
298
299 Iterator begin()
300 /// Returns an iterator pointing to the first entry, if one exists.
301 {
302 BucketVecIterator it(_buckets.begin());
303 BucketVecIterator itEnd(_buckets.end());
304 while (it != itEnd && it->empty())
305 {
306 ++it;
307 }
308 if (it == itEnd)
309 return end();
310 else
311 return Iterator(it, itEnd, it->begin());
312 }
313
314 Iterator end()
315 /// Returns an iterator pointing to the end of the table.
316 {
317 return Iterator(_buckets.end(), _buckets.end(), _buckets.front().end());
318 }
319
320 ConstIterator find(const Value& value) const
321 /// Finds an entry in the table.
322 {
323 std::size_t addr = bucketAddress(value);
324 BucketVecIterator it(_buckets.begin() + addr);
325 BucketIterator buckIt(std::find(it->begin(), it->end(), value));
326 if (buckIt != it->end())
327 return ConstIterator(it, _buckets.end(), buckIt);
328 else
329 return end();
330 }
331
332 Iterator find(const Value& value)
333 /// Finds an entry in the table.
334 {
335 std::size_t addr = bucketAddress(value);
336 BucketVecIterator it(_buckets.begin() + addr);
337 BucketIterator buckIt(std::find(it->begin(), it->end(), value));
338 if (buckIt != it->end())
339 return Iterator(it, _buckets.end(), buckIt);
340 else
341 return end();
342 }
343
344 std::size_t count(const Value& value) const
345 /// Returns the number of elements with the given
346 /// value, with is either 1 or 0.
347 {
348 return find(value) != end() ? 1 : 0;
349 }
350
351 std::pair<Iterator, bool> insert(const Value& value)
352 /// Inserts an element into the table.
353 ///
354 /// If the element already exists in the table,
355 /// a pair(iterator, false) with iterator pointing to the
356 /// existing element is returned.
357 /// Otherwise, the element is inserted an a
358 /// pair(iterator, true) with iterator
359 /// pointing to the new element is returned.
360 {
361 std::size_t hash = _hash(value);
362 std::size_t addr = bucketAddressForHash(hash);
363 BucketVecIterator it(_buckets.begin() + addr);
364 BucketIterator buckIt(std::find(it->begin(), it->end(), value));
365 if (buckIt == it->end())
366 {
367 split();
368 addr = bucketAddressForHash(hash);
369 it = _buckets.begin() + addr;
370 buckIt = it->insert(it->end(), value);
371 ++_size;
372 return std::make_pair(Iterator(it, _buckets.end(), buckIt), true);
373 }
374 else
375 {
376 return std::make_pair(Iterator(it, _buckets.end(), buckIt), false);
377 }
378 }
379
380 void erase(Iterator it)
381 /// Erases the element pointed to by it.
382 {
383 if (it != end())
384 {
385 it._vecIt->erase(it._buckIt);
386 --_size;
387 merge();
388 }
389 }
390
391 void erase(const Value& value)
392 /// Erases the element with the given value, if it exists.
393 {
394 Iterator it = find(value);
395 erase(it);
396 }
397
398 void clear()
399 /// Erases all elements.
400 {
401 LinearHashTable emptyTable;
402 swap(emptyTable);
403 }
404
405 std::size_t size() const
406 /// Returns the number of elements in the table.
407 {
408 return _size;
409 }
410
411 bool empty() const
412 /// Returns true iff the table is empty.
413 {
414 return _size == 0;
415 }
416
417 std::size_t buckets() const
418 /// Returns the number of allocated buckets.
419 {
420 return _buckets.size();
421 }
422
423protected:
424 std::size_t bucketAddress(const Value& value) const
425 {
426 std::size_t n = _hash(value);
427 if (n % _front >= _split)
428 return n % _front;
429 else
430 return n % (2*_front);
431 }
432
433 std::size_t bucketAddressForHash(std::size_t hash)
434 {
435 if (hash % _front >= _split)
436 return hash % _front;
437 else
438 return hash % (2*_front);
439 }
440
441 void split()
442 {
443 if (_split == _front)
444 {
445 _split = 0;
446 _front *= 2;
447 _buckets.reserve(_front*2);
448 }
449 Bucket tmp;
450 _buckets.push_back(tmp);
451 _buckets[_split].swap(tmp);
452 ++_split;
453 for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it)
454 {
455 using std::swap;
456 std::size_t addr = bucketAddress(*it);
457 _buckets[addr].push_back(Value());
458 swap(*it, _buckets[addr].back());
459 }
460 }
461
462 void merge()
463 {
464 if (_split == 0)
465 {
466 _front /= 2;
467 _split = _front;
468 }
469 --_split;
470 Bucket tmp;
471 tmp.swap(_buckets.back());
472 _buckets.pop_back();
473 for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it)
474 {
475 using std::swap;
476 std::size_t addr = bucketAddress(*it);
477 _buckets[addr].push_back(Value());
478 swap(*it, _buckets[addr].back());
479 }
480 }
481
482 static std::size_t calcSize(std::size_t initialSize)
483 {
484 std::size_t size = 32;
485 while (size < initialSize) size *= 2;
486 return size;
487 }
488
489private:
490 // Evil hack: _buckets must be mutable because both ConstIterator and Iterator hold
491 // ordinary iterator's (not const_iterator's).
492 mutable BucketVec _buckets;
493 std::size_t _split;
494 std::size_t _front;
495 std::size_t _size;
496 HashFunc _hash;
497};
498
499
500} // namespace Poco
501
502
503#endif // Foundation_LinearHashTable_INCLUDED
504