| 1 | // |
| 2 | // LinearHashTable.h |
| 3 | // |
| 4 | // Library: Foundation |
| 5 | // Package: Hashing |
| 6 | // Module: LinearHashTable |
| 7 | // |
| 8 | // Definition of the LinearHashTable class. |
| 9 | // |
| 10 | // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. |
| 11 | // and Contributors. |
| 12 | // |
| 13 | // SPDX-License-Identifier: BSL-1.0 |
| 14 | // |
| 15 | |
| 16 | |
| 17 | #ifndef Foundation_LinearHashTable_INCLUDED |
| 18 | #define Foundation_LinearHashTable_INCLUDED |
| 19 | |
| 20 | |
| 21 | #include "Poco/Foundation.h" |
| 22 | #include "Poco/Hash.h" |
| 23 | #include <functional> |
| 24 | #include <algorithm> |
| 25 | #include <vector> |
| 26 | #include <utility> |
| 27 | #include <cstddef> |
| 28 | |
| 29 | |
| 30 | namespace Poco { |
| 31 | |
| 32 | |
| 33 | template <class Value, class HashFunc = Hash<Value> > |
| 34 | class LinearHashTable |
| 35 | /// This class implements a linear hash table. |
| 36 | /// |
| 37 | /// In a linear hash table, the available address space |
| 38 | /// grows or shrinks dynamically. A linear hash table thus |
| 39 | /// supports any number of insertions or deletions without |
| 40 | /// lookup or insertion performance deterioration. |
| 41 | /// |
| 42 | /// Linear hashing was discovered by Witold Litwin in 1980 |
| 43 | /// and described in the paper LINEAR HASHING: A NEW TOOL FOR FILE AND TABLE ADDRESSING. |
| 44 | /// |
| 45 | /// For more information on linear hashing, see <http://en.wikipedia.org/wiki/Linear_hash>. |
| 46 | /// |
| 47 | /// The LinearHashTable is not thread safe. |
| 48 | /// |
| 49 | /// Value must support comparison for equality. |
| 50 | /// |
| 51 | /// Find, insert and delete operations are basically O(1) with regard |
| 52 | /// to the total number of elements in the table, and O(N) with regard |
| 53 | /// to the number of elements in the bucket where the element is stored. |
| 54 | /// On average, every bucket stores one element; the exact number depends |
| 55 | /// on the quality of the hash function. In most cases, the maximum number of |
| 56 | /// elements in a bucket should not exceed 3. |
| 57 | { |
| 58 | public: |
| 59 | typedef Value ValueType; |
| 60 | typedef Value& Reference; |
| 61 | typedef const Value& ConstReference; |
| 62 | typedef Value* Pointer; |
| 63 | typedef const Value* ConstPointer; |
| 64 | typedef HashFunc Hash; |
| 65 | typedef std::vector<Value> Bucket; |
| 66 | typedef std::vector<Bucket> BucketVec; |
| 67 | typedef typename Bucket::iterator BucketIterator; |
| 68 | typedef typename BucketVec::iterator BucketVecIterator; |
| 69 | |
| 70 | class ConstIterator: public std::iterator<std::forward_iterator_tag, Value> |
| 71 | { |
| 72 | public: |
| 73 | ConstIterator(): _initialized(false) |
| 74 | { |
| 75 | } |
| 76 | |
| 77 | ConstIterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt): |
| 78 | _vecIt(vecIt), |
| 79 | _endIt(endIt), |
| 80 | _buckIt(buckIt), |
| 81 | _initialized(true) |
| 82 | { |
| 83 | } |
| 84 | |
| 85 | ConstIterator(const ConstIterator& it): |
| 86 | _vecIt(it._vecIt), |
| 87 | _endIt(it._endIt), |
| 88 | _buckIt(it._buckIt), |
| 89 | _initialized(it._initialized) |
| 90 | |
| 91 | { |
| 92 | } |
| 93 | |
| 94 | ConstIterator& operator = (const ConstIterator& it) |
| 95 | { |
| 96 | ConstIterator tmp(it); |
| 97 | swap(tmp); |
| 98 | return *this; |
| 99 | } |
| 100 | |
| 101 | void swap(ConstIterator& it) |
| 102 | { |
| 103 | using std::swap; |
| 104 | // uninitialized iterators crash when swapped |
| 105 | if (_initialized) |
| 106 | { |
| 107 | swap(_vecIt, it._vecIt); |
| 108 | swap(_endIt, it._endIt); |
| 109 | swap(_buckIt, it._buckIt); |
| 110 | swap(_initialized, it._initialized); |
| 111 | } |
| 112 | else |
| 113 | { |
| 114 | _vecIt = it._vecIt; |
| 115 | _endIt = it._endIt; |
| 116 | _buckIt = it._buckIt; |
| 117 | _initialized = it._initialized; |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | bool operator == (const ConstIterator& it) const |
| 122 | { |
| 123 | return _vecIt == it._vecIt && (_vecIt == _endIt || _buckIt == it._buckIt); |
| 124 | } |
| 125 | |
| 126 | bool operator != (const ConstIterator& it) const |
| 127 | { |
| 128 | return _vecIt != it._vecIt || (_vecIt != _endIt && _buckIt != it._buckIt); |
| 129 | } |
| 130 | |
| 131 | const typename Bucket::value_type& operator * () const |
| 132 | { |
| 133 | return *_buckIt; |
| 134 | } |
| 135 | |
| 136 | const typename Bucket::value_type* operator -> () const |
| 137 | { |
| 138 | return &*_buckIt; |
| 139 | } |
| 140 | |
| 141 | ConstIterator& operator ++ () // prefix |
| 142 | { |
| 143 | if (_vecIt != _endIt) |
| 144 | { |
| 145 | ++_buckIt; |
| 146 | while (_vecIt != _endIt && _buckIt == _vecIt->end()) |
| 147 | { |
| 148 | ++_vecIt; |
| 149 | if (_vecIt != _endIt) _buckIt = _vecIt->begin(); |
| 150 | } |
| 151 | } |
| 152 | return *this; |
| 153 | } |
| 154 | |
| 155 | ConstIterator operator ++ (int) // postfix |
| 156 | { |
| 157 | ConstIterator tmp(*this); |
| 158 | ++*this; |
| 159 | return tmp; |
| 160 | } |
| 161 | |
| 162 | protected: |
| 163 | BucketVecIterator _vecIt; |
| 164 | BucketVecIterator _endIt; |
| 165 | BucketIterator _buckIt; |
| 166 | bool _initialized; |
| 167 | |
| 168 | friend class LinearHashTable; |
| 169 | }; |
| 170 | |
| 171 | class Iterator: public ConstIterator |
| 172 | { |
| 173 | public: |
| 174 | Iterator() |
| 175 | { |
| 176 | } |
| 177 | |
| 178 | Iterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt): |
| 179 | ConstIterator(vecIt, endIt, buckIt) |
| 180 | { |
| 181 | } |
| 182 | |
| 183 | Iterator(const Iterator& it): |
| 184 | ConstIterator(it) |
| 185 | { |
| 186 | } |
| 187 | |
| 188 | Iterator& operator = (const Iterator& it) |
| 189 | { |
| 190 | Iterator tmp(it); |
| 191 | ConstIterator::swap(tmp); |
| 192 | return *this; |
| 193 | } |
| 194 | |
| 195 | void swap(Iterator& it) |
| 196 | { |
| 197 | ConstIterator::swap(it); |
| 198 | } |
| 199 | |
| 200 | typename Bucket::value_type& operator * () |
| 201 | { |
| 202 | return *this->_buckIt; |
| 203 | } |
| 204 | |
| 205 | const typename Bucket::value_type& operator * () const |
| 206 | { |
| 207 | return *this->_buckIt; |
| 208 | } |
| 209 | |
| 210 | typename Bucket::value_type* operator -> () |
| 211 | { |
| 212 | return &*this->_buckIt; |
| 213 | } |
| 214 | |
| 215 | const typename Bucket::value_type* operator -> () const |
| 216 | { |
| 217 | return &*this->_buckIt; |
| 218 | } |
| 219 | |
| 220 | Iterator& operator ++ () // prefix |
| 221 | { |
| 222 | ConstIterator::operator ++ (); |
| 223 | return *this; |
| 224 | } |
| 225 | |
| 226 | Iterator operator ++ (int) // postfix |
| 227 | { |
| 228 | Iterator tmp(*this); |
| 229 | ++*this; |
| 230 | return tmp; |
| 231 | } |
| 232 | |
| 233 | friend class LinearHashTable; |
| 234 | }; |
| 235 | |
| 236 | LinearHashTable(std::size_t initialReserve = 64): |
| 237 | _split(0), |
| 238 | _front(1), |
| 239 | _size(0) |
| 240 | /// Creates the LinearHashTable, using the given initialReserve. |
| 241 | { |
| 242 | _buckets.reserve(calcSize(initialReserve)); |
| 243 | _buckets.push_back(Bucket()); |
| 244 | } |
| 245 | |
| 246 | LinearHashTable(const LinearHashTable& table): |
| 247 | _buckets(table._buckets), |
| 248 | _split(table._split), |
| 249 | _front(table._front), |
| 250 | _size(table._size) |
| 251 | /// Creates the LinearHashTable by copying another one. |
| 252 | { |
| 253 | } |
| 254 | |
| 255 | ~LinearHashTable() |
| 256 | /// Destroys the LinearHashTable. |
| 257 | { |
| 258 | } |
| 259 | |
| 260 | LinearHashTable& operator = (const LinearHashTable& table) |
| 261 | /// Assigns another LinearHashTable. |
| 262 | { |
| 263 | LinearHashTable tmp(table); |
| 264 | swap(tmp); |
| 265 | return *this; |
| 266 | } |
| 267 | |
| 268 | void swap(LinearHashTable& table) |
| 269 | /// Swaps the LinearHashTable with another one. |
| 270 | { |
| 271 | using std::swap; |
| 272 | swap(_buckets, table._buckets); |
| 273 | swap(_split, table._split); |
| 274 | swap(_front, table._front); |
| 275 | swap(_size, table._size); |
| 276 | } |
| 277 | |
| 278 | ConstIterator begin() const |
| 279 | /// Returns an iterator pointing to the first entry, if one exists. |
| 280 | { |
| 281 | BucketVecIterator it(_buckets.begin()); |
| 282 | BucketVecIterator itEnd(_buckets.end()); |
| 283 | while (it != itEnd && it->empty()) |
| 284 | { |
| 285 | ++it; |
| 286 | } |
| 287 | if (it == itEnd) |
| 288 | return end(); |
| 289 | else |
| 290 | return ConstIterator(it, itEnd, it->begin()); |
| 291 | } |
| 292 | |
| 293 | ConstIterator end() const |
| 294 | /// Returns an iterator pointing to the end of the table. |
| 295 | { |
| 296 | return ConstIterator(_buckets.end(), _buckets.end(), _buckets.front().end()); |
| 297 | } |
| 298 | |
| 299 | Iterator begin() |
| 300 | /// Returns an iterator pointing to the first entry, if one exists. |
| 301 | { |
| 302 | BucketVecIterator it(_buckets.begin()); |
| 303 | BucketVecIterator itEnd(_buckets.end()); |
| 304 | while (it != itEnd && it->empty()) |
| 305 | { |
| 306 | ++it; |
| 307 | } |
| 308 | if (it == itEnd) |
| 309 | return end(); |
| 310 | else |
| 311 | return Iterator(it, itEnd, it->begin()); |
| 312 | } |
| 313 | |
| 314 | Iterator end() |
| 315 | /// Returns an iterator pointing to the end of the table. |
| 316 | { |
| 317 | return Iterator(_buckets.end(), _buckets.end(), _buckets.front().end()); |
| 318 | } |
| 319 | |
| 320 | ConstIterator find(const Value& value) const |
| 321 | /// Finds an entry in the table. |
| 322 | { |
| 323 | std::size_t addr = bucketAddress(value); |
| 324 | BucketVecIterator it(_buckets.begin() + addr); |
| 325 | BucketIterator buckIt(std::find(it->begin(), it->end(), value)); |
| 326 | if (buckIt != it->end()) |
| 327 | return ConstIterator(it, _buckets.end(), buckIt); |
| 328 | else |
| 329 | return end(); |
| 330 | } |
| 331 | |
| 332 | Iterator find(const Value& value) |
| 333 | /// Finds an entry in the table. |
| 334 | { |
| 335 | std::size_t addr = bucketAddress(value); |
| 336 | BucketVecIterator it(_buckets.begin() + addr); |
| 337 | BucketIterator buckIt(std::find(it->begin(), it->end(), value)); |
| 338 | if (buckIt != it->end()) |
| 339 | return Iterator(it, _buckets.end(), buckIt); |
| 340 | else |
| 341 | return end(); |
| 342 | } |
| 343 | |
| 344 | std::size_t count(const Value& value) const |
| 345 | /// Returns the number of elements with the given |
| 346 | /// value, with is either 1 or 0. |
| 347 | { |
| 348 | return find(value) != end() ? 1 : 0; |
| 349 | } |
| 350 | |
| 351 | std::pair<Iterator, bool> insert(const Value& value) |
| 352 | /// Inserts an element into the table. |
| 353 | /// |
| 354 | /// If the element already exists in the table, |
| 355 | /// a pair(iterator, false) with iterator pointing to the |
| 356 | /// existing element is returned. |
| 357 | /// Otherwise, the element is inserted an a |
| 358 | /// pair(iterator, true) with iterator |
| 359 | /// pointing to the new element is returned. |
| 360 | { |
| 361 | std::size_t hash = _hash(value); |
| 362 | std::size_t addr = bucketAddressForHash(hash); |
| 363 | BucketVecIterator it(_buckets.begin() + addr); |
| 364 | BucketIterator buckIt(std::find(it->begin(), it->end(), value)); |
| 365 | if (buckIt == it->end()) |
| 366 | { |
| 367 | split(); |
| 368 | addr = bucketAddressForHash(hash); |
| 369 | it = _buckets.begin() + addr; |
| 370 | buckIt = it->insert(it->end(), value); |
| 371 | ++_size; |
| 372 | return std::make_pair(Iterator(it, _buckets.end(), buckIt), true); |
| 373 | } |
| 374 | else |
| 375 | { |
| 376 | return std::make_pair(Iterator(it, _buckets.end(), buckIt), false); |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | void erase(Iterator it) |
| 381 | /// Erases the element pointed to by it. |
| 382 | { |
| 383 | if (it != end()) |
| 384 | { |
| 385 | it._vecIt->erase(it._buckIt); |
| 386 | --_size; |
| 387 | merge(); |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | void erase(const Value& value) |
| 392 | /// Erases the element with the given value, if it exists. |
| 393 | { |
| 394 | Iterator it = find(value); |
| 395 | erase(it); |
| 396 | } |
| 397 | |
| 398 | void clear() |
| 399 | /// Erases all elements. |
| 400 | { |
| 401 | LinearHashTable emptyTable; |
| 402 | swap(emptyTable); |
| 403 | } |
| 404 | |
| 405 | std::size_t size() const |
| 406 | /// Returns the number of elements in the table. |
| 407 | { |
| 408 | return _size; |
| 409 | } |
| 410 | |
| 411 | bool empty() const |
| 412 | /// Returns true iff the table is empty. |
| 413 | { |
| 414 | return _size == 0; |
| 415 | } |
| 416 | |
| 417 | std::size_t buckets() const |
| 418 | /// Returns the number of allocated buckets. |
| 419 | { |
| 420 | return _buckets.size(); |
| 421 | } |
| 422 | |
| 423 | protected: |
| 424 | std::size_t bucketAddress(const Value& value) const |
| 425 | { |
| 426 | std::size_t n = _hash(value); |
| 427 | if (n % _front >= _split) |
| 428 | return n % _front; |
| 429 | else |
| 430 | return n % (2*_front); |
| 431 | } |
| 432 | |
| 433 | std::size_t bucketAddressForHash(std::size_t hash) |
| 434 | { |
| 435 | if (hash % _front >= _split) |
| 436 | return hash % _front; |
| 437 | else |
| 438 | return hash % (2*_front); |
| 439 | } |
| 440 | |
| 441 | void split() |
| 442 | { |
| 443 | if (_split == _front) |
| 444 | { |
| 445 | _split = 0; |
| 446 | _front *= 2; |
| 447 | _buckets.reserve(_front*2); |
| 448 | } |
| 449 | Bucket tmp; |
| 450 | _buckets.push_back(tmp); |
| 451 | _buckets[_split].swap(tmp); |
| 452 | ++_split; |
| 453 | for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) |
| 454 | { |
| 455 | using std::swap; |
| 456 | std::size_t addr = bucketAddress(*it); |
| 457 | _buckets[addr].push_back(Value()); |
| 458 | swap(*it, _buckets[addr].back()); |
| 459 | } |
| 460 | } |
| 461 | |
| 462 | void merge() |
| 463 | { |
| 464 | if (_split == 0) |
| 465 | { |
| 466 | _front /= 2; |
| 467 | _split = _front; |
| 468 | } |
| 469 | --_split; |
| 470 | Bucket tmp; |
| 471 | tmp.swap(_buckets.back()); |
| 472 | _buckets.pop_back(); |
| 473 | for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) |
| 474 | { |
| 475 | using std::swap; |
| 476 | std::size_t addr = bucketAddress(*it); |
| 477 | _buckets[addr].push_back(Value()); |
| 478 | swap(*it, _buckets[addr].back()); |
| 479 | } |
| 480 | } |
| 481 | |
| 482 | static std::size_t calcSize(std::size_t initialSize) |
| 483 | { |
| 484 | std::size_t size = 32; |
| 485 | while (size < initialSize) size *= 2; |
| 486 | return size; |
| 487 | } |
| 488 | |
| 489 | private: |
| 490 | // Evil hack: _buckets must be mutable because both ConstIterator and Iterator hold |
| 491 | // ordinary iterator's (not const_iterator's). |
| 492 | mutable BucketVec _buckets; |
| 493 | std::size_t _split; |
| 494 | std::size_t _front; |
| 495 | std::size_t _size; |
| 496 | HashFunc _hash; |
| 497 | }; |
| 498 | |
| 499 | |
| 500 | } // namespace Poco |
| 501 | |
| 502 | |
| 503 | #endif // Foundation_LinearHashTable_INCLUDED |
| 504 | |