1#pragma once
2
3#include <string.h>
4
5#include <math.h>
6
7#include <utility>
8
9#include <boost/noncopyable.hpp>
10
11#include <common/likely.h>
12
13#include <Core/Defines.h>
14#include <Core/Types.h>
15#include <Common/Exception.h>
16
17#include <IO/WriteBuffer.h>
18#include <IO/WriteHelpers.h>
19#include <IO/ReadBuffer.h>
20#include <IO/ReadHelpers.h>
21#include <IO/VarInt.h>
22
23#include <Common/HashTable/HashTableAllocator.h>
24#include <Common/HashTable/HashTableKeyHolder.h>
25
26#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
27 #include <iostream>
28 #include <iomanip>
29 #include <Common/Stopwatch.h>
30#endif
31
32/** NOTE HashTable could only be used for memmoveable (position independent) types.
33 * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
34 * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
35 */
36
37
38namespace DB
39{
40namespace ErrorCodes
41{
42 extern const int LOGICAL_ERROR;
43 extern const int NO_AVAILABLE_DATA;
44}
45}
46
47
48/** The state of the hash table that affects the properties of its cells.
49 * Used as a template parameter.
50 * For example, there is an implementation of an instantly clearable hash table - ClearableHashMap.
51 * For it, each cell holds the version number, and in the hash table itself is the current version.
52 * When clearing, the current version simply increases; All cells with a mismatching version are considered empty.
53 * Another example: for an approximate calculation of the number of unique visitors, there is a hash table for UniquesHashSet.
54 * It has the concept of "degree". At each overflow, cells with keys that do not divide by the corresponding power of the two are deleted.
55 */
56struct HashTableNoState
57{
58 /// Serialization, in binary and text form.
59 void write(DB::WriteBuffer &) const {}
60 void writeText(DB::WriteBuffer &) const {}
61
62 /// Deserialization, in binary and text form.
63 void read(DB::ReadBuffer &) {}
64 void readText(DB::ReadBuffer &) {}
65};
66
67
68/// These functions can be overloaded for custom types.
69namespace ZeroTraits
70{
71
72template <typename T>
73bool check(const T x) { return x == 0; }
74
75template <typename T>
76void set(T & x) { x = 0; }
77
78}
79
80/**
81 * getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and
82 * emplace() methods of HashTable. Must not be called for a null LookupResult.
83 *
84 * We don't use iterators for lookup result. Instead, LookupResult is a pointer of some kind. There
85 * are methods getKey/Mapped, that return references or values to key/"mapped" values.
86 *
87 * Different hash table implementations support this interface to a varying degree:
88 *
89 * 1) Hash tables that store neither the key in its original form, nor a "mapped" value:
90 * FixedHashTable or StringHashTable. Neither GetKey nor GetMapped are supported, the only valid
91 * operation is checking LookupResult for null.
92 *
93 * 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. Only GetMapped is
94 * supported.
95 *
96 * 3) Hash tables that store the key and do not have a "mapped" value, e.g. the normal HashTable.
97 * GetKey returns the key, and GetMapped returns a zero void pointer. This simplifies generic
98 * code that works with mapped values: it can overload on the return type of GetMapped(), and
99 * doesn't need other parameters. One example is insertSetMapped() function.
100 *
101 * 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. Both GetKey and
102 * GetMapped are supported.
103 *
104 * The implementation side goes as follows:
105 *
106 * for (1), LookupResult->getKey = const VoidKey, LookupResult->getMapped = VoidMapped;
107 *
108 * for (2), LookupResult->getKey = const VoidKey, LookupResult->getMapped = Mapped &;
109 *
110 * for (3) and (4), LookupResult->getKey = const Key [&], LookupResult->getMapped = Mapped &;
111 * VoidKey and VoidMapped may have specialized function overloads for generic code.
112 */
113
114struct VoidKey {};
115struct VoidMapped
116{
117 template <typename T>
118 auto & operator=(const T &)
119 {
120 return *this;
121 }
122};
123
124/** Compile-time interface for cell of the hash table.
125 * Different cell types are used to implement different hash tables.
126 * The cell must contain a key.
127 * It can also contain a value and arbitrary additional data
128 * (example: the stored hash value; version number for ClearableHashMap).
129 */
130template <typename Key, typename Hash, typename TState = HashTableNoState>
131struct HashTableCell
132{
133 using State = TState;
134
135 using key_type = Key;
136 using value_type = Key;
137 using mapped_type = VoidMapped;
138
139 Key key;
140
141 HashTableCell() {}
142
143 /// Create a cell with the given key / key and value.
144 HashTableCell(const Key & key_, const State &) : key(key_) {}
145
146 /// Get the key (externally).
147 const Key & getKey() const { return key; }
148 VoidMapped getMapped() const { return {}; }
149 const value_type & getValue() const { return key; }
150
151 /// Get the key (internally).
152 static const Key & getKey(const value_type & value) { return value; }
153
154 /// Are the keys at the cells equal?
155 bool keyEquals(const Key & key_) const { return key == key_; }
156 bool keyEquals(const Key & key_, size_t /*hash_*/) const { return key == key_; }
157 bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return key == key_; }
158
159 /// If the cell can remember the value of the hash function, then remember it.
160 void setHash(size_t /*hash_value*/) {}
161
162 /// If the cell can store the hash value in itself, then return the stored value.
163 /// It must be at least once calculated before.
164 /// If storing the hash value is not provided, then just compute the hash.
165 size_t getHash(const Hash & hash) const { return hash(key); }
166
167 /// Whether the key is zero. In the main buffer, cells with a zero key are considered empty.
168 /// If zero keys can be inserted into the table, then the cell for the zero key is stored separately, not in the main buffer.
169 /// Zero keys must be such that the zeroed-down piece of memory is a zero key.
170 bool isZero(const State & state) const { return isZero(key, state); }
171 static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); }
172
173 /// Set the key value to zero.
174 void setZero() { ZeroTraits::set(key); }
175
176 /// Do the hash table need to store the zero key separately (that is, can a zero key be inserted into the hash table).
177 static constexpr bool need_zero_value_storage = true;
178
179 /// Whether the cell is deleted.
180 bool isDeleted() const { return false; }
181
182 /// Set the mapped value, if any (for HashMap), to the corresponding `value`.
183 void setMapped(const value_type & /*value*/) {}
184
185 /// Serialization, in binary and text form.
186 void write(DB::WriteBuffer & wb) const { DB::writeBinary(key, wb); }
187 void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(key, wb); }
188
189 /// Deserialization, in binary and text form.
190 void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
191 void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
192};
193
194/**
195 * A helper function for HashTable::insert() to set the "mapped" value.
196 * Overloaded on the mapped type, does nothing if it's VoidMapped.
197 */
198template <typename ValueType>
199void insertSetMapped(VoidMapped /* dest */, const ValueType & /* src */) {}
200
201template <typename MappedType, typename ValueType>
202void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.second; }
203
204
205/** Determines the size of the hash table, and when and how much it should be resized.
206 */
207template <size_t initial_size_degree = 8>
208struct HashTableGrower
209{
210 /// The state of this structure is enough to get the buffer size of the hash table.
211
212 UInt8 size_degree = initial_size_degree;
213
214 /// The size of the hash table in the cells.
215 size_t bufSize() const { return 1ULL << size_degree; }
216
217 size_t maxFill() const { return 1ULL << (size_degree - 1); }
218 size_t mask() const { return bufSize() - 1; }
219
220 /// From the hash value, get the cell number in the hash table.
221 size_t place(size_t x) const { return x & mask(); }
222
223 /// The next cell in the collision resolution chain.
224 size_t next(size_t pos) const { ++pos; return pos & mask(); }
225
226 /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
227 bool overflow(size_t elems) const { return elems > maxFill(); }
228
229 /// Increase the size of the hash table.
230 void increaseSize()
231 {
232 size_degree += size_degree >= 23 ? 1 : 2;
233 }
234
235 /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
236 void set(size_t num_elems)
237 {
238 size_degree = num_elems <= 1
239 ? initial_size_degree
240 : ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
241 ? initial_size_degree
242 : (static_cast<size_t>(log2(num_elems - 1)) + 2));
243 }
244
245 void setBufSize(size_t buf_size_)
246 {
247 size_degree = static_cast<size_t>(log2(buf_size_ - 1) + 1);
248 }
249};
250
251
252/** When used as a Grower, it turns a hash table into something like a lookup table.
253 * It remains non-optimal - the cells store the keys.
254 * Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
255 * NOTE: Better to use FixedHashTable instead.
256 */
257template <size_t key_bits>
258struct HashTableFixedGrower
259{
260 size_t bufSize() const { return 1ULL << key_bits; }
261 size_t place(size_t x) const { return x; }
262 /// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently.
263 size_t next(size_t pos) const { return pos + 1; }
264 bool overflow(size_t /*elems*/) const { return false; }
265
266 void increaseSize() { __builtin_unreachable(); }
267 void set(size_t /*num_elems*/) {}
268 void setBufSize(size_t /*buf_size_*/) {}
269};
270
271
272/** If you want to store the zero key separately - a place to store it. */
273template <bool need_zero_value_storage, typename Cell>
274struct ZeroValueStorage;
275
276template <typename Cell>
277struct ZeroValueStorage<true, Cell>
278{
279private:
280 bool has_zero = false;
281 std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
282
283public:
284 bool hasZero() const { return has_zero; }
285
286 void setHasZero()
287 {
288 has_zero = true;
289 new (zeroValue()) Cell();
290 }
291
292 void clearHasZero()
293 {
294 has_zero = false;
295 zeroValue()->~Cell();
296 }
297
298 Cell * zeroValue() { return reinterpret_cast<Cell*>(&zero_value_storage); }
299 const Cell * zeroValue() const { return reinterpret_cast<const Cell*>(&zero_value_storage); }
300};
301
302template <typename Cell>
303struct ZeroValueStorage<false, Cell>
304{
305 bool hasZero() const { return false; }
306 void setHasZero() { throw DB::Exception("HashTable: logical error", DB::ErrorCodes::LOGICAL_ERROR); }
307 void clearHasZero() {}
308
309 Cell * zeroValue() { return nullptr; }
310 const Cell * zeroValue() const { return nullptr; }
311};
312
313
314template
315<
316 typename Key,
317 typename Cell,
318 typename Hash,
319 typename Grower,
320 typename Allocator
321>
322class HashTable :
323 private boost::noncopyable,
324 protected Hash,
325 protected Allocator,
326 protected Cell::State,
327 protected ZeroValueStorage<Cell::need_zero_value_storage, Cell> /// empty base optimization
328{
329protected:
330 friend class const_iterator;
331 friend class iterator;
332 friend class Reader;
333
334 template <typename, typename, typename, typename, typename, typename, size_t>
335 friend class TwoLevelHashTable;
336
337 template <typename, typename, size_t>
338 friend class TwoLevelStringHashTable;
339
340 template <typename SubMaps>
341 friend class StringHashTable;
342
343 using HashValue = size_t;
344 using Self = HashTable;
345
346 size_t m_size = 0; /// Amount of elements
347 Cell * buf; /// A piece of memory for all elements except the element with zero key.
348 Grower grower;
349
350#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
351 mutable size_t collisions = 0;
352#endif
353
354 /// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
355 size_t ALWAYS_INLINE findCell(const Key & x, size_t hash_value, size_t place_value) const
356 {
357 while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value, *this))
358 {
359 place_value = grower.next(place_value);
360#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
361 ++collisions;
362#endif
363 }
364
365 return place_value;
366 }
367
368
369 /// Find an empty cell, starting with the specified position and further along the collision resolution chain.
370 size_t ALWAYS_INLINE findEmptyCell(size_t place_value) const
371 {
372 while (!buf[place_value].isZero(*this))
373 {
374 place_value = grower.next(place_value);
375#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
376 ++collisions;
377#endif
378 }
379
380 return place_value;
381 }
382
383 void alloc(const Grower & new_grower)
384 {
385 buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
386 grower = new_grower;
387 }
388
389 void free()
390 {
391 if (buf)
392 {
393 Allocator::free(buf, getBufferSizeInBytes());
394 buf = nullptr;
395 }
396 }
397
398
399 /// Increase the size of the buffer.
400 void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
401 {
402#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
403 Stopwatch watch;
404#endif
405
406 size_t old_size = grower.bufSize();
407
408 /** In case of exception for the object to remain in the correct state,
409 * changing the variable `grower` (which determines the buffer size of the hash table)
410 * is postponed for a moment after a real buffer change.
411 * The temporary variable `new_grower` is used to determine the new size.
412 */
413 Grower new_grower = grower;
414
415 if (for_num_elems)
416 {
417 new_grower.set(for_num_elems);
418 if (new_grower.bufSize() <= old_size)
419 return;
420 }
421 else if (for_buf_size)
422 {
423 new_grower.setBufSize(for_buf_size);
424 if (new_grower.bufSize() <= old_size)
425 return;
426 }
427 else
428 new_grower.increaseSize();
429
430 /// Expand the space.
431 buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
432 grower = new_grower;
433
434 /** Now some items may need to be moved to a new location.
435 * The element can stay in place, or move to a new location "on the right",
436 * or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
437 */
438 size_t i = 0;
439 for (; i < old_size; ++i)
440 if (!buf[i].isZero(*this) && !buf[i].isDeleted())
441 reinsert(buf[i], buf[i].getHash(*this));
442
443 /** There is also a special case:
444 * if the element was to be at the end of the old buffer, [ x]
445 * but is at the beginning because of the collision resolution chain, [o x]
446 * then after resizing, it will first be out of place again, [ xo ]
447 * and in order to transfer it where necessary,
448 * after transferring all the elements from the old halves you need to [ o x ]
449 * process tail from the collision resolution chain immediately after it [ o x ]
450 */
451 for (; !buf[i].isZero(*this) && !buf[i].isDeleted(); ++i)
452 reinsert(buf[i], buf[i].getHash(*this));
453
454#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
455 watch.stop();
456 std::cerr << std::fixed << std::setprecision(3)
457 << "Resize from " << old_size << " to " << grower.bufSize() << " took " << watch.elapsedSeconds() << " sec."
458 << std::endl;
459#endif
460 }
461
462
463 /** Paste into the new buffer the value that was in the old buffer.
464 * Used when increasing the buffer size.
465 */
466 void reinsert(Cell & x, size_t hash_value)
467 {
468 size_t place_value = grower.place(hash_value);
469
470 /// If the element is in its place.
471 if (&x == &buf[place_value])
472 return;
473
474 /// Compute a new location, taking into account the collision resolution chain.
475 place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
476
477 /// If the item remains in its place in the old collision resolution chain.
478 if (!buf[place_value].isZero(*this))
479 return;
480
481 /// Copy to a new location and zero the old one.
482 x.setHash(hash_value);
483 memcpy(static_cast<void*>(&buf[place_value]), &x, sizeof(x));
484 x.setZero();
485
486 /// Then the elements that previously were in collision with this can move to the old place.
487 }
488
489
490 void destroyElements()
491 {
492 if (!std::is_trivially_destructible_v<Cell>)
493 for (iterator it = begin(), it_end = end(); it != it_end; ++it)
494 it.ptr->~Cell();
495 }
496
497
498 template <typename Derived, bool is_const>
499 class iterator_base
500 {
501 using Container = std::conditional_t<is_const, const Self, Self>;
502 using cell_type = std::conditional_t<is_const, const Cell, Cell>;
503
504 Container * container;
505 cell_type * ptr;
506
507 friend class HashTable;
508
509 public:
510 iterator_base() {}
511 iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) {}
512
513 bool operator== (const iterator_base & rhs) const { return ptr == rhs.ptr; }
514 bool operator!= (const iterator_base & rhs) const { return ptr != rhs.ptr; }
515
516 Derived & operator++()
517 {
518 /// If iterator was pointed to ZeroValueStorage, move it to the beginning of the main buffer.
519 if (unlikely(ptr->isZero(*container)))
520 ptr = container->buf;
521 else
522 ++ptr;
523
524 /// Skip empty cells in the main buffer.
525 auto buf_end = container->buf + container->grower.bufSize();
526 while (ptr < buf_end && ptr->isZero(*container))
527 ++ptr;
528
529 return static_cast<Derived &>(*this);
530 }
531
532 auto & operator* () const { return *ptr; }
533 auto * operator->() const { return ptr; }
534
535 auto getPtr() const { return ptr; }
536 size_t getHash() const { return ptr->getHash(*container); }
537
538 size_t getCollisionChainLength() const
539 {
540 return container->grower.place((ptr - container->buf) - container->grower.place(getHash()));
541 }
542
543 /**
544 * A hack for HashedDictionary.
545 *
546 * The problem: std-like find() returns an iterator, which has to be
547 * compared to end(). On the other hand, HashMap::find() returns
548 * LookupResult, which is compared to nullptr. HashedDictionary has to
549 * support both hash maps with the same code, hence the need for this
550 * hack.
551 *
552 * The proper way would be to remove iterator interface from our
553 * HashMap completely, change all its users to the existing internal
554 * iteration interface, and redefine end() to return LookupResult for
555 * compatibility with std find(). Unfortunately, now is not the time to
556 * do this.
557 */
558 operator Cell * () const { return nullptr; }
559 };
560
561
562public:
563 using key_type = Key;
564 using mapped_type = typename Cell::mapped_type;
565 using value_type = typename Cell::value_type;
566 using cell_type = Cell;
567
568 using LookupResult = Cell *;
569 using ConstLookupResult = const Cell *;
570
571 size_t hash(const Key & x) const { return Hash::operator()(x); }
572
573
574 HashTable()
575 {
576 if (Cell::need_zero_value_storage)
577 this->zeroValue()->setZero();
578 alloc(grower);
579 }
580
581 HashTable(size_t reserve_for_num_elements)
582 {
583 if (Cell::need_zero_value_storage)
584 this->zeroValue()->setZero();
585 grower.set(reserve_for_num_elements);
586 alloc(grower);
587 }
588
589 HashTable(HashTable && rhs)
590 : buf(nullptr)
591 {
592 *this = std::move(rhs);
593 }
594
595 ~HashTable()
596 {
597 destroyElements();
598 free();
599 }
600
601 HashTable & operator= (HashTable && rhs)
602 {
603 destroyElements();
604 free();
605
606 std::swap(buf, rhs.buf);
607 std::swap(m_size, rhs.m_size);
608 std::swap(grower, rhs.grower);
609
610 Hash::operator=(std::move(rhs));
611 Allocator::operator=(std::move(rhs));
612 Cell::State::operator=(std::move(rhs));
613 ZeroValueStorage<Cell::need_zero_value_storage, Cell>::operator=(std::move(rhs));
614
615 return *this;
616 }
617
618 class Reader final : private Cell::State
619 {
620 public:
621 Reader(DB::ReadBuffer & in_)
622 : in(in_)
623 {
624 }
625
626 Reader(const Reader &) = delete;
627 Reader & operator=(const Reader &) = delete;
628
629 bool next()
630 {
631 if (!is_initialized)
632 {
633 Cell::State::read(in);
634 DB::readVarUInt(size, in);
635 is_initialized = true;
636 }
637
638 if (read_count == size)
639 {
640 is_eof = true;
641 return false;
642 }
643
644 cell.read(in);
645 ++read_count;
646
647 return true;
648 }
649
650 inline const value_type & get() const
651 {
652 if (!is_initialized || is_eof)
653 throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
654
655 return cell.getValue();
656 }
657
658 private:
659 DB::ReadBuffer & in;
660 Cell cell;
661 size_t read_count = 0;
662 size_t size = 0;
663 bool is_eof = false;
664 bool is_initialized = false;
665 };
666
667
668 class iterator : public iterator_base<iterator, false>
669 {
670 public:
671 using iterator_base<iterator, false>::iterator_base;
672 };
673
674 class const_iterator : public iterator_base<const_iterator, true>
675 {
676 public:
677 using iterator_base<const_iterator, true>::iterator_base;
678 };
679
680
681 const_iterator begin() const
682 {
683 if (!buf)
684 return end();
685
686 if (this->hasZero())
687 return iteratorToZero();
688
689 const Cell * ptr = buf;
690 auto buf_end = buf + grower.bufSize();
691 while (ptr < buf_end && ptr->isZero(*this))
692 ++ptr;
693
694 return const_iterator(this, ptr);
695 }
696
697 const_iterator cbegin() const { return begin(); }
698
699 iterator begin()
700 {
701 if (!buf)
702 return end();
703
704 if (this->hasZero())
705 return iteratorToZero();
706
707 Cell * ptr = buf;
708 auto buf_end = buf + grower.bufSize();
709 while (ptr < buf_end && ptr->isZero(*this))
710 ++ptr;
711
712 return iterator(this, ptr);
713 }
714
715 const_iterator end() const { return const_iterator(this, buf + grower.bufSize()); }
716 const_iterator cend() const { return end(); }
717 iterator end() { return iterator(this, buf + grower.bufSize()); }
718
719
720protected:
721 const_iterator iteratorTo(const Cell * ptr) const { return const_iterator(this, ptr); }
722 iterator iteratorTo(Cell * ptr) { return iterator(this, ptr); }
723 const_iterator iteratorToZero() const { return iteratorTo(this->zeroValue()); }
724 iterator iteratorToZero() { return iteratorTo(this->zeroValue()); }
725
726
727 /// If the key is zero, insert it into a special place and return true.
728 /// We don't have to persist a zero key, because it's not actually inserted.
729 /// That's why we just take a Key by value, an not a key holder.
730 bool ALWAYS_INLINE emplaceIfZero(const Key & x, LookupResult & it, bool & inserted, size_t hash_value)
731 {
732 /// If it is claimed that the zero key can not be inserted into the table.
733 if (!Cell::need_zero_value_storage)
734 return false;
735
736 if (Cell::isZero(x, *this))
737 {
738 it = this->zeroValue();
739
740 if (!this->hasZero())
741 {
742 ++m_size;
743 this->setHasZero();
744 this->zeroValue()->setHash(hash_value);
745 inserted = true;
746 }
747 else
748 inserted = false;
749
750 return true;
751 }
752
753 return false;
754 }
755
756 template <typename KeyHolder>
757 void ALWAYS_INLINE emplaceNonZeroImpl(size_t place_value, KeyHolder && key_holder,
758 LookupResult & it, bool & inserted, size_t hash_value)
759 {
760 it = &buf[place_value];
761
762 if (!buf[place_value].isZero(*this))
763 {
764 keyHolderDiscardKey(key_holder);
765 inserted = false;
766 return;
767 }
768
769 keyHolderPersistKey(key_holder);
770 const auto & key = keyHolderGetKey(key_holder);
771
772 new (&buf[place_value]) Cell(key, *this);
773 buf[place_value].setHash(hash_value);
774 inserted = true;
775 ++m_size;
776
777 if (unlikely(grower.overflow(m_size)))
778 {
779 try
780 {
781 resize();
782 }
783 catch (...)
784 {
785 /** If we have not resized successfully, then there will be problems.
786 * There remains a key, but uninitialized mapped-value,
787 * which, perhaps, can not even be called a destructor.
788 */
789 --m_size;
790 buf[place_value].setZero();
791 throw;
792 }
793
794 // The hash table was rehashed, so we have to re-find the key.
795 size_t new_place = findCell(key, hash_value, grower.place(hash_value));
796 assert(!buf[new_place].isZero(*this));
797 it = &buf[new_place];
798 }
799 }
800
801 /// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
802 template <typename KeyHolder>
803 void ALWAYS_INLINE emplaceNonZero(KeyHolder && key_holder, LookupResult & it,
804 bool & inserted, size_t hash_value)
805 {
806 const auto & key = keyHolderGetKey(key_holder);
807 size_t place_value = findCell(key, hash_value, grower.place(hash_value));
808 emplaceNonZeroImpl(place_value, key_holder, it, inserted, hash_value);
809 }
810
811
812public:
813 /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
814 std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
815 {
816 std::pair<LookupResult, bool> res;
817
818 size_t hash_value = hash(Cell::getKey(x));
819 if (!emplaceIfZero(Cell::getKey(x), res.first, res.second, hash_value))
820 {
821 emplaceNonZero(Cell::getKey(x), res.first, res.second, hash_value);
822 }
823
824 if (res.second)
825 insertSetMapped(res.first->getMapped(), x);
826
827 return res;
828 }
829
830
831 /// Reinsert node pointed to by iterator
832 void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
833 {
834 reinsert(*it.getPtr(), hash_value);
835 }
836
837
838 /** Insert the key.
839 * Return values:
840 * 'it' -- a LookupResult pointing to the corresponding key/mapped pair.
841 * 'inserted' -- whether a new key was inserted.
842 *
843 * You have to make `placement new` of value if you inserted a new key,
844 * since when destroying a hash table, it will call the destructor!
845 *
846 * Example usage:
847 *
848 * Map::LookupResult it;
849 * bool inserted;
850 * map.emplace(key, it, inserted);
851 * if (inserted)
852 * new (&it->getMapped()) Mapped(value);
853 */
854 template <typename KeyHolder>
855 void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
856 {
857 const auto & key = keyHolderGetKey(key_holder);
858 emplace(key_holder, it, inserted, hash(key));
859 }
860
861 template <typename KeyHolder>
862 void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it,
863 bool & inserted, size_t hash_value)
864 {
865 const auto & key = keyHolderGetKey(key_holder);
866 if (!emplaceIfZero(key, it, inserted, hash_value))
867 emplaceNonZero(key_holder, it, inserted, hash_value);
868 }
869
870 /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
871 void ALWAYS_INLINE insertUniqueNonZero(const Cell * cell, size_t hash_value)
872 {
873 size_t place_value = findEmptyCell(grower.place(hash_value));
874
875 memcpy(static_cast<void*>(&buf[place_value]), cell, sizeof(*cell));
876 ++m_size;
877
878 if (unlikely(grower.overflow(m_size)))
879 resize();
880 }
881
882 LookupResult ALWAYS_INLINE find(const Key & x)
883 {
884 if (Cell::isZero(x, *this))
885 return this->hasZero() ? this->zeroValue() : nullptr;
886
887 size_t hash_value = hash(x);
888 size_t place_value = findCell(x, hash_value, grower.place(hash_value));
889 return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
890 }
891
892 ConstLookupResult ALWAYS_INLINE find(const Key & x) const
893 {
894 return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
895 }
896
897 LookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value)
898 {
899 if (Cell::isZero(x, *this))
900 return this->hasZero() ? this->zeroValue() : nullptr;
901
902 size_t place_value = findCell(x, hash_value, grower.place(hash_value));
903 return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
904 }
905
906 ConstLookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value) const
907 {
908 return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
909 }
910
911 bool ALWAYS_INLINE has(const Key & x) const
912 {
913 if (Cell::isZero(x, *this))
914 return this->hasZero();
915
916 size_t hash_value = hash(x);
917 size_t place_value = findCell(x, hash_value, grower.place(hash_value));
918 return !buf[place_value].isZero(*this);
919 }
920
921
922 bool ALWAYS_INLINE has(const Key & x, size_t hash_value) const
923 {
924 if (Cell::isZero(x, *this))
925 return this->hasZero();
926
927 size_t place_value = findCell(x, hash_value, grower.place(hash_value));
928 return !buf[place_value].isZero(*this);
929 }
930
931
932 void write(DB::WriteBuffer & wb) const
933 {
934 Cell::State::write(wb);
935 DB::writeVarUInt(m_size, wb);
936
937 if (this->hasZero())
938 this->zeroValue()->write(wb);
939
940 for (auto ptr = buf, buf_end = buf + grower.bufSize(); ptr < buf_end; ++ptr)
941 if (!ptr->isZero(*this))
942 ptr->write(wb);
943 }
944
945 void writeText(DB::WriteBuffer & wb) const
946 {
947 Cell::State::writeText(wb);
948 DB::writeText(m_size, wb);
949
950 if (this->hasZero())
951 {
952 DB::writeChar(',', wb);
953 this->zeroValue()->writeText(wb);
954 }
955
956 for (auto ptr = buf, buf_end = buf + grower.bufSize(); ptr < buf_end; ++ptr)
957 {
958 if (!ptr->isZero(*this))
959 {
960 DB::writeChar(',', wb);
961 ptr->writeText(wb);
962 }
963 }
964 }
965
966 void read(DB::ReadBuffer & rb)
967 {
968 Cell::State::read(rb);
969
970 destroyElements();
971 this->clearHasZero();
972 m_size = 0;
973
974 size_t new_size = 0;
975 DB::readVarUInt(new_size, rb);
976
977 free();
978 Grower new_grower = grower;
979 new_grower.set(new_size);
980 alloc(new_grower);
981
982 for (size_t i = 0; i < new_size; ++i)
983 {
984 Cell x;
985 x.read(rb);
986 insert(Cell::getKey(x.getValue()));
987 }
988 }
989
990 void readText(DB::ReadBuffer & rb)
991 {
992 Cell::State::readText(rb);
993
994 destroyElements();
995 this->clearHasZero();
996 m_size = 0;
997
998 size_t new_size = 0;
999 DB::readText(new_size, rb);
1000
1001 free();
1002 Grower new_grower = grower;
1003 new_grower.set(new_size);
1004 alloc(new_grower);
1005
1006 for (size_t i = 0; i < new_size; ++i)
1007 {
1008 Cell x;
1009 DB::assertChar(',', rb);
1010 x.readText(rb);
1011 insert(Cell::getKey(x.getValue()));
1012 }
1013 }
1014
1015
1016 size_t size() const
1017 {
1018 return m_size;
1019 }
1020
1021 bool empty() const
1022 {
1023 return 0 == m_size;
1024 }
1025
1026 void clear()
1027 {
1028 destroyElements();
1029 this->clearHasZero();
1030 m_size = 0;
1031
1032 memset(static_cast<void*>(buf), 0, grower.bufSize() * sizeof(*buf));
1033 }
1034
1035 /// After executing this function, the table can only be destroyed,
1036 /// and also you can use the methods `size`, `empty`, `begin`, `end`.
1037 void clearAndShrink()
1038 {
1039 destroyElements();
1040 this->clearHasZero();
1041 m_size = 0;
1042 free();
1043 }
1044
1045 size_t getBufferSizeInBytes() const
1046 {
1047 return grower.bufSize() * sizeof(Cell);
1048 }
1049
1050 size_t getBufferSizeInCells() const
1051 {
1052 return grower.bufSize();
1053 }
1054
1055#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
1056 size_t getCollisions() const
1057 {
1058 return collisions;
1059 }
1060#endif
1061};
1062