1#pragma once
2
3#include <Common/HashTable/Hash.h>
4#include <Common/HashTable/HashTable.h>
5#include <Common/HashTable/HashTableAllocator.h>
6
7
8/** NOTE HashMap could only be used for memmoveable (position independent) types.
9 * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
10 * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
11 */
12
13
14struct NoInitTag
15{
16};
17
18/// A pair that does not initialize the elements, if not needed.
19template <typename First, typename Second>
20struct PairNoInit
21{
22 First first;
23 Second second;
24
25 PairNoInit() {}
26
27 template <typename First_>
28 PairNoInit(First_ && first_, NoInitTag) : first(std::forward<First_>(first_))
29 {
30 }
31
32 template <typename First_, typename Second_>
33 PairNoInit(First_ && first_, Second_ && second_) : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_))
34 {
35 }
36};
37
38
39template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
40struct HashMapCell
41{
42 using Mapped = TMapped;
43 using State = TState;
44
45 using value_type = PairNoInit<Key, Mapped>;
46 using mapped_type = Mapped;
47 using key_type = Key;
48
49 value_type value;
50
51 HashMapCell() {}
52 HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
53 HashMapCell(const value_type & value_, const State &) : value(value_) {}
54
55 /// Get the key (externally).
56 const Key & getKey() const { return value.first; }
57 Mapped & getMapped() { return value.second; }
58 const Mapped & getMapped() const { return value.second; }
59 const value_type & getValue() const { return value; }
60
61 /// Get the key (internally).
62 static const Key & getKey(const value_type & value) { return value.first; }
63
64 bool keyEquals(const Key & key_) const { return value.first == key_; }
65 bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; }
66 bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; }
67
68 void setHash(size_t /*hash_value*/) {}
69 size_t getHash(const Hash & hash) const { return hash(value.first); }
70
71 bool isZero(const State & state) const { return isZero(value.first, state); }
72 static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); }
73
74 /// Set the key value to zero.
75 void setZero() { ZeroTraits::set(value.first); }
76
77 /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
78 static constexpr bool need_zero_value_storage = true;
79
80 /// Whether the cell was deleted.
81 bool isDeleted() const { return false; }
82
83 void setMapped(const value_type & value_) { value.second = value_.second; }
84
85 /// Serialization, in binary and text form.
86 void write(DB::WriteBuffer & wb) const
87 {
88 DB::writeBinary(value.first, wb);
89 DB::writeBinary(value.second, wb);
90 }
91
92 void writeText(DB::WriteBuffer & wb) const
93 {
94 DB::writeDoubleQuoted(value.first, wb);
95 DB::writeChar(',', wb);
96 DB::writeDoubleQuoted(value.second, wb);
97 }
98
99 /// Deserialization, in binary and text form.
100 void read(DB::ReadBuffer & rb)
101 {
102 DB::readBinary(value.first, rb);
103 DB::readBinary(value.second, rb);
104 }
105
106 void readText(DB::ReadBuffer & rb)
107 {
108 DB::readDoubleQuoted(value.first, rb);
109 DB::assertChar(',', rb);
110 DB::readDoubleQuoted(value.second, rb);
111 }
112};
113
114template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
115struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
116{
117 using Base = HashMapCell<Key, TMapped, Hash, TState>;
118
119 size_t saved_hash;
120
121 using Base::Base;
122
123 bool keyEquals(const Key & key_) const { return this->value.first == key_; }
124 bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
125 bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }
126
127 void setHash(size_t hash_value) { saved_hash = hash_value; }
128 size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
129};
130
131template <
132 typename Key,
133 typename Cell,
134 typename Hash = DefaultHash<Key>,
135 typename Grower = HashTableGrower<>,
136 typename Allocator = HashTableAllocator>
137class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
138{
139public:
140 using Self = HashMapTable;
141 using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
142 using LookupResult = typename Base::LookupResult;
143
144 using Base::Base;
145
146 /// Merge every cell's value of current map into the destination map via emplace.
147 /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
148 /// Each filled cell in current map will invoke func once. If that map doesn't
149 /// have a key equals to the given cell, a new cell gets emplaced into that map,
150 /// and func is invoked with the third argument emplaced set to true. Otherwise
151 /// emplaced is set to false.
152 template <typename Func>
153 void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
154 {
155 for (auto it = this->begin(), end = this->end(); it != end; ++it)
156 {
157 typename Self::LookupResult res_it;
158 bool inserted;
159 that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash());
160 func(res_it->getMapped(), it->getMapped(), inserted);
161 }
162 }
163
164 /// Merge every cell's value of current map into the destination map via find.
165 /// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
166 /// Each filled cell in current map will invoke func once. If that map doesn't
167 /// have a key equals to the given cell, func is invoked with the third argument
168 /// exist set to false. Otherwise exist is set to true.
169 template <typename Func>
170 void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
171 {
172 for (auto it = this->begin(), end = this->end(); it != end; ++it)
173 {
174 auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash());
175 if (!res_it)
176 func(it->getMapped(), it->getMapped(), false);
177 else
178 func(res_it->getMapped(), it->getMapped(), true);
179 }
180 }
181
182 /// Call func(const Key &, Mapped &) for each hash map element.
183 template <typename Func>
184 void forEachValue(Func && func)
185 {
186 for (auto & v : *this)
187 func(v.getKey(), v.getMapped());
188 }
189
190 /// Call func(Mapped &) for each hash map element.
191 template <typename Func>
192 void forEachMapped(Func && func)
193 {
194 for (auto & v : *this)
195 func(v.getMapped());
196 }
197
198 typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
199 {
200 LookupResult it;
201 bool inserted;
202 this->emplace(x, it, inserted);
203
204 /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
205 * since the hash table memory is initially initialized with zeros.
206 * But, in fact, an empty cell may not be initialized with zeros in the following cases:
207 * - ZeroValueStorage (it only zeros the key);
208 * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero.
209 *
210 * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
211 * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
212 *
213 * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
214 * When we do the initialization, for new cells, it's enough to make `store 1` right away.
215 * And if we did not initialize, then even though there was zero in the cell,
216 * the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
217 */
218 if (inserted)
219 new (&it->getMapped()) typename Cell::Mapped();
220
221 return it->getMapped();
222 }
223};
224
225
226template <
227 typename Key,
228 typename Mapped,
229 typename Hash = DefaultHash<Key>,
230 typename Grower = HashTableGrower<>,
231 typename Allocator = HashTableAllocator>
232using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
233
234
235template <
236 typename Key,
237 typename Mapped,
238 typename Hash = DefaultHash<Key>,
239 typename Grower = HashTableGrower<>,
240 typename Allocator = HashTableAllocator>
241using HashMapWithSavedHash = HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>;
242