1 | #pragma once |
2 | |
3 | #include <Common/HashTable/Hash.h> |
4 | #include <Common/HashTable/HashTable.h> |
5 | #include <Common/HashTable/HashTableAllocator.h> |
6 | |
7 | |
/** NOTE: HashMap can only be used with memmovable (position-independent) types.
  * Example: std::string is not position-independent in libstdc++ with the C++11 ABI or in libc++.
  * Also, the key type must be such that a key consisting of all zero bytes compares equal to the zero key.
  */
12 | |
13 | |
/// Empty tag type. Passed as the second constructor argument of PairNoInit
/// to request that only `first` is constructed and `second` gets no explicit initializer.
struct NoInitTag {};
17 | |
18 | /// A pair that does not initialize the elements, if not needed. |
19 | template <typename First, typename Second> |
20 | struct PairNoInit |
21 | { |
22 | First first; |
23 | Second second; |
24 | |
25 | PairNoInit() {} |
26 | |
27 | template <typename First_> |
28 | PairNoInit(First_ && first_, NoInitTag) : first(std::forward<First_>(first_)) |
29 | { |
30 | } |
31 | |
32 | template <typename First_, typename Second_> |
33 | PairNoInit(First_ && first_, Second_ && second_) : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_)) |
34 | { |
35 | } |
36 | }; |
37 | |
38 | |
39 | template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> |
40 | struct HashMapCell |
41 | { |
42 | using Mapped = TMapped; |
43 | using State = TState; |
44 | |
45 | using value_type = PairNoInit<Key, Mapped>; |
46 | using mapped_type = Mapped; |
47 | using key_type = Key; |
48 | |
49 | value_type value; |
50 | |
51 | HashMapCell() {} |
52 | HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} |
53 | HashMapCell(const value_type & value_, const State &) : value(value_) {} |
54 | |
55 | /// Get the key (externally). |
56 | const Key & getKey() const { return value.first; } |
57 | Mapped & getMapped() { return value.second; } |
58 | const Mapped & getMapped() const { return value.second; } |
59 | const value_type & getValue() const { return value; } |
60 | |
61 | /// Get the key (internally). |
62 | static const Key & getKey(const value_type & value) { return value.first; } |
63 | |
64 | bool keyEquals(const Key & key_) const { return value.first == key_; } |
65 | bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; } |
66 | bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; } |
67 | |
68 | void setHash(size_t /*hash_value*/) {} |
69 | size_t getHash(const Hash & hash) const { return hash(value.first); } |
70 | |
71 | bool isZero(const State & state) const { return isZero(value.first, state); } |
72 | static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); } |
73 | |
74 | /// Set the key value to zero. |
75 | void setZero() { ZeroTraits::set(value.first); } |
76 | |
77 | /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). |
78 | static constexpr bool need_zero_value_storage = true; |
79 | |
80 | /// Whether the cell was deleted. |
81 | bool isDeleted() const { return false; } |
82 | |
83 | void setMapped(const value_type & value_) { value.second = value_.second; } |
84 | |
85 | /// Serialization, in binary and text form. |
86 | void write(DB::WriteBuffer & wb) const |
87 | { |
88 | DB::writeBinary(value.first, wb); |
89 | DB::writeBinary(value.second, wb); |
90 | } |
91 | |
92 | void writeText(DB::WriteBuffer & wb) const |
93 | { |
94 | DB::writeDoubleQuoted(value.first, wb); |
95 | DB::writeChar(',', wb); |
96 | DB::writeDoubleQuoted(value.second, wb); |
97 | } |
98 | |
99 | /// Deserialization, in binary and text form. |
100 | void read(DB::ReadBuffer & rb) |
101 | { |
102 | DB::readBinary(value.first, rb); |
103 | DB::readBinary(value.second, rb); |
104 | } |
105 | |
106 | void readText(DB::ReadBuffer & rb) |
107 | { |
108 | DB::readDoubleQuoted(value.first, rb); |
109 | DB::assertChar(',', rb); |
110 | DB::readDoubleQuoted(value.second, rb); |
111 | } |
112 | }; |
113 | |
114 | template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> |
115 | struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState> |
116 | { |
117 | using Base = HashMapCell<Key, TMapped, Hash, TState>; |
118 | |
119 | size_t saved_hash; |
120 | |
121 | using Base::Base; |
122 | |
123 | bool keyEquals(const Key & key_) const { return this->value.first == key_; } |
124 | bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; } |
125 | bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); } |
126 | |
127 | void setHash(size_t hash_value) { saved_hash = hash_value; } |
128 | size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } |
129 | }; |
130 | |
131 | template < |
132 | typename Key, |
133 | typename Cell, |
134 | typename Hash = DefaultHash<Key>, |
135 | typename Grower = HashTableGrower<>, |
136 | typename Allocator = HashTableAllocator> |
137 | class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> |
138 | { |
139 | public: |
140 | using Self = HashMapTable; |
141 | using Base = HashTable<Key, Cell, Hash, Grower, Allocator>; |
142 | using LookupResult = typename Base::LookupResult; |
143 | |
144 | using Base::Base; |
145 | |
146 | /// Merge every cell's value of current map into the destination map via emplace. |
147 | /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). |
148 | /// Each filled cell in current map will invoke func once. If that map doesn't |
149 | /// have a key equals to the given cell, a new cell gets emplaced into that map, |
150 | /// and func is invoked with the third argument emplaced set to true. Otherwise |
151 | /// emplaced is set to false. |
152 | template <typename Func> |
153 | void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) |
154 | { |
155 | for (auto it = this->begin(), end = this->end(); it != end; ++it) |
156 | { |
157 | typename Self::LookupResult res_it; |
158 | bool inserted; |
159 | that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash()); |
160 | func(res_it->getMapped(), it->getMapped(), inserted); |
161 | } |
162 | } |
163 | |
164 | /// Merge every cell's value of current map into the destination map via find. |
165 | /// Func should have signature void(Mapped & dst, Mapped & src, bool exist). |
166 | /// Each filled cell in current map will invoke func once. If that map doesn't |
167 | /// have a key equals to the given cell, func is invoked with the third argument |
168 | /// exist set to false. Otherwise exist is set to true. |
169 | template <typename Func> |
170 | void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) |
171 | { |
172 | for (auto it = this->begin(), end = this->end(); it != end; ++it) |
173 | { |
174 | auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash()); |
175 | if (!res_it) |
176 | func(it->getMapped(), it->getMapped(), false); |
177 | else |
178 | func(res_it->getMapped(), it->getMapped(), true); |
179 | } |
180 | } |
181 | |
182 | /// Call func(const Key &, Mapped &) for each hash map element. |
183 | template <typename Func> |
184 | void forEachValue(Func && func) |
185 | { |
186 | for (auto & v : *this) |
187 | func(v.getKey(), v.getMapped()); |
188 | } |
189 | |
190 | /// Call func(Mapped &) for each hash map element. |
191 | template <typename Func> |
192 | void forEachMapped(Func && func) |
193 | { |
194 | for (auto & v : *this) |
195 | func(v.getMapped()); |
196 | } |
197 | |
198 | typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) |
199 | { |
200 | LookupResult it; |
201 | bool inserted; |
202 | this->emplace(x, it, inserted); |
203 | |
204 | /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor), |
205 | * since the hash table memory is initially initialized with zeros. |
206 | * But, in fact, an empty cell may not be initialized with zeros in the following cases: |
207 | * - ZeroValueStorage (it only zeros the key); |
208 | * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero. |
209 | * |
210 | * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately |
211 | * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization. |
212 | * |
213 | * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`. |
214 | * When we do the initialization, for new cells, it's enough to make `store 1` right away. |
215 | * And if we did not initialize, then even though there was zero in the cell, |
216 | * the compiler can not guess about this, and generates the `load`, `increment`, `store` code. |
217 | */ |
218 | if (inserted) |
219 | new (&it->getMapped()) typename Cell::Mapped(); |
220 | |
221 | return it->getMapped(); |
222 | } |
223 | }; |
224 | |
225 | |
/// Hash map built from HashMapTable with HashMapCell cells (default cell state):
/// each cell holds only the key and the mapped value, and the hash is recomputed
/// from the key when it is needed.
template <
    typename Key,
    typename Mapped,
    typename Hash = DefaultHash<Key>,
    typename Grower = HashTableGrower<>,
    typename Allocator = HashTableAllocator>
using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
233 | |
234 | |
/// Hash map built from HashMapTable with HashMapCellWithSavedHash cells (default cell state):
/// each cell additionally stores the hash of its key, which is compared before the keys
/// and returned by getHash() without recomputation.
template <
    typename Key,
    typename Mapped,
    typename Hash = DefaultHash<Key>,
    typename Grower = HashTableGrower<>,
    typename Allocator = HashTableAllocator>
using HashMapWithSavedHash = HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>;
242 | |