1 | #pragma once |
2 | |
3 | #include <Common/HashTable/HashMap.h> |
4 | |
5 | |
/// Error codes thrown by SmallTable and its Reader.
/// They are only declared here (extern); the definitions live in another translation unit.
namespace DB
{
    namespace ErrorCodes
    {
        extern const int INCORRECT_DATA;
        /// Used by SmallTable::Reader::get(); declared explicitly so that this header
        /// does not depend on another header happening to declare it.
        extern const int NO_AVAILABLE_DATA;
    }
}
13 | |
14 | |
15 | /** Replacement of the hash table for a small number (<10) of keys. |
16 | * Implemented as an array with linear search. |
17 | * The array is located inside the object. |
18 | * The interface is a subset of the HashTable interface. |
19 | * |
20 | * Insert is possible only if the `full` method returns false. |
21 | * With an unknown number of different keys, |
22 | * you should check if the table is not full, |
23 | * and do a `fallback` in this case (for example, use a real hash table). |
24 | */ |
25 | template |
26 | < |
27 | typename Key, |
28 | typename Cell, |
29 | size_t capacity |
30 | > |
31 | class SmallTable : |
32 | private boost::noncopyable, |
33 | protected Cell::State |
34 | { |
35 | protected: |
36 | friend class const_iterator; |
37 | friend class iterator; |
38 | friend class Reader; |
39 | |
40 | using Self = SmallTable; |
41 | |
42 | size_t m_size = 0; /// Amount of elements. |
43 | Cell buf[capacity]; /// A piece of memory for all elements. |
44 | |
45 | |
46 | /// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain. |
47 | const Cell * ALWAYS_INLINE findCell(const Key & x) const |
48 | { |
49 | const Cell * it = buf; |
50 | while (it < buf + m_size) |
51 | { |
52 | if (it->keyEquals(x)) |
53 | break; |
54 | ++it; |
55 | } |
56 | return it; |
57 | } |
58 | |
59 | Cell * ALWAYS_INLINE findCell(const Key & x) |
60 | { |
61 | Cell * it = buf; |
62 | while (it < buf + m_size) |
63 | { |
64 | if (it->keyEquals(x)) |
65 | break; |
66 | ++it; |
67 | } |
68 | return it; |
69 | } |
70 | |
71 | |
72 | public: |
73 | using key_type = Key; |
74 | using mapped_type = typename Cell::mapped_type; |
75 | using value_type = typename Cell::value_type; |
76 | using cell_type = Cell; |
77 | |
78 | class Reader final : private Cell::State |
79 | { |
80 | public: |
81 | Reader(DB::ReadBuffer & in_) |
82 | : in(in_) |
83 | { |
84 | } |
85 | |
86 | Reader(const Reader &) = delete; |
87 | Reader & operator=(const Reader &) = delete; |
88 | |
89 | bool next() |
90 | { |
91 | if (!is_initialized) |
92 | { |
93 | Cell::State::read(in); |
94 | DB::readVarUInt(size, in); |
95 | |
96 | if (size > capacity) |
97 | throw DB::Exception("Illegal size" , DB::ErrorCodes::INCORRECT_DATA); |
98 | |
99 | is_initialized = true; |
100 | } |
101 | |
102 | if (read_count == size) |
103 | { |
104 | is_eof = true; |
105 | return false; |
106 | } |
107 | |
108 | cell.read(in); |
109 | ++read_count; |
110 | |
111 | return true; |
112 | } |
113 | |
114 | inline const value_type & get() const |
115 | { |
116 | if (!is_initialized || is_eof) |
117 | throw DB::Exception("No available data" , DB::ErrorCodes::NO_AVAILABLE_DATA); |
118 | |
119 | return cell.getValue(); |
120 | } |
121 | |
122 | private: |
123 | DB::ReadBuffer & in; |
124 | Cell cell; |
125 | size_t read_count = 0; |
126 | size_t size; |
127 | bool is_eof = false; |
128 | bool is_initialized = false; |
129 | }; |
130 | |
131 | class iterator |
132 | { |
133 | Self * container; |
134 | Cell * ptr; |
135 | |
136 | friend class SmallTable; |
137 | |
138 | public: |
139 | iterator() {} |
140 | iterator(Self * container_, Cell * ptr_) : container(container_), ptr(ptr_) {} |
141 | |
142 | bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; } |
143 | bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; } |
144 | |
145 | iterator & operator++() |
146 | { |
147 | ++ptr; |
148 | return *this; |
149 | } |
150 | |
151 | Cell & operator* () const { return *ptr; } |
152 | Cell * operator->() const { return ptr; } |
153 | |
154 | Cell * getPtr() const { return ptr; } |
155 | }; |
156 | |
157 | |
158 | class const_iterator |
159 | { |
160 | const Self * container; |
161 | const Cell * ptr; |
162 | |
163 | friend class SmallTable; |
164 | |
165 | public: |
166 | const_iterator() {} |
167 | const_iterator(const Self * container_, const Cell * ptr_) : container(container_), ptr(ptr_) {} |
168 | const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {} |
169 | |
170 | bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; } |
171 | bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; } |
172 | |
173 | const_iterator & operator++() |
174 | { |
175 | ++ptr; |
176 | return *this; |
177 | } |
178 | |
179 | const Cell & operator* () const { return *ptr; } |
180 | const Cell * operator->() const { return ptr; } |
181 | |
182 | const Cell * getPtr() const { return ptr; } |
183 | }; |
184 | |
185 | |
186 | const_iterator begin() const { return iteratorTo(buf); } |
187 | iterator begin() { return iteratorTo(buf); } |
188 | |
189 | const_iterator end() const { return iteratorTo(buf + m_size); } |
190 | iterator end() { return iteratorTo(buf + m_size); } |
191 | |
192 | |
193 | protected: |
194 | const_iterator iteratorTo(const Cell * ptr) const { return const_iterator(this, ptr); } |
195 | iterator iteratorTo(Cell * ptr) { return iterator(this, ptr); } |
196 | |
197 | |
198 | public: |
199 | /** The table is full. |
200 | * You can not insert anything into the full table. |
201 | */ |
202 | bool full() |
203 | { |
204 | return m_size == capacity; |
205 | } |
206 | |
207 | |
208 | /// Insert the value. In the case of any more complex values, it is better to use the `emplace` function. |
209 | std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x) |
210 | { |
211 | std::pair<iterator, bool> res; |
212 | |
213 | emplace(Cell::getKey(x), res.first, res.second); |
214 | |
215 | if (res.second) |
216 | res.first.ptr->setMapped(x); |
217 | |
218 | return res; |
219 | } |
220 | |
221 | |
222 | /** Insert the key, |
223 | * return an iterator to a position that can be used for `placement new` of value, |
224 | * as well as the flag - whether a new key was inserted. |
225 | * |
226 | * You have to make `placement new` of value if you inserted a new key, |
227 | * since when destroying a hash table, a destructor will be called for it! |
228 | * |
229 | * Example usage: |
230 | * |
231 | * Map::iterator it; |
232 | * bool inserted; |
233 | * map.emplace(key, it, inserted); |
234 | * if (inserted) |
235 | * new(&it->second) Mapped(value); |
236 | */ |
237 | void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) |
238 | { |
239 | Cell * res = findCell(x); |
240 | it = iteratorTo(res); |
241 | inserted = res == buf + m_size; |
242 | if (inserted) |
243 | { |
244 | new(res) Cell(x, *this); |
245 | ++m_size; |
246 | } |
247 | } |
248 | |
249 | |
250 | /// Same, but return false if it's full. |
251 | bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted) |
252 | { |
253 | Cell * res = findCell(x); |
254 | it = iteratorTo(res); |
255 | inserted = res == buf + m_size; |
256 | if (inserted) |
257 | { |
258 | if (res == buf + capacity) |
259 | return false; |
260 | |
261 | new(res) Cell(x, *this); |
262 | ++m_size; |
263 | } |
264 | return true; |
265 | } |
266 | |
267 | |
268 | /// Copy the cell from another hash table. It is assumed that there was no such key in the table yet. |
269 | void ALWAYS_INLINE insertUnique(const Cell * cell) |
270 | { |
271 | memcpy(&buf[m_size], cell, sizeof(*cell)); |
272 | ++m_size; |
273 | } |
274 | |
275 | void ALWAYS_INLINE insertUnique(Key x) |
276 | { |
277 | new(&buf[m_size]) Cell(x, *this); |
278 | ++m_size; |
279 | } |
280 | |
281 | |
282 | iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); } |
283 | const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); } |
284 | |
285 | |
286 | void write(DB::WriteBuffer & wb) const |
287 | { |
288 | Cell::State::write(wb); |
289 | DB::writeVarUInt(m_size, wb); |
290 | |
291 | for (size_t i = 0; i < m_size; ++i) |
292 | buf[i].write(wb); |
293 | } |
294 | |
295 | void writeText(DB::WriteBuffer & wb) const |
296 | { |
297 | Cell::State::writeText(wb); |
298 | DB::writeText(m_size, wb); |
299 | |
300 | for (size_t i = 0; i < m_size; ++i) |
301 | { |
302 | DB::writeChar(',', wb); |
303 | buf[i].writeText(wb); |
304 | } |
305 | } |
306 | |
307 | void read(DB::ReadBuffer & rb) |
308 | { |
309 | Cell::State::read(rb); |
310 | |
311 | m_size = 0; |
312 | |
313 | size_t new_size = 0; |
314 | DB::readVarUInt(new_size, rb); |
315 | |
316 | if (new_size > capacity) |
317 | throw DB::Exception("Illegal size" , DB::ErrorCodes::INCORRECT_DATA); |
318 | |
319 | for (size_t i = 0; i < new_size; ++i) |
320 | buf[i].read(rb); |
321 | |
322 | m_size = new_size; |
323 | } |
324 | |
325 | void readText(DB::ReadBuffer & rb) |
326 | { |
327 | Cell::State::readText(rb); |
328 | |
329 | m_size = 0; |
330 | |
331 | size_t new_size = 0; |
332 | DB::readText(new_size, rb); |
333 | |
334 | if (new_size > capacity) |
335 | throw DB::Exception("Illegal size" , DB::ErrorCodes::INCORRECT_DATA); |
336 | |
337 | for (size_t i = 0; i < new_size; ++i) |
338 | { |
339 | DB::assertChar(',', rb); |
340 | buf[i].readText(rb); |
341 | } |
342 | |
343 | m_size = new_size; |
344 | } |
345 | |
346 | |
347 | size_t size() const |
348 | { |
349 | return m_size; |
350 | } |
351 | |
352 | bool empty() const |
353 | { |
354 | return 0 == m_size; |
355 | } |
356 | |
357 | void clear() |
358 | { |
359 | if (!std::is_trivially_destructible_v<Cell>) |
360 | for (iterator it = begin(); it != end(); ++it) |
361 | it.ptr->~Cell(); |
362 | |
363 | m_size = 0; |
364 | } |
365 | |
366 | size_t getBufferSizeInBytes() const |
367 | { |
368 | return sizeof(buf); |
369 | } |
370 | }; |
371 | |
372 | |
/// Empty tag type passed to HashTableCell / HashMapCell by the SmallSet and SmallMap aliases in this file.
/// Presumably it fills the cells' hash parameter (confirm against the cell definitions):
/// SmallTable itself finds keys only through `keyEquals` and never computes a hash.
struct HashUnused
{
};
374 | |
375 | |
376 | template |
377 | < |
378 | typename Key, |
379 | size_t capacity |
380 | > |
381 | using SmallSet = SmallTable<Key, HashTableCell<Key, HashUnused>, capacity>; |
382 | |
383 | |
384 | template |
385 | < |
386 | typename Key, |
387 | typename Cell, |
388 | size_t capacity |
389 | > |
390 | class SmallMapTable : public SmallTable<Key, Cell, capacity> |
391 | { |
392 | public: |
393 | using key_type = Key; |
394 | using mapped_type = typename Cell::mapped_type; |
395 | using value_type = typename Cell::value_type; |
396 | using cell_type = Cell; |
397 | |
398 | mapped_type & ALWAYS_INLINE operator[](Key x) |
399 | { |
400 | typename SmallMapTable::iterator it; |
401 | bool inserted; |
402 | this->emplace(x, it, inserted); |
403 | new (&it->getMapped()) mapped_type(); |
404 | return it->getMapped(); |
405 | } |
406 | }; |
407 | |
408 | |
409 | template |
410 | < |
411 | typename Key, |
412 | typename Mapped, |
413 | size_t capacity |
414 | > |
415 | using SmallMap = SmallMapTable<Key, HashMapCell<Key, Mapped, HashUnused>, capacity>; |
416 | |