1#pragma once
2
3#include <Common/HashTable/HashMap.h>
4
5
6namespace DB
7{
8 namespace ErrorCodes
9 {
10 extern const int INCORRECT_DATA;
11 }
12}
13
14
15/** Replacement of the hash table for a small number (<10) of keys.
16 * Implemented as an array with linear search.
17 * The array is located inside the object.
18 * The interface is a subset of the HashTable interface.
19 *
20 * Insert is possible only if the `full` method returns false.
21 * With an unknown number of different keys,
22 * you should check if the table is not full,
23 * and do a `fallback` in this case (for example, use a real hash table).
24 */
25template
26<
27 typename Key,
28 typename Cell,
29 size_t capacity
30>
31class SmallTable :
32 private boost::noncopyable,
33 protected Cell::State
34{
35protected:
36 friend class const_iterator;
37 friend class iterator;
38 friend class Reader;
39
40 using Self = SmallTable;
41
42 size_t m_size = 0; /// Amount of elements.
43 Cell buf[capacity]; /// A piece of memory for all elements.
44
45
46 /// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain.
47 const Cell * ALWAYS_INLINE findCell(const Key & x) const
48 {
49 const Cell * it = buf;
50 while (it < buf + m_size)
51 {
52 if (it->keyEquals(x))
53 break;
54 ++it;
55 }
56 return it;
57 }
58
59 Cell * ALWAYS_INLINE findCell(const Key & x)
60 {
61 Cell * it = buf;
62 while (it < buf + m_size)
63 {
64 if (it->keyEquals(x))
65 break;
66 ++it;
67 }
68 return it;
69 }
70
71
72public:
73 using key_type = Key;
74 using mapped_type = typename Cell::mapped_type;
75 using value_type = typename Cell::value_type;
76 using cell_type = Cell;
77
78 class Reader final : private Cell::State
79 {
80 public:
81 Reader(DB::ReadBuffer & in_)
82 : in(in_)
83 {
84 }
85
86 Reader(const Reader &) = delete;
87 Reader & operator=(const Reader &) = delete;
88
89 bool next()
90 {
91 if (!is_initialized)
92 {
93 Cell::State::read(in);
94 DB::readVarUInt(size, in);
95
96 if (size > capacity)
97 throw DB::Exception("Illegal size", DB::ErrorCodes::INCORRECT_DATA);
98
99 is_initialized = true;
100 }
101
102 if (read_count == size)
103 {
104 is_eof = true;
105 return false;
106 }
107
108 cell.read(in);
109 ++read_count;
110
111 return true;
112 }
113
114 inline const value_type & get() const
115 {
116 if (!is_initialized || is_eof)
117 throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
118
119 return cell.getValue();
120 }
121
122 private:
123 DB::ReadBuffer & in;
124 Cell cell;
125 size_t read_count = 0;
126 size_t size;
127 bool is_eof = false;
128 bool is_initialized = false;
129 };
130
131 class iterator
132 {
133 Self * container;
134 Cell * ptr;
135
136 friend class SmallTable;
137
138 public:
139 iterator() {}
140 iterator(Self * container_, Cell * ptr_) : container(container_), ptr(ptr_) {}
141
142 bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; }
143 bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; }
144
145 iterator & operator++()
146 {
147 ++ptr;
148 return *this;
149 }
150
151 Cell & operator* () const { return *ptr; }
152 Cell * operator->() const { return ptr; }
153
154 Cell * getPtr() const { return ptr; }
155 };
156
157
158 class const_iterator
159 {
160 const Self * container;
161 const Cell * ptr;
162
163 friend class SmallTable;
164
165 public:
166 const_iterator() {}
167 const_iterator(const Self * container_, const Cell * ptr_) : container(container_), ptr(ptr_) {}
168 const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {}
169
170 bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; }
171 bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; }
172
173 const_iterator & operator++()
174 {
175 ++ptr;
176 return *this;
177 }
178
179 const Cell & operator* () const { return *ptr; }
180 const Cell * operator->() const { return ptr; }
181
182 const Cell * getPtr() const { return ptr; }
183 };
184
185
186 const_iterator begin() const { return iteratorTo(buf); }
187 iterator begin() { return iteratorTo(buf); }
188
189 const_iterator end() const { return iteratorTo(buf + m_size); }
190 iterator end() { return iteratorTo(buf + m_size); }
191
192
193protected:
194 const_iterator iteratorTo(const Cell * ptr) const { return const_iterator(this, ptr); }
195 iterator iteratorTo(Cell * ptr) { return iterator(this, ptr); }
196
197
198public:
199 /** The table is full.
200 * You can not insert anything into the full table.
201 */
202 bool full()
203 {
204 return m_size == capacity;
205 }
206
207
208 /// Insert the value. In the case of any more complex values, it is better to use the `emplace` function.
209 std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
210 {
211 std::pair<iterator, bool> res;
212
213 emplace(Cell::getKey(x), res.first, res.second);
214
215 if (res.second)
216 res.first.ptr->setMapped(x);
217
218 return res;
219 }
220
221
222 /** Insert the key,
223 * return an iterator to a position that can be used for `placement new` of value,
224 * as well as the flag - whether a new key was inserted.
225 *
226 * You have to make `placement new` of value if you inserted a new key,
227 * since when destroying a hash table, a destructor will be called for it!
228 *
229 * Example usage:
230 *
231 * Map::iterator it;
232 * bool inserted;
233 * map.emplace(key, it, inserted);
234 * if (inserted)
235 * new(&it->second) Mapped(value);
236 */
237 void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted)
238 {
239 Cell * res = findCell(x);
240 it = iteratorTo(res);
241 inserted = res == buf + m_size;
242 if (inserted)
243 {
244 new(res) Cell(x, *this);
245 ++m_size;
246 }
247 }
248
249
250 /// Same, but return false if it's full.
251 bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
252 {
253 Cell * res = findCell(x);
254 it = iteratorTo(res);
255 inserted = res == buf + m_size;
256 if (inserted)
257 {
258 if (res == buf + capacity)
259 return false;
260
261 new(res) Cell(x, *this);
262 ++m_size;
263 }
264 return true;
265 }
266
267
268 /// Copy the cell from another hash table. It is assumed that there was no such key in the table yet.
269 void ALWAYS_INLINE insertUnique(const Cell * cell)
270 {
271 memcpy(&buf[m_size], cell, sizeof(*cell));
272 ++m_size;
273 }
274
275 void ALWAYS_INLINE insertUnique(Key x)
276 {
277 new(&buf[m_size]) Cell(x, *this);
278 ++m_size;
279 }
280
281
282 iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); }
283 const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); }
284
285
286 void write(DB::WriteBuffer & wb) const
287 {
288 Cell::State::write(wb);
289 DB::writeVarUInt(m_size, wb);
290
291 for (size_t i = 0; i < m_size; ++i)
292 buf[i].write(wb);
293 }
294
295 void writeText(DB::WriteBuffer & wb) const
296 {
297 Cell::State::writeText(wb);
298 DB::writeText(m_size, wb);
299
300 for (size_t i = 0; i < m_size; ++i)
301 {
302 DB::writeChar(',', wb);
303 buf[i].writeText(wb);
304 }
305 }
306
307 void read(DB::ReadBuffer & rb)
308 {
309 Cell::State::read(rb);
310
311 m_size = 0;
312
313 size_t new_size = 0;
314 DB::readVarUInt(new_size, rb);
315
316 if (new_size > capacity)
317 throw DB::Exception("Illegal size", DB::ErrorCodes::INCORRECT_DATA);
318
319 for (size_t i = 0; i < new_size; ++i)
320 buf[i].read(rb);
321
322 m_size = new_size;
323 }
324
325 void readText(DB::ReadBuffer & rb)
326 {
327 Cell::State::readText(rb);
328
329 m_size = 0;
330
331 size_t new_size = 0;
332 DB::readText(new_size, rb);
333
334 if (new_size > capacity)
335 throw DB::Exception("Illegal size", DB::ErrorCodes::INCORRECT_DATA);
336
337 for (size_t i = 0; i < new_size; ++i)
338 {
339 DB::assertChar(',', rb);
340 buf[i].readText(rb);
341 }
342
343 m_size = new_size;
344 }
345
346
347 size_t size() const
348 {
349 return m_size;
350 }
351
352 bool empty() const
353 {
354 return 0 == m_size;
355 }
356
357 void clear()
358 {
359 if (!std::is_trivially_destructible_v<Cell>)
360 for (iterator it = begin(); it != end(); ++it)
361 it.ptr->~Cell();
362
363 m_size = 0;
364 }
365
366 size_t getBufferSizeInBytes() const
367 {
368 return sizeof(buf);
369 }
370};
371
372
373struct HashUnused {};
374
375
376template
377<
378 typename Key,
379 size_t capacity
380>
381using SmallSet = SmallTable<Key, HashTableCell<Key, HashUnused>, capacity>;
382
383
384template
385<
386 typename Key,
387 typename Cell,
388 size_t capacity
389>
390class SmallMapTable : public SmallTable<Key, Cell, capacity>
391{
392public:
393 using key_type = Key;
394 using mapped_type = typename Cell::mapped_type;
395 using value_type = typename Cell::value_type;
396 using cell_type = Cell;
397
398 mapped_type & ALWAYS_INLINE operator[](Key x)
399 {
400 typename SmallMapTable::iterator it;
401 bool inserted;
402 this->emplace(x, it, inserted);
403 new (&it->getMapped()) mapped_type();
404 return it->getMapped();
405 }
406};
407
408
409template
410<
411 typename Key,
412 typename Mapped,
413 size_t capacity
414>
415using SmallMap = SmallMapTable<Key, HashMapCell<Key, Mapped, HashUnused>, capacity>;
416