| 1 | #pragma once |
| 2 | |
| 3 | #include <Common/HashTable/HashMap.h> |
| 4 | |
| 5 | |
/// Forward declarations of the error codes thrown by the classes in this header.
/// Both codes used below are declared here so that the header does not depend on
/// another include happening to declare them.
namespace DB
{
namespace ErrorCodes
{
    extern const int INCORRECT_DATA;       /// Thrown when a serialized size exceeds the table capacity.
    extern const int NO_AVAILABLE_DATA;    /// Thrown by Reader::get() when there is no current cell.
}
}
| 13 | |
| 14 | |
| 15 | /** Replacement of the hash table for a small number (<10) of keys. |
| 16 | * Implemented as an array with linear search. |
| 17 | * The array is located inside the object. |
| 18 | * The interface is a subset of the HashTable interface. |
| 19 | * |
| 20 | * Insert is possible only if the `full` method returns false. |
| 21 | * With an unknown number of different keys, |
| 22 | * you should check if the table is not full, |
| 23 | * and do a `fallback` in this case (for example, use a real hash table). |
| 24 | */ |
| 25 | template |
| 26 | < |
| 27 | typename Key, |
| 28 | typename Cell, |
| 29 | size_t capacity |
| 30 | > |
| 31 | class SmallTable : |
| 32 | private boost::noncopyable, |
| 33 | protected Cell::State |
| 34 | { |
| 35 | protected: |
| 36 | friend class const_iterator; |
| 37 | friend class iterator; |
| 38 | friend class Reader; |
| 39 | |
| 40 | using Self = SmallTable; |
| 41 | |
| 42 | size_t m_size = 0; /// Amount of elements. |
| 43 | Cell buf[capacity]; /// A piece of memory for all elements. |
| 44 | |
| 45 | |
| 46 | /// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain. |
| 47 | const Cell * ALWAYS_INLINE findCell(const Key & x) const |
| 48 | { |
| 49 | const Cell * it = buf; |
| 50 | while (it < buf + m_size) |
| 51 | { |
| 52 | if (it->keyEquals(x)) |
| 53 | break; |
| 54 | ++it; |
| 55 | } |
| 56 | return it; |
| 57 | } |
| 58 | |
| 59 | Cell * ALWAYS_INLINE findCell(const Key & x) |
| 60 | { |
| 61 | Cell * it = buf; |
| 62 | while (it < buf + m_size) |
| 63 | { |
| 64 | if (it->keyEquals(x)) |
| 65 | break; |
| 66 | ++it; |
| 67 | } |
| 68 | return it; |
| 69 | } |
| 70 | |
| 71 | |
| 72 | public: |
| 73 | using key_type = Key; |
| 74 | using mapped_type = typename Cell::mapped_type; |
| 75 | using value_type = typename Cell::value_type; |
| 76 | using cell_type = Cell; |
| 77 | |
| 78 | class Reader final : private Cell::State |
| 79 | { |
| 80 | public: |
| 81 | Reader(DB::ReadBuffer & in_) |
| 82 | : in(in_) |
| 83 | { |
| 84 | } |
| 85 | |
| 86 | Reader(const Reader &) = delete; |
| 87 | Reader & operator=(const Reader &) = delete; |
| 88 | |
| 89 | bool next() |
| 90 | { |
| 91 | if (!is_initialized) |
| 92 | { |
| 93 | Cell::State::read(in); |
| 94 | DB::readVarUInt(size, in); |
| 95 | |
| 96 | if (size > capacity) |
| 97 | throw DB::Exception("Illegal size" , DB::ErrorCodes::INCORRECT_DATA); |
| 98 | |
| 99 | is_initialized = true; |
| 100 | } |
| 101 | |
| 102 | if (read_count == size) |
| 103 | { |
| 104 | is_eof = true; |
| 105 | return false; |
| 106 | } |
| 107 | |
| 108 | cell.read(in); |
| 109 | ++read_count; |
| 110 | |
| 111 | return true; |
| 112 | } |
| 113 | |
| 114 | inline const value_type & get() const |
| 115 | { |
| 116 | if (!is_initialized || is_eof) |
| 117 | throw DB::Exception("No available data" , DB::ErrorCodes::NO_AVAILABLE_DATA); |
| 118 | |
| 119 | return cell.getValue(); |
| 120 | } |
| 121 | |
| 122 | private: |
| 123 | DB::ReadBuffer & in; |
| 124 | Cell cell; |
| 125 | size_t read_count = 0; |
| 126 | size_t size; |
| 127 | bool is_eof = false; |
| 128 | bool is_initialized = false; |
| 129 | }; |
| 130 | |
| 131 | class iterator |
| 132 | { |
| 133 | Self * container; |
| 134 | Cell * ptr; |
| 135 | |
| 136 | friend class SmallTable; |
| 137 | |
| 138 | public: |
| 139 | iterator() {} |
| 140 | iterator(Self * container_, Cell * ptr_) : container(container_), ptr(ptr_) {} |
| 141 | |
| 142 | bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; } |
| 143 | bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; } |
| 144 | |
| 145 | iterator & operator++() |
| 146 | { |
| 147 | ++ptr; |
| 148 | return *this; |
| 149 | } |
| 150 | |
| 151 | Cell & operator* () const { return *ptr; } |
| 152 | Cell * operator->() const { return ptr; } |
| 153 | |
| 154 | Cell * getPtr() const { return ptr; } |
| 155 | }; |
| 156 | |
| 157 | |
| 158 | class const_iterator |
| 159 | { |
| 160 | const Self * container; |
| 161 | const Cell * ptr; |
| 162 | |
| 163 | friend class SmallTable; |
| 164 | |
| 165 | public: |
| 166 | const_iterator() {} |
| 167 | const_iterator(const Self * container_, const Cell * ptr_) : container(container_), ptr(ptr_) {} |
| 168 | const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {} |
| 169 | |
| 170 | bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; } |
| 171 | bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; } |
| 172 | |
| 173 | const_iterator & operator++() |
| 174 | { |
| 175 | ++ptr; |
| 176 | return *this; |
| 177 | } |
| 178 | |
| 179 | const Cell & operator* () const { return *ptr; } |
| 180 | const Cell * operator->() const { return ptr; } |
| 181 | |
| 182 | const Cell * getPtr() const { return ptr; } |
| 183 | }; |
| 184 | |
| 185 | |
| 186 | const_iterator begin() const { return iteratorTo(buf); } |
| 187 | iterator begin() { return iteratorTo(buf); } |
| 188 | |
| 189 | const_iterator end() const { return iteratorTo(buf + m_size); } |
| 190 | iterator end() { return iteratorTo(buf + m_size); } |
| 191 | |
| 192 | |
| 193 | protected: |
| 194 | const_iterator iteratorTo(const Cell * ptr) const { return const_iterator(this, ptr); } |
| 195 | iterator iteratorTo(Cell * ptr) { return iterator(this, ptr); } |
| 196 | |
| 197 | |
| 198 | public: |
| 199 | /** The table is full. |
| 200 | * You can not insert anything into the full table. |
| 201 | */ |
| 202 | bool full() |
| 203 | { |
| 204 | return m_size == capacity; |
| 205 | } |
| 206 | |
| 207 | |
| 208 | /// Insert the value. In the case of any more complex values, it is better to use the `emplace` function. |
| 209 | std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x) |
| 210 | { |
| 211 | std::pair<iterator, bool> res; |
| 212 | |
| 213 | emplace(Cell::getKey(x), res.first, res.second); |
| 214 | |
| 215 | if (res.second) |
| 216 | res.first.ptr->setMapped(x); |
| 217 | |
| 218 | return res; |
| 219 | } |
| 220 | |
| 221 | |
| 222 | /** Insert the key, |
| 223 | * return an iterator to a position that can be used for `placement new` of value, |
| 224 | * as well as the flag - whether a new key was inserted. |
| 225 | * |
| 226 | * You have to make `placement new` of value if you inserted a new key, |
| 227 | * since when destroying a hash table, a destructor will be called for it! |
| 228 | * |
| 229 | * Example usage: |
| 230 | * |
| 231 | * Map::iterator it; |
| 232 | * bool inserted; |
| 233 | * map.emplace(key, it, inserted); |
| 234 | * if (inserted) |
| 235 | * new(&it->second) Mapped(value); |
| 236 | */ |
| 237 | void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) |
| 238 | { |
| 239 | Cell * res = findCell(x); |
| 240 | it = iteratorTo(res); |
| 241 | inserted = res == buf + m_size; |
| 242 | if (inserted) |
| 243 | { |
| 244 | new(res) Cell(x, *this); |
| 245 | ++m_size; |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | |
| 250 | /// Same, but return false if it's full. |
| 251 | bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted) |
| 252 | { |
| 253 | Cell * res = findCell(x); |
| 254 | it = iteratorTo(res); |
| 255 | inserted = res == buf + m_size; |
| 256 | if (inserted) |
| 257 | { |
| 258 | if (res == buf + capacity) |
| 259 | return false; |
| 260 | |
| 261 | new(res) Cell(x, *this); |
| 262 | ++m_size; |
| 263 | } |
| 264 | return true; |
| 265 | } |
| 266 | |
| 267 | |
| 268 | /// Copy the cell from another hash table. It is assumed that there was no such key in the table yet. |
| 269 | void ALWAYS_INLINE insertUnique(const Cell * cell) |
| 270 | { |
| 271 | memcpy(&buf[m_size], cell, sizeof(*cell)); |
| 272 | ++m_size; |
| 273 | } |
| 274 | |
| 275 | void ALWAYS_INLINE insertUnique(Key x) |
| 276 | { |
| 277 | new(&buf[m_size]) Cell(x, *this); |
| 278 | ++m_size; |
| 279 | } |
| 280 | |
| 281 | |
| 282 | iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); } |
| 283 | const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); } |
| 284 | |
| 285 | |
| 286 | void write(DB::WriteBuffer & wb) const |
| 287 | { |
| 288 | Cell::State::write(wb); |
| 289 | DB::writeVarUInt(m_size, wb); |
| 290 | |
| 291 | for (size_t i = 0; i < m_size; ++i) |
| 292 | buf[i].write(wb); |
| 293 | } |
| 294 | |
| 295 | void writeText(DB::WriteBuffer & wb) const |
| 296 | { |
| 297 | Cell::State::writeText(wb); |
| 298 | DB::writeText(m_size, wb); |
| 299 | |
| 300 | for (size_t i = 0; i < m_size; ++i) |
| 301 | { |
| 302 | DB::writeChar(',', wb); |
| 303 | buf[i].writeText(wb); |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | void read(DB::ReadBuffer & rb) |
| 308 | { |
| 309 | Cell::State::read(rb); |
| 310 | |
| 311 | m_size = 0; |
| 312 | |
| 313 | size_t new_size = 0; |
| 314 | DB::readVarUInt(new_size, rb); |
| 315 | |
| 316 | if (new_size > capacity) |
| 317 | throw DB::Exception("Illegal size" , DB::ErrorCodes::INCORRECT_DATA); |
| 318 | |
| 319 | for (size_t i = 0; i < new_size; ++i) |
| 320 | buf[i].read(rb); |
| 321 | |
| 322 | m_size = new_size; |
| 323 | } |
| 324 | |
| 325 | void readText(DB::ReadBuffer & rb) |
| 326 | { |
| 327 | Cell::State::readText(rb); |
| 328 | |
| 329 | m_size = 0; |
| 330 | |
| 331 | size_t new_size = 0; |
| 332 | DB::readText(new_size, rb); |
| 333 | |
| 334 | if (new_size > capacity) |
| 335 | throw DB::Exception("Illegal size" , DB::ErrorCodes::INCORRECT_DATA); |
| 336 | |
| 337 | for (size_t i = 0; i < new_size; ++i) |
| 338 | { |
| 339 | DB::assertChar(',', rb); |
| 340 | buf[i].readText(rb); |
| 341 | } |
| 342 | |
| 343 | m_size = new_size; |
| 344 | } |
| 345 | |
| 346 | |
| 347 | size_t size() const |
| 348 | { |
| 349 | return m_size; |
| 350 | } |
| 351 | |
| 352 | bool empty() const |
| 353 | { |
| 354 | return 0 == m_size; |
| 355 | } |
| 356 | |
| 357 | void clear() |
| 358 | { |
| 359 | if (!std::is_trivially_destructible_v<Cell>) |
| 360 | for (iterator it = begin(); it != end(); ++it) |
| 361 | it.ptr->~Cell(); |
| 362 | |
| 363 | m_size = 0; |
| 364 | } |
| 365 | |
| 366 | size_t getBufferSizeInBytes() const |
| 367 | { |
| 368 | return sizeof(buf); |
| 369 | } |
| 370 | }; |
| 371 | |
| 372 | |
/// Empty tag type passed to the cell templates in the SmallSet / SmallMap aliases below.
/// NOTE(review): presumably it stands in for the hash function argument, which a linear-search table never calls - confirm against the cell templates.
struct HashUnused
{
};
| 374 | |
| 375 | |
| 376 | template |
| 377 | < |
| 378 | typename Key, |
| 379 | size_t capacity |
| 380 | > |
| 381 | using SmallSet = SmallTable<Key, HashTableCell<Key, HashUnused>, capacity>; |
| 382 | |
| 383 | |
| 384 | template |
| 385 | < |
| 386 | typename Key, |
| 387 | typename Cell, |
| 388 | size_t capacity |
| 389 | > |
| 390 | class SmallMapTable : public SmallTable<Key, Cell, capacity> |
| 391 | { |
| 392 | public: |
| 393 | using key_type = Key; |
| 394 | using mapped_type = typename Cell::mapped_type; |
| 395 | using value_type = typename Cell::value_type; |
| 396 | using cell_type = Cell; |
| 397 | |
| 398 | mapped_type & ALWAYS_INLINE operator[](Key x) |
| 399 | { |
| 400 | typename SmallMapTable::iterator it; |
| 401 | bool inserted; |
| 402 | this->emplace(x, it, inserted); |
| 403 | new (&it->getMapped()) mapped_type(); |
| 404 | return it->getMapped(); |
| 405 | } |
| 406 | }; |
| 407 | |
| 408 | |
| 409 | template |
| 410 | < |
| 411 | typename Key, |
| 412 | typename Mapped, |
| 413 | size_t capacity |
| 414 | > |
| 415 | using SmallMap = SmallMapTable<Key, HashMapCell<Key, Mapped, HashUnused>, capacity>; |
| 416 | |