| 1 | #pragma once |
| 2 | |
| 3 | #include <Common/HashTable/Hash.h> |
| 4 | #include <Common/HashTable/HashTable.h> |
| 5 | #include <Common/HashTable/HashTableAllocator.h> |
| 6 | |
| 7 | |
| 8 | /** NOTE HashMap could only be used for memmoveable (position independent) types. |
| 9 | * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. |
| 10 | * Also, key in hash table must be of type, that zero bytes is compared equals to zero key. |
| 11 | */ |
| 12 | |
| 13 | |
| 14 | struct NoInitTag |
| 15 | { |
| 16 | }; |
| 17 | |
| 18 | /// A pair that does not initialize the elements, if not needed. |
| 19 | template <typename First, typename Second> |
| 20 | struct PairNoInit |
| 21 | { |
| 22 | First first; |
| 23 | Second second; |
| 24 | |
| 25 | PairNoInit() {} |
| 26 | |
| 27 | template <typename First_> |
| 28 | PairNoInit(First_ && first_, NoInitTag) : first(std::forward<First_>(first_)) |
| 29 | { |
| 30 | } |
| 31 | |
| 32 | template <typename First_, typename Second_> |
| 33 | PairNoInit(First_ && first_, Second_ && second_) : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_)) |
| 34 | { |
| 35 | } |
| 36 | }; |
| 37 | |
| 38 | |
| 39 | template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> |
| 40 | struct HashMapCell |
| 41 | { |
| 42 | using Mapped = TMapped; |
| 43 | using State = TState; |
| 44 | |
| 45 | using value_type = PairNoInit<Key, Mapped>; |
| 46 | using mapped_type = Mapped; |
| 47 | using key_type = Key; |
| 48 | |
| 49 | value_type value; |
| 50 | |
| 51 | HashMapCell() {} |
| 52 | HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} |
| 53 | HashMapCell(const value_type & value_, const State &) : value(value_) {} |
| 54 | |
| 55 | /// Get the key (externally). |
| 56 | const Key & getKey() const { return value.first; } |
| 57 | Mapped & getMapped() { return value.second; } |
| 58 | const Mapped & getMapped() const { return value.second; } |
| 59 | const value_type & getValue() const { return value; } |
| 60 | |
| 61 | /// Get the key (internally). |
| 62 | static const Key & getKey(const value_type & value) { return value.first; } |
| 63 | |
| 64 | bool keyEquals(const Key & key_) const { return value.first == key_; } |
| 65 | bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; } |
| 66 | bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; } |
| 67 | |
| 68 | void setHash(size_t /*hash_value*/) {} |
| 69 | size_t getHash(const Hash & hash) const { return hash(value.first); } |
| 70 | |
| 71 | bool isZero(const State & state) const { return isZero(value.first, state); } |
| 72 | static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); } |
| 73 | |
| 74 | /// Set the key value to zero. |
| 75 | void setZero() { ZeroTraits::set(value.first); } |
| 76 | |
| 77 | /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). |
| 78 | static constexpr bool need_zero_value_storage = true; |
| 79 | |
| 80 | /// Whether the cell was deleted. |
| 81 | bool isDeleted() const { return false; } |
| 82 | |
| 83 | void setMapped(const value_type & value_) { value.second = value_.second; } |
| 84 | |
| 85 | /// Serialization, in binary and text form. |
| 86 | void write(DB::WriteBuffer & wb) const |
| 87 | { |
| 88 | DB::writeBinary(value.first, wb); |
| 89 | DB::writeBinary(value.second, wb); |
| 90 | } |
| 91 | |
| 92 | void writeText(DB::WriteBuffer & wb) const |
| 93 | { |
| 94 | DB::writeDoubleQuoted(value.first, wb); |
| 95 | DB::writeChar(',', wb); |
| 96 | DB::writeDoubleQuoted(value.second, wb); |
| 97 | } |
| 98 | |
| 99 | /// Deserialization, in binary and text form. |
| 100 | void read(DB::ReadBuffer & rb) |
| 101 | { |
| 102 | DB::readBinary(value.first, rb); |
| 103 | DB::readBinary(value.second, rb); |
| 104 | } |
| 105 | |
| 106 | void readText(DB::ReadBuffer & rb) |
| 107 | { |
| 108 | DB::readDoubleQuoted(value.first, rb); |
| 109 | DB::assertChar(',', rb); |
| 110 | DB::readDoubleQuoted(value.second, rb); |
| 111 | } |
| 112 | }; |
| 113 | |
| 114 | template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> |
| 115 | struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState> |
| 116 | { |
| 117 | using Base = HashMapCell<Key, TMapped, Hash, TState>; |
| 118 | |
| 119 | size_t saved_hash; |
| 120 | |
| 121 | using Base::Base; |
| 122 | |
| 123 | bool keyEquals(const Key & key_) const { return this->value.first == key_; } |
| 124 | bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; } |
| 125 | bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); } |
| 126 | |
| 127 | void setHash(size_t hash_value) { saved_hash = hash_value; } |
| 128 | size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } |
| 129 | }; |
| 130 | |
| 131 | template < |
| 132 | typename Key, |
| 133 | typename Cell, |
| 134 | typename Hash = DefaultHash<Key>, |
| 135 | typename Grower = HashTableGrower<>, |
| 136 | typename Allocator = HashTableAllocator> |
| 137 | class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> |
| 138 | { |
| 139 | public: |
| 140 | using Self = HashMapTable; |
| 141 | using Base = HashTable<Key, Cell, Hash, Grower, Allocator>; |
| 142 | using LookupResult = typename Base::LookupResult; |
| 143 | |
| 144 | using Base::Base; |
| 145 | |
| 146 | /// Merge every cell's value of current map into the destination map via emplace. |
| 147 | /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). |
| 148 | /// Each filled cell in current map will invoke func once. If that map doesn't |
| 149 | /// have a key equals to the given cell, a new cell gets emplaced into that map, |
| 150 | /// and func is invoked with the third argument emplaced set to true. Otherwise |
| 151 | /// emplaced is set to false. |
| 152 | template <typename Func> |
| 153 | void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) |
| 154 | { |
| 155 | for (auto it = this->begin(), end = this->end(); it != end; ++it) |
| 156 | { |
| 157 | typename Self::LookupResult res_it; |
| 158 | bool inserted; |
| 159 | that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash()); |
| 160 | func(res_it->getMapped(), it->getMapped(), inserted); |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | /// Merge every cell's value of current map into the destination map via find. |
| 165 | /// Func should have signature void(Mapped & dst, Mapped & src, bool exist). |
| 166 | /// Each filled cell in current map will invoke func once. If that map doesn't |
| 167 | /// have a key equals to the given cell, func is invoked with the third argument |
| 168 | /// exist set to false. Otherwise exist is set to true. |
| 169 | template <typename Func> |
| 170 | void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) |
| 171 | { |
| 172 | for (auto it = this->begin(), end = this->end(); it != end; ++it) |
| 173 | { |
| 174 | auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash()); |
| 175 | if (!res_it) |
| 176 | func(it->getMapped(), it->getMapped(), false); |
| 177 | else |
| 178 | func(res_it->getMapped(), it->getMapped(), true); |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | /// Call func(const Key &, Mapped &) for each hash map element. |
| 183 | template <typename Func> |
| 184 | void forEachValue(Func && func) |
| 185 | { |
| 186 | for (auto & v : *this) |
| 187 | func(v.getKey(), v.getMapped()); |
| 188 | } |
| 189 | |
| 190 | /// Call func(Mapped &) for each hash map element. |
| 191 | template <typename Func> |
| 192 | void forEachMapped(Func && func) |
| 193 | { |
| 194 | for (auto & v : *this) |
| 195 | func(v.getMapped()); |
| 196 | } |
| 197 | |
| 198 | typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) |
| 199 | { |
| 200 | LookupResult it; |
| 201 | bool inserted; |
| 202 | this->emplace(x, it, inserted); |
| 203 | |
| 204 | /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor), |
| 205 | * since the hash table memory is initially initialized with zeros. |
| 206 | * But, in fact, an empty cell may not be initialized with zeros in the following cases: |
| 207 | * - ZeroValueStorage (it only zeros the key); |
| 208 | * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero. |
| 209 | * |
| 210 | * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately |
| 211 | * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization. |
| 212 | * |
| 213 | * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`. |
| 214 | * When we do the initialization, for new cells, it's enough to make `store 1` right away. |
| 215 | * And if we did not initialize, then even though there was zero in the cell, |
| 216 | * the compiler can not guess about this, and generates the `load`, `increment`, `store` code. |
| 217 | */ |
| 218 | if (inserted) |
| 219 | new (&it->getMapped()) typename Cell::Mapped(); |
| 220 | |
| 221 | return it->getMapped(); |
| 222 | } |
| 223 | }; |
| 224 | |
| 225 | |
| 226 | template < |
| 227 | typename Key, |
| 228 | typename Mapped, |
| 229 | typename Hash = DefaultHash<Key>, |
| 230 | typename Grower = HashTableGrower<>, |
| 231 | typename Allocator = HashTableAllocator> |
| 232 | using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>; |
| 233 | |
| 234 | |
| 235 | template < |
| 236 | typename Key, |
| 237 | typename Mapped, |
| 238 | typename Hash = DefaultHash<Key>, |
| 239 | typename Grower = HashTableGrower<>, |
| 240 | typename Allocator = HashTableAllocator> |
| 241 | using HashMapWithSavedHash = HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>; |
| 242 | |