| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | #include <Common/HashTable/Hash.h> | 
|---|
| 4 | #include <Common/HashTable/HashTable.h> | 
|---|
| 5 | #include <Common/HashTable/HashTableAllocator.h> | 
|---|
| 6 |  | 
|---|
| 7 |  | 
|---|
/** NOTE: HashMap can only be used with memmoveable (position-independent) types.
  * Example: std::string is not position-independent in libstdc++ with the C++11 ABI or in libc++.
  * Also, the key type must be such that a value consisting of all zero bytes compares equal to the zero key.
  */
| 12 |  | 
|---|
| 13 |  | 
|---|
/// Tag type: selects the PairNoInit constructor that sets only `first`.
struct NoInitTag
{
};

/// A two-member pair that skips initialization of its members whenever the caller does not supply a value.
template <typename First, typename Second>
struct PairNoInit
{
    First first;
    Second second;

    /// User-provided with an empty body (rather than `= default`), so that even
    /// value-initialization `PairNoInit()` does not zero-fill trivially constructible members.
    PairNoInit() {}

    /// Constructs `first` from the forwarded argument; `second` is only default-initialized.
    template <typename FirstArg>
    PairNoInit(FirstArg && first_, NoInitTag)
        : first(std::forward<FirstArg>(first_))
    {
    }

    /// Constructs both members from the forwarded arguments.
    template <typename FirstArg, typename SecondArg>
    PairNoInit(FirstArg && first_, SecondArg && second_)
        : first(std::forward<FirstArg>(first_))
        , second(std::forward<SecondArg>(second_))
    {
    }
};
| 37 |  | 
|---|
| 38 |  | 
|---|
| 39 | template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> | 
|---|
| 40 | struct HashMapCell | 
|---|
| 41 | { | 
|---|
| 42 | using Mapped = TMapped; | 
|---|
| 43 | using State = TState; | 
|---|
| 44 |  | 
|---|
| 45 | using value_type = PairNoInit<Key, Mapped>; | 
|---|
| 46 | using mapped_type = Mapped; | 
|---|
| 47 | using key_type = Key; | 
|---|
| 48 |  | 
|---|
| 49 | value_type value; | 
|---|
| 50 |  | 
|---|
| 51 | HashMapCell() {} | 
|---|
| 52 | HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} | 
|---|
| 53 | HashMapCell(const value_type & value_, const State &) : value(value_) {} | 
|---|
| 54 |  | 
|---|
| 55 | /// Get the key (externally). | 
|---|
| 56 | const Key & getKey() const { return value.first; } | 
|---|
| 57 | Mapped & getMapped() { return value.second; } | 
|---|
| 58 | const Mapped & getMapped() const { return value.second; } | 
|---|
| 59 | const value_type & getValue() const { return value; } | 
|---|
| 60 |  | 
|---|
| 61 | /// Get the key (internally). | 
|---|
| 62 | static const Key & getKey(const value_type & value) { return value.first; } | 
|---|
| 63 |  | 
|---|
| 64 | bool keyEquals(const Key & key_) const { return value.first == key_; } | 
|---|
| 65 | bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; } | 
|---|
| 66 | bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; } | 
|---|
| 67 |  | 
|---|
| 68 | void setHash(size_t /*hash_value*/) {} | 
|---|
| 69 | size_t getHash(const Hash & hash) const { return hash(value.first); } | 
|---|
| 70 |  | 
|---|
| 71 | bool isZero(const State & state) const { return isZero(value.first, state); } | 
|---|
| 72 | static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); } | 
|---|
| 73 |  | 
|---|
| 74 | /// Set the key value to zero. | 
|---|
| 75 | void setZero() { ZeroTraits::set(value.first); } | 
|---|
| 76 |  | 
|---|
| 77 | /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). | 
|---|
| 78 | static constexpr bool need_zero_value_storage = true; | 
|---|
| 79 |  | 
|---|
| 80 | /// Whether the cell was deleted. | 
|---|
| 81 | bool isDeleted() const { return false; } | 
|---|
| 82 |  | 
|---|
| 83 | void setMapped(const value_type & value_) { value.second = value_.second; } | 
|---|
| 84 |  | 
|---|
| 85 | /// Serialization, in binary and text form. | 
|---|
| 86 | void write(DB::WriteBuffer & wb) const | 
|---|
| 87 | { | 
|---|
| 88 | DB::writeBinary(value.first, wb); | 
|---|
| 89 | DB::writeBinary(value.second, wb); | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | void writeText(DB::WriteBuffer & wb) const | 
|---|
| 93 | { | 
|---|
| 94 | DB::writeDoubleQuoted(value.first, wb); | 
|---|
| 95 | DB::writeChar(',', wb); | 
|---|
| 96 | DB::writeDoubleQuoted(value.second, wb); | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | /// Deserialization, in binary and text form. | 
|---|
| 100 | void read(DB::ReadBuffer & rb) | 
|---|
| 101 | { | 
|---|
| 102 | DB::readBinary(value.first, rb); | 
|---|
| 103 | DB::readBinary(value.second, rb); | 
|---|
| 104 | } | 
|---|
| 105 |  | 
|---|
| 106 | void readText(DB::ReadBuffer & rb) | 
|---|
| 107 | { | 
|---|
| 108 | DB::readDoubleQuoted(value.first, rb); | 
|---|
| 109 | DB::assertChar(',', rb); | 
|---|
| 110 | DB::readDoubleQuoted(value.second, rb); | 
|---|
| 111 | } | 
|---|
| 112 | }; | 
|---|
| 113 |  | 
|---|
| 114 | template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> | 
|---|
| 115 | struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState> | 
|---|
| 116 | { | 
|---|
| 117 | using Base = HashMapCell<Key, TMapped, Hash, TState>; | 
|---|
| 118 |  | 
|---|
| 119 | size_t saved_hash; | 
|---|
| 120 |  | 
|---|
| 121 | using Base::Base; | 
|---|
| 122 |  | 
|---|
| 123 | bool keyEquals(const Key & key_) const { return this->value.first == key_; } | 
|---|
| 124 | bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; } | 
|---|
| 125 | bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); } | 
|---|
| 126 |  | 
|---|
| 127 | void setHash(size_t hash_value) { saved_hash = hash_value; } | 
|---|
| 128 | size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } | 
|---|
| 129 | }; | 
|---|
| 130 |  | 
|---|
| 131 | template < | 
|---|
| 132 | typename Key, | 
|---|
| 133 | typename Cell, | 
|---|
| 134 | typename Hash = DefaultHash<Key>, | 
|---|
| 135 | typename Grower = HashTableGrower<>, | 
|---|
| 136 | typename Allocator = HashTableAllocator> | 
|---|
| 137 | class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> | 
|---|
| 138 | { | 
|---|
| 139 | public: | 
|---|
| 140 | using Self = HashMapTable; | 
|---|
| 141 | using Base = HashTable<Key, Cell, Hash, Grower, Allocator>; | 
|---|
| 142 | using LookupResult = typename Base::LookupResult; | 
|---|
| 143 |  | 
|---|
| 144 | using Base::Base; | 
|---|
| 145 |  | 
|---|
| 146 | /// Merge every cell's value of current map into the destination map via emplace. | 
|---|
| 147 | ///  Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). | 
|---|
| 148 | ///  Each filled cell in current map will invoke func once. If that map doesn't | 
|---|
| 149 | ///  have a key equals to the given cell, a new cell gets emplaced into that map, | 
|---|
| 150 | ///  and func is invoked with the third argument emplaced set to true. Otherwise | 
|---|
| 151 | ///  emplaced is set to false. | 
|---|
| 152 | template <typename Func> | 
|---|
| 153 | void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) | 
|---|
| 154 | { | 
|---|
| 155 | for (auto it = this->begin(), end = this->end(); it != end; ++it) | 
|---|
| 156 | { | 
|---|
| 157 | typename Self::LookupResult res_it; | 
|---|
| 158 | bool inserted; | 
|---|
| 159 | that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash()); | 
|---|
| 160 | func(res_it->getMapped(), it->getMapped(), inserted); | 
|---|
| 161 | } | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 | /// Merge every cell's value of current map into the destination map via find. | 
|---|
| 165 | ///  Func should have signature void(Mapped & dst, Mapped & src, bool exist). | 
|---|
| 166 | ///  Each filled cell in current map will invoke func once. If that map doesn't | 
|---|
| 167 | ///  have a key equals to the given cell, func is invoked with the third argument | 
|---|
| 168 | ///  exist set to false. Otherwise exist is set to true. | 
|---|
| 169 | template <typename Func> | 
|---|
| 170 | void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) | 
|---|
| 171 | { | 
|---|
| 172 | for (auto it = this->begin(), end = this->end(); it != end; ++it) | 
|---|
| 173 | { | 
|---|
| 174 | auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash()); | 
|---|
| 175 | if (!res_it) | 
|---|
| 176 | func(it->getMapped(), it->getMapped(), false); | 
|---|
| 177 | else | 
|---|
| 178 | func(res_it->getMapped(), it->getMapped(), true); | 
|---|
| 179 | } | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
| 182 | /// Call func(const Key &, Mapped &) for each hash map element. | 
|---|
| 183 | template <typename Func> | 
|---|
| 184 | void forEachValue(Func && func) | 
|---|
| 185 | { | 
|---|
| 186 | for (auto & v : *this) | 
|---|
| 187 | func(v.getKey(), v.getMapped()); | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | /// Call func(Mapped &) for each hash map element. | 
|---|
| 191 | template <typename Func> | 
|---|
| 192 | void forEachMapped(Func && func) | 
|---|
| 193 | { | 
|---|
| 194 | for (auto & v : *this) | 
|---|
| 195 | func(v.getMapped()); | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) | 
|---|
| 199 | { | 
|---|
| 200 | LookupResult it; | 
|---|
| 201 | bool inserted; | 
|---|
| 202 | this->emplace(x, it, inserted); | 
|---|
| 203 |  | 
|---|
| 204 | /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor), | 
|---|
| 205 | *  since the hash table memory is initially initialized with zeros. | 
|---|
| 206 | * But, in fact, an empty cell may not be initialized with zeros in the following cases: | 
|---|
| 207 | * - ZeroValueStorage (it only zeros the key); | 
|---|
| 208 | * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero. | 
|---|
| 209 | * | 
|---|
| 210 | * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately | 
|---|
| 211 | *  after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization. | 
|---|
| 212 | * | 
|---|
| 213 | * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`. | 
|---|
| 214 | * When we do the initialization, for new cells, it's enough to make `store 1` right away. | 
|---|
| 215 | * And if we did not initialize, then even though there was zero in the cell, | 
|---|
| 216 | *  the compiler can not guess about this, and generates the `load`, `increment`, `store` code. | 
|---|
| 217 | */ | 
|---|
| 218 | if (inserted) | 
|---|
| 219 | new (&it->getMapped()) typename Cell::Mapped(); | 
|---|
| 220 |  | 
|---|
| 221 | return it->getMapped(); | 
|---|
| 222 | } | 
|---|
| 223 | }; | 
|---|
| 224 |  | 
|---|
| 225 |  | 
|---|
| 226 | template < | 
|---|
| 227 | typename Key, | 
|---|
| 228 | typename Mapped, | 
|---|
| 229 | typename Hash = DefaultHash<Key>, | 
|---|
| 230 | typename Grower = HashTableGrower<>, | 
|---|
| 231 | typename Allocator = HashTableAllocator> | 
|---|
| 232 | using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>; | 
|---|
| 233 |  | 
|---|
| 234 |  | 
|---|
| 235 | template < | 
|---|
| 236 | typename Key, | 
|---|
| 237 | typename Mapped, | 
|---|
| 238 | typename Hash = DefaultHash<Key>, | 
|---|
| 239 | typename Grower = HashTableGrower<>, | 
|---|
| 240 | typename Allocator = HashTableAllocator> | 
|---|
| 241 | using HashMapWithSavedHash = HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>; | 
|---|
| 242 |  | 
|---|