| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | #include <Columns/IColumn.h> | 
|---|
| 4 | #include <Common/assert_cast.h> | 
|---|
| 5 | #include <Common/HashTable/HashTableKeyHolder.h> | 
|---|
| 6 | #include <Interpreters/AggregationCommon.h> | 
|---|
| 7 |  | 
|---|
| 8 |  | 
|---|
| 9 | namespace DB | 
|---|
| 10 | { | 
|---|
| 11 |  | 
|---|
| 12 | namespace ColumnsHashing | 
|---|
| 13 | { | 
|---|
| 14 |  | 
|---|
| 15 | /// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. | 
|---|
| 16 | /// Is used for caching. | 
|---|
| 17 | class HashMethodContext | 
|---|
| 18 | { | 
|---|
| 19 | public: | 
|---|
| 20 | virtual ~HashMethodContext() = default; | 
|---|
| 21 |  | 
|---|
| 22 | struct Settings | 
|---|
| 23 | { | 
|---|
| 24 | size_t max_threads; | 
|---|
| 25 | }; | 
|---|
| 26 | }; | 
|---|
| 27 |  | 
|---|
| 28 | using HashMethodContextPtr = std::shared_ptr<HashMethodContext>; | 
|---|
| 29 |  | 
|---|
| 30 |  | 
|---|
| 31 | namespace columns_hashing_impl | 
|---|
| 32 | { | 
|---|
| 33 |  | 
|---|
| 34 | template <typename Value, bool consecutive_keys_optimization_> | 
|---|
| 35 | struct LastElementCache | 
|---|
| 36 | { | 
|---|
| 37 | static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; | 
|---|
| 38 | Value value; | 
|---|
| 39 | bool empty = true; | 
|---|
| 40 | bool found = false; | 
|---|
| 41 |  | 
|---|
| 42 | bool check(const Value & value_) { return !empty && value == value_; } | 
|---|
| 43 |  | 
|---|
| 44 | template <typename Key> | 
|---|
| 45 | bool check(const Key & key) { return !empty && value.first == key; } | 
|---|
| 46 | }; | 
|---|
| 47 |  | 
|---|
| 48 | template <typename Data> | 
|---|
| 49 | struct LastElementCache<Data, false> | 
|---|
| 50 | { | 
|---|
| 51 | static constexpr bool consecutive_keys_optimization = false; | 
|---|
| 52 | }; | 
|---|
| 53 |  | 
|---|
| 54 | template <typename Mapped> | 
|---|
| 55 | class EmplaceResultImpl | 
|---|
| 56 | { | 
|---|
| 57 | Mapped & value; | 
|---|
| 58 | Mapped & cached_value; | 
|---|
| 59 | bool inserted; | 
|---|
| 60 |  | 
|---|
| 61 | public: | 
|---|
| 62 | EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_) | 
|---|
| 63 | : value(value_), cached_value(cached_value_), inserted(inserted_) {} | 
|---|
| 64 |  | 
|---|
| 65 | bool isInserted() const { return inserted; } | 
|---|
| 66 | auto & getMapped() const { return value; } | 
|---|
| 67 |  | 
|---|
| 68 | void setMapped(const Mapped & mapped) | 
|---|
| 69 | { | 
|---|
| 70 | cached_value = mapped; | 
|---|
| 71 | value = mapped; | 
|---|
| 72 | } | 
|---|
| 73 | }; | 
|---|
| 74 |  | 
|---|
| 75 | template <> | 
|---|
| 76 | class EmplaceResultImpl<void> | 
|---|
| 77 | { | 
|---|
| 78 | bool inserted; | 
|---|
| 79 |  | 
|---|
| 80 | public: | 
|---|
| 81 | explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {} | 
|---|
| 82 | bool isInserted() const { return inserted; } | 
|---|
| 83 | }; | 
|---|
| 84 |  | 
|---|
| 85 | template <typename Mapped> | 
|---|
| 86 | class FindResultImpl | 
|---|
| 87 | { | 
|---|
| 88 | Mapped * value; | 
|---|
| 89 | bool found; | 
|---|
| 90 |  | 
|---|
| 91 | public: | 
|---|
| 92 | FindResultImpl(Mapped * value_, bool found_) : value(value_), found(found_) {} | 
|---|
| 93 | bool isFound() const { return found; } | 
|---|
| 94 | Mapped & getMapped() const { return *value; } | 
|---|
| 95 | }; | 
|---|
| 96 |  | 
|---|
| 97 | template <> | 
|---|
| 98 | class FindResultImpl<void> | 
|---|
| 99 | { | 
|---|
| 100 | bool found; | 
|---|
| 101 |  | 
|---|
| 102 | public: | 
|---|
| 103 | explicit FindResultImpl(bool found_) : found(found_) {} | 
|---|
| 104 | bool isFound() const { return found; } | 
|---|
| 105 | }; | 
|---|
| 106 |  | 
|---|
| 107 | template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization> | 
|---|
| 108 | class HashMethodBase | 
|---|
| 109 | { | 
|---|
| 110 | public: | 
|---|
| 111 | using EmplaceResult = EmplaceResultImpl<Mapped>; | 
|---|
| 112 | using FindResult = FindResultImpl<Mapped>; | 
|---|
| 113 | static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; | 
|---|
| 114 | using Cache = LastElementCache<Value, consecutive_keys_optimization>; | 
|---|
| 115 |  | 
|---|
| 116 | static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } | 
|---|
| 117 |  | 
|---|
| 118 | template <typename Data> | 
|---|
| 119 | ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) | 
|---|
| 120 | { | 
|---|
| 121 | auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); | 
|---|
| 122 | return emplaceImpl(key_holder, data); | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | template <typename Data> | 
|---|
| 126 | ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) | 
|---|
| 127 | { | 
|---|
| 128 | auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); | 
|---|
| 129 | return findKeyImpl(keyHolderGetKey(key_holder), data); | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 | template <typename Data> | 
|---|
| 133 | ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) | 
|---|
| 134 | { | 
|---|
| 135 | auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); | 
|---|
| 136 | return data.hash(keyHolderGetKey(key_holder)); | 
|---|
| 137 | } | 
|---|
| 138 |  | 
|---|
| 139 | protected: | 
|---|
| 140 | Cache cache; | 
|---|
| 141 |  | 
|---|
| 142 | HashMethodBase() | 
|---|
| 143 | { | 
|---|
| 144 | if constexpr (consecutive_keys_optimization) | 
|---|
| 145 | { | 
|---|
| 146 | if constexpr (has_mapped) | 
|---|
| 147 | { | 
|---|
| 148 | /// Init PairNoInit elements. | 
|---|
| 149 | cache.value.second = Mapped(); | 
|---|
| 150 | cache.value.first = {}; | 
|---|
| 151 | } | 
|---|
| 152 | else | 
|---|
| 153 | cache.value = Value(); | 
|---|
| 154 | } | 
|---|
| 155 | } | 
|---|
| 156 |  | 
|---|
| 157 | template <typename Data, typename KeyHolder> | 
|---|
| 158 | ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) | 
|---|
| 159 | { | 
|---|
| 160 | if constexpr (Cache::consecutive_keys_optimization) | 
|---|
| 161 | { | 
|---|
| 162 | if (cache.found && cache.check(keyHolderGetKey(key_holder))) | 
|---|
| 163 | { | 
|---|
| 164 | if constexpr (has_mapped) | 
|---|
| 165 | return EmplaceResult(cache.value.second, cache.value.second, false); | 
|---|
| 166 | else | 
|---|
| 167 | return EmplaceResult(false); | 
|---|
| 168 | } | 
|---|
| 169 | } | 
|---|
| 170 |  | 
|---|
| 171 | typename Data::LookupResult it; | 
|---|
| 172 | bool inserted = false; | 
|---|
| 173 | data.emplace(key_holder, it, inserted); | 
|---|
| 174 |  | 
|---|
| 175 | [[maybe_unused]] Mapped * cached = nullptr; | 
|---|
| 176 | if constexpr (has_mapped) | 
|---|
| 177 | cached = &it->getMapped(); | 
|---|
| 178 |  | 
|---|
| 179 | if (inserted) | 
|---|
| 180 | { | 
|---|
| 181 | if constexpr (has_mapped) | 
|---|
| 182 | { | 
|---|
| 183 | new (&it->getMapped()) Mapped(); | 
|---|
| 184 | } | 
|---|
| 185 | } | 
|---|
| 186 |  | 
|---|
| 187 | if constexpr (consecutive_keys_optimization) | 
|---|
| 188 | { | 
|---|
| 189 | cache.found = true; | 
|---|
| 190 | cache.empty = false; | 
|---|
| 191 |  | 
|---|
| 192 | if constexpr (has_mapped) | 
|---|
| 193 | { | 
|---|
| 194 | cache.value.first = it->getKey(); | 
|---|
| 195 | cache.value.second = it->getMapped(); | 
|---|
| 196 | cached = &cache.value.second; | 
|---|
| 197 | } | 
|---|
| 198 | else | 
|---|
| 199 | { | 
|---|
| 200 | cache.value = it->getKey(); | 
|---|
| 201 | } | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | if constexpr (has_mapped) | 
|---|
| 205 | return EmplaceResult(it->getMapped(), *cached, inserted); | 
|---|
| 206 | else | 
|---|
| 207 | return EmplaceResult(inserted); | 
|---|
| 208 | } | 
|---|
| 209 |  | 
|---|
| 210 | template <typename Data, typename Key> | 
|---|
| 211 | ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) | 
|---|
| 212 | { | 
|---|
| 213 | if constexpr (Cache::consecutive_keys_optimization) | 
|---|
| 214 | { | 
|---|
| 215 | if (cache.check(key)) | 
|---|
| 216 | { | 
|---|
| 217 | if constexpr (has_mapped) | 
|---|
| 218 | return FindResult(&cache.value.second, cache.found); | 
|---|
| 219 | else | 
|---|
| 220 | return FindResult(cache.found); | 
|---|
| 221 | } | 
|---|
| 222 | } | 
|---|
| 223 |  | 
|---|
| 224 | auto it = data.find(key); | 
|---|
| 225 |  | 
|---|
| 226 | if constexpr (consecutive_keys_optimization) | 
|---|
| 227 | { | 
|---|
| 228 | cache.found = it != nullptr; | 
|---|
| 229 | cache.empty = false; | 
|---|
| 230 |  | 
|---|
| 231 | if constexpr (has_mapped) | 
|---|
| 232 | { | 
|---|
| 233 | cache.value.first = key; | 
|---|
| 234 | if (it) | 
|---|
| 235 | { | 
|---|
| 236 | cache.value.second = it->getMapped(); | 
|---|
| 237 | } | 
|---|
| 238 | } | 
|---|
| 239 | else | 
|---|
| 240 | { | 
|---|
| 241 | cache.value = key; | 
|---|
| 242 | } | 
|---|
| 243 | } | 
|---|
| 244 |  | 
|---|
| 245 | if constexpr (has_mapped) | 
|---|
| 246 | return FindResult(it ? &it->getMapped() : nullptr, it != nullptr); | 
|---|
| 247 | else | 
|---|
| 248 | return FindResult(it != nullptr); | 
|---|
| 249 | } | 
|---|
| 250 | }; | 
|---|
| 251 |  | 
|---|
| 252 |  | 
|---|
| 253 | template <typename T> | 
|---|
| 254 | struct MappedCache : public PaddedPODArray<T> {}; | 
|---|
| 255 |  | 
|---|
| 256 | template <> | 
|---|
| 257 | struct MappedCache<void> {}; | 
|---|
| 258 |  | 
|---|
| 259 |  | 
|---|
| 260 | /// This class is designed to provide the functionality that is required for | 
|---|
| 261 | /// supporting nullable keys in HashMethodKeysFixed. If there are | 
|---|
| 262 | /// no nullable keys, this class is merely implemented as an empty shell. | 
|---|
| 263 | template <typename Key, bool has_nullable_keys> | 
|---|
| 264 | class BaseStateKeysFixed; | 
|---|
| 265 |  | 
|---|
| 266 | /// Case where nullable keys are supported. | 
|---|
| 267 | template <typename Key> | 
|---|
| 268 | class BaseStateKeysFixed<Key, true> | 
|---|
| 269 | { | 
|---|
| 270 | protected: | 
|---|
| 271 | BaseStateKeysFixed(const ColumnRawPtrs & key_columns) | 
|---|
| 272 | { | 
|---|
| 273 | null_maps.reserve(key_columns.size()); | 
|---|
| 274 | actual_columns.reserve(key_columns.size()); | 
|---|
| 275 |  | 
|---|
| 276 | for (const auto & col : key_columns) | 
|---|
| 277 | { | 
|---|
| 278 | if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col)) | 
|---|
| 279 | { | 
|---|
| 280 | actual_columns.push_back(&nullable_col->getNestedColumn()); | 
|---|
| 281 | null_maps.push_back(&nullable_col->getNullMapColumn()); | 
|---|
| 282 | } | 
|---|
| 283 | else | 
|---|
| 284 | { | 
|---|
| 285 | actual_columns.push_back(col); | 
|---|
| 286 | null_maps.push_back(nullptr); | 
|---|
| 287 | } | 
|---|
| 288 | } | 
|---|
| 289 | } | 
|---|
| 290 |  | 
|---|
| 291 | /// Return the columns which actually contain the values of the keys. | 
|---|
| 292 | /// For a given key column, if it is nullable, we return its nested | 
|---|
| 293 | /// column. Otherwise we return the key column itself. | 
|---|
| 294 | inline const ColumnRawPtrs & getActualColumns() const | 
|---|
| 295 | { | 
|---|
| 296 | return actual_columns; | 
|---|
| 297 | } | 
|---|
| 298 |  | 
|---|
| 299 | /// Create a bitmap that indicates whether, for a particular row, | 
|---|
| 300 | /// a key column bears a null value or not. | 
|---|
| 301 | KeysNullMap<Key> createBitmap(size_t row) const | 
|---|
| 302 | { | 
|---|
| 303 | KeysNullMap<Key> bitmap{}; | 
|---|
| 304 |  | 
|---|
| 305 | for (size_t k = 0; k < null_maps.size(); ++k) | 
|---|
| 306 | { | 
|---|
| 307 | if (null_maps[k] != nullptr) | 
|---|
| 308 | { | 
|---|
| 309 | const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData(); | 
|---|
| 310 | if (null_map[row] == 1) | 
|---|
| 311 | { | 
|---|
| 312 | size_t bucket = k / 8; | 
|---|
| 313 | size_t offset = k % 8; | 
|---|
| 314 | bitmap[bucket] |= UInt8(1) << offset; | 
|---|
| 315 | } | 
|---|
| 316 | } | 
|---|
| 317 | } | 
|---|
| 318 |  | 
|---|
| 319 | return bitmap; | 
|---|
| 320 | } | 
|---|
| 321 |  | 
|---|
| 322 | private: | 
|---|
| 323 | ColumnRawPtrs actual_columns; | 
|---|
| 324 | ColumnRawPtrs null_maps; | 
|---|
| 325 | }; | 
|---|
| 326 |  | 
|---|
| 327 | /// Case where nullable keys are not supported. | 
|---|
| 328 | template <typename Key> | 
|---|
| 329 | class BaseStateKeysFixed<Key, false> | 
|---|
| 330 | { | 
|---|
| 331 | protected: | 
|---|
| 332 | BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {} | 
|---|
| 333 |  | 
|---|
| 334 | const ColumnRawPtrs & getActualColumns() const { return actual_columns; } | 
|---|
| 335 |  | 
|---|
| 336 | KeysNullMap<Key> createBitmap(size_t) const | 
|---|
| 337 | { | 
|---|
| 338 | throw Exception{ "Internal error: calling createBitmap() for non-nullable keys" | 
|---|
| 339 | " is forbidden", ErrorCodes::LOGICAL_ERROR}; | 
|---|
| 340 | } | 
|---|
| 341 |  | 
|---|
| 342 | private: | 
|---|
| 343 | ColumnRawPtrs actual_columns; | 
|---|
| 344 | }; | 
|---|
| 345 |  | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 | } | 
|---|
| 349 |  | 
|---|
| 350 | } | 
|---|
| 351 |  | 
|---|