| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 |  | 
|---|
| 4 | #include <Common/HashTable/HashTable.h> | 
|---|
| 5 | #include <Common/HashTable/HashTableKeyHolder.h> | 
|---|
| 6 | #include <Common/ColumnsHashingImpl.h> | 
|---|
| 7 | #include <Common/Arena.h> | 
|---|
| 8 | #include <Common/LRUCache.h> | 
|---|
| 9 | #include <Common/assert_cast.h> | 
|---|
| 10 | #include <common/unaligned.h> | 
|---|
| 11 |  | 
|---|
| 12 | #include <Columns/ColumnString.h> | 
|---|
| 13 | #include <Columns/ColumnFixedString.h> | 
|---|
| 14 | #include <Columns/ColumnLowCardinality.h> | 
|---|
| 15 |  | 
|---|
| 16 | #include <Core/Defines.h> | 
|---|
| 17 | #include <memory> | 
|---|
| 18 |  | 
|---|
| 19 | namespace DB | 
|---|
| 20 | { | 
|---|
| 21 |  | 
|---|
| 22 | namespace ColumnsHashing | 
|---|
| 23 | { | 
|---|
| 24 |  | 
|---|
| 25 | /// For the case when there is one numeric key. | 
|---|
| 26 | /// UInt8/16/32/64 for any type with corresponding bit width. | 
|---|
| 27 | template <typename Value, typename Mapped, typename FieldType, bool use_cache = true> | 
|---|
| 28 | struct HashMethodOneNumber | 
|---|
| 29 | : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache> | 
|---|
| 30 | { | 
|---|
| 31 | using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>; | 
|---|
| 32 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 33 |  | 
|---|
| 34 | const char * vec; | 
|---|
| 35 |  | 
|---|
| 36 | /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. | 
|---|
| 37 | HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) | 
|---|
| 38 | { | 
|---|
| 39 | vec = key_columns[0]->getRawData().data; | 
|---|
| 40 | } | 
|---|
| 41 |  | 
|---|
| 42 | HashMethodOneNumber(const IColumn * column) | 
|---|
| 43 | { | 
|---|
| 44 | vec = column->getRawData().data; | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | /// Creates context. Method is called once and result context is used in all threads. | 
|---|
| 48 | using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr | 
|---|
| 49 |  | 
|---|
| 50 | /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr. | 
|---|
| 51 | /// Data is a HashTable where to insert key from column's row. | 
|---|
| 52 | /// For Serialized method, key may be placed in pool. | 
|---|
| 53 | using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult | 
|---|
| 54 |  | 
|---|
| 55 | /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. | 
|---|
| 56 | using Base::findKey;  /// (Data & data, size_t row, Arena & pool) -> FindResult | 
|---|
| 57 |  | 
|---|
| 58 | /// Get hash value of row. | 
|---|
| 59 | using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t | 
|---|
| 60 |  | 
|---|
| 61 | /// Is used for default implementation in HashMethodBase. | 
|---|
| 62 | FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); } | 
|---|
| 63 | }; | 
|---|
| 64 |  | 
|---|
| 65 |  | 
|---|
| 66 | /// For the case when there is one string key. | 
|---|
| 67 | template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true> | 
|---|
| 68 | struct HashMethodString | 
|---|
| 69 | : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache> | 
|---|
| 70 | { | 
|---|
| 71 | using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>; | 
|---|
| 72 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 73 |  | 
|---|
| 74 | const IColumn::Offset * offsets; | 
|---|
| 75 | const UInt8 * chars; | 
|---|
| 76 |  | 
|---|
| 77 | HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) | 
|---|
| 78 | { | 
|---|
| 79 | const IColumn & column = *key_columns[0]; | 
|---|
| 80 | const ColumnString & column_string = assert_cast<const ColumnString &>(column); | 
|---|
| 81 | offsets = column_string.getOffsets().data(); | 
|---|
| 82 | chars = column_string.getChars().data(); | 
|---|
| 83 | } | 
|---|
| 84 |  | 
|---|
| 85 | auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const | 
|---|
| 86 | { | 
|---|
| 87 | StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); | 
|---|
| 88 |  | 
|---|
| 89 | if constexpr (place_string_to_arena) | 
|---|
| 90 | { | 
|---|
| 91 | return ArenaKeyHolder{key, pool}; | 
|---|
| 92 | } | 
|---|
| 93 | else | 
|---|
| 94 | { | 
|---|
| 95 | return key; | 
|---|
| 96 | } | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | protected: | 
|---|
| 100 | friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 101 | }; | 
|---|
| 102 |  | 
|---|
| 103 |  | 
|---|
| 104 | /// For the case when there is one fixed-length string key. | 
|---|
| 105 | template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true> | 
|---|
| 106 | struct HashMethodFixedString | 
|---|
| 107 | : public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache> | 
|---|
| 108 | { | 
|---|
| 109 | using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>; | 
|---|
| 110 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 111 |  | 
|---|
| 112 | size_t n; | 
|---|
| 113 | const ColumnFixedString::Chars * chars; | 
|---|
| 114 |  | 
|---|
| 115 | HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) | 
|---|
| 116 | { | 
|---|
| 117 | const IColumn & column = *key_columns[0]; | 
|---|
| 118 | const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column); | 
|---|
| 119 | n = column_string.getN(); | 
|---|
| 120 | chars = &column_string.getChars(); | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const | 
|---|
| 124 | { | 
|---|
| 125 | StringRef key(&(*chars)[row * n], n); | 
|---|
| 126 |  | 
|---|
| 127 | if constexpr (place_string_to_arena) | 
|---|
| 128 | { | 
|---|
| 129 | return ArenaKeyHolder{key, pool}; | 
|---|
| 130 | } | 
|---|
| 131 | else | 
|---|
| 132 | { | 
|---|
| 133 | return key; | 
|---|
| 134 | } | 
|---|
| 135 | } | 
|---|
| 136 |  | 
|---|
| 137 | protected: | 
|---|
| 138 | friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 139 | }; | 
|---|
| 140 |  | 
|---|
| 141 |  | 
|---|
| 142 | /// Cache stores dictionaries and saved_hash per dictionary key. | 
|---|
| 143 | class LowCardinalityDictionaryCache : public HashMethodContext | 
|---|
| 144 | { | 
|---|
| 145 | public: | 
|---|
| 146 | /// Will assume that dictionaries with same hash has the same keys. | 
|---|
| 147 | /// Just in case, check that they have also the same size. | 
|---|
| 148 | struct DictionaryKey | 
|---|
| 149 | { | 
|---|
| 150 | UInt128 hash; | 
|---|
| 151 | UInt64 size; | 
|---|
| 152 |  | 
|---|
| 153 | bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; } | 
|---|
| 154 | }; | 
|---|
| 155 |  | 
|---|
| 156 | struct DictionaryKeyHash | 
|---|
| 157 | { | 
|---|
| 158 | size_t operator()(const DictionaryKey & key) const | 
|---|
| 159 | { | 
|---|
| 160 | SipHash hash; | 
|---|
| 161 | hash.update(key.hash.low); | 
|---|
| 162 | hash.update(key.hash.high); | 
|---|
| 163 | hash.update(key.size); | 
|---|
| 164 | return hash.get64(); | 
|---|
| 165 | } | 
|---|
| 166 | }; | 
|---|
| 167 |  | 
|---|
| 168 | struct CachedValues | 
|---|
| 169 | { | 
|---|
| 170 | /// Store ptr to dictionary to be sure it won't be deleted. | 
|---|
| 171 | ColumnPtr dictionary_holder; | 
|---|
| 172 | /// Hashes for dictionary keys. | 
|---|
| 173 | const UInt64 * saved_hash = nullptr; | 
|---|
| 174 | }; | 
|---|
| 175 |  | 
|---|
| 176 | using CachedValuesPtr = std::shared_ptr<CachedValues>; | 
|---|
| 177 |  | 
|---|
| 178 | explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {} | 
|---|
| 179 |  | 
|---|
| 180 | CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); } | 
|---|
| 181 | void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); } | 
|---|
| 182 |  | 
|---|
| 183 | private: | 
|---|
| 184 | using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>; | 
|---|
| 185 | Cache cache; | 
|---|
| 186 | }; | 
|---|
| 187 |  | 
|---|
| 188 |  | 
|---|
| 189 | /// Single low cardinality column. | 
|---|
| 190 | template <typename SingleColumnMethod, typename Mapped, bool use_cache> | 
|---|
| 191 | struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod | 
|---|
| 192 | { | 
|---|
| 193 | using Base = SingleColumnMethod; | 
|---|
| 194 |  | 
|---|
| 195 | enum class VisitValue | 
|---|
| 196 | { | 
|---|
| 197 | Empty = 0, | 
|---|
| 198 | Found = 1, | 
|---|
| 199 | NotFound = 2, | 
|---|
| 200 | }; | 
|---|
| 201 |  | 
|---|
| 202 | static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; | 
|---|
| 203 | using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>; | 
|---|
| 204 | using FindResult = columns_hashing_impl::FindResultImpl<Mapped>; | 
|---|
| 205 |  | 
|---|
| 206 | static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) | 
|---|
| 207 | { | 
|---|
| 208 | return std::make_shared<LowCardinalityDictionaryCache>(settings); | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | ColumnRawPtrs key_columns; | 
|---|
| 212 | const IColumn * positions = nullptr; | 
|---|
| 213 | size_t size_of_index_type = 0; | 
|---|
| 214 |  | 
|---|
| 215 | /// saved hash is from current column or from cache. | 
|---|
| 216 | const UInt64 * saved_hash = nullptr; | 
|---|
| 217 | /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted. | 
|---|
| 218 | ColumnPtr dictionary_holder; | 
|---|
| 219 |  | 
|---|
| 220 | /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. | 
|---|
| 221 | columns_hashing_impl::MappedCache<Mapped> mapped_cache; | 
|---|
| 222 | PaddedPODArray<VisitValue> visit_cache; | 
|---|
| 223 |  | 
|---|
| 224 | /// If initialized column is nullable. | 
|---|
| 225 | bool is_nullable = false; | 
|---|
| 226 |  | 
|---|
| 227 | static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * low_cardinality_column) | 
|---|
| 228 | { | 
|---|
| 229 | auto column = typeid_cast<const ColumnLowCardinality *>(low_cardinality_column); | 
|---|
| 230 | if (!column) | 
|---|
| 231 | throw Exception( "Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. " | 
|---|
| 232 | "Excepted LowCardinality, got "+ column->getName(), ErrorCodes::LOGICAL_ERROR); | 
|---|
| 233 | return *column; | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | HashMethodSingleLowCardinalityColumn( | 
|---|
| 237 | const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context) | 
|---|
| 238 | : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context) | 
|---|
| 239 | { | 
|---|
| 240 | auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]); | 
|---|
| 241 |  | 
|---|
| 242 | if (!context) | 
|---|
| 243 | throw Exception( "Cache wasn't created for HashMethodSingleLowCardinalityColumn", | 
|---|
| 244 | ErrorCodes::LOGICAL_ERROR); | 
|---|
| 245 |  | 
|---|
| 246 | LowCardinalityDictionaryCache * lcd_cache; | 
|---|
| 247 | if constexpr (use_cache) | 
|---|
| 248 | { | 
|---|
| 249 | lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get()); | 
|---|
| 250 | if (!lcd_cache) | 
|---|
| 251 | { | 
|---|
| 252 | const auto & cached_val = *context; | 
|---|
| 253 | throw Exception( "Invalid type for HashMethodSingleLowCardinalityColumn cache: " | 
|---|
| 254 | + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); | 
|---|
| 255 | } | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
| 258 | auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); | 
|---|
| 259 | is_nullable = column->getDictionary().nestedColumnIsNullable(); | 
|---|
| 260 | key_columns = {dict}; | 
|---|
| 261 | bool is_shared_dict = column->isSharedDictionary(); | 
|---|
| 262 |  | 
|---|
| 263 | typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key; | 
|---|
| 264 | typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values; | 
|---|
| 265 |  | 
|---|
| 266 | if (is_shared_dict) | 
|---|
| 267 | { | 
|---|
| 268 | dictionary_key = {column->getDictionary().getHash(), dict->size()}; | 
|---|
| 269 | if constexpr (use_cache) | 
|---|
| 270 | cached_values = lcd_cache->get(dictionary_key); | 
|---|
| 271 | } | 
|---|
| 272 |  | 
|---|
| 273 | if (cached_values) | 
|---|
| 274 | { | 
|---|
| 275 | saved_hash = cached_values->saved_hash; | 
|---|
| 276 | dictionary_holder = cached_values->dictionary_holder; | 
|---|
| 277 | } | 
|---|
| 278 | else | 
|---|
| 279 | { | 
|---|
| 280 | saved_hash = column->getDictionary().tryGetSavedHash(); | 
|---|
| 281 | dictionary_holder = column->getDictionaryPtr(); | 
|---|
| 282 |  | 
|---|
| 283 | if constexpr (use_cache) | 
|---|
| 284 | { | 
|---|
| 285 | if (is_shared_dict) | 
|---|
| 286 | { | 
|---|
| 287 | cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>(); | 
|---|
| 288 | cached_values->saved_hash = saved_hash; | 
|---|
| 289 | cached_values->dictionary_holder = dictionary_holder; | 
|---|
| 290 |  | 
|---|
| 291 | lcd_cache->set(dictionary_key, cached_values); | 
|---|
| 292 | } | 
|---|
| 293 | } | 
|---|
| 294 | } | 
|---|
| 295 |  | 
|---|
| 296 | if constexpr (has_mapped) | 
|---|
| 297 | mapped_cache.resize(key_columns[0]->size()); | 
|---|
| 298 |  | 
|---|
| 299 | VisitValue empty(VisitValue::Empty); | 
|---|
| 300 | visit_cache.assign(key_columns[0]->size(), empty); | 
|---|
| 301 |  | 
|---|
| 302 | size_of_index_type = column->getSizeOfIndexType(); | 
|---|
| 303 | positions = column->getIndexesPtr().get(); | 
|---|
| 304 | } | 
|---|
| 305 |  | 
|---|
| 306 | ALWAYS_INLINE size_t getIndexAt(size_t row) const | 
|---|
| 307 | { | 
|---|
| 308 | switch (size_of_index_type) | 
|---|
| 309 | { | 
|---|
| 310 | case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row); | 
|---|
| 311 | case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row); | 
|---|
| 312 | case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row); | 
|---|
| 313 | case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row); | 
|---|
| 314 | default: throw Exception( "Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); | 
|---|
| 315 | } | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 | /// Get the key holder from the key columns for insertion into the hash table. | 
|---|
| 319 | ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const | 
|---|
| 320 | { | 
|---|
| 321 | return Base::getKeyHolder(getIndexAt(row), pool); | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | template <typename Data> | 
|---|
| 325 | ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool) | 
|---|
| 326 | { | 
|---|
| 327 | size_t row = getIndexAt(row_); | 
|---|
| 328 |  | 
|---|
| 329 | if (is_nullable && row == 0) | 
|---|
| 330 | { | 
|---|
| 331 | visit_cache[row] = VisitValue::Found; | 
|---|
| 332 | bool has_null_key = data.hasNullKeyData(); | 
|---|
| 333 | data.hasNullKeyData() = true; | 
|---|
| 334 |  | 
|---|
| 335 | if constexpr (has_mapped) | 
|---|
| 336 | return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key); | 
|---|
| 337 | else | 
|---|
| 338 | return EmplaceResult(!has_null_key); | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | if (visit_cache[row] == VisitValue::Found) | 
|---|
| 342 | { | 
|---|
| 343 | if constexpr (has_mapped) | 
|---|
| 344 | return EmplaceResult(mapped_cache[row], mapped_cache[row], false); | 
|---|
| 345 | else | 
|---|
| 346 | return EmplaceResult(false); | 
|---|
| 347 | } | 
|---|
| 348 |  | 
|---|
| 349 | auto key_holder = getKeyHolder(row_, pool); | 
|---|
| 350 |  | 
|---|
| 351 | bool inserted = false; | 
|---|
| 352 | typename Data::LookupResult it; | 
|---|
| 353 | if (saved_hash) | 
|---|
| 354 | data.emplace(key_holder, it, inserted, saved_hash[row]); | 
|---|
| 355 | else | 
|---|
| 356 | data.emplace(key_holder, it, inserted); | 
|---|
| 357 |  | 
|---|
| 358 | visit_cache[row] = VisitValue::Found; | 
|---|
| 359 |  | 
|---|
| 360 | if constexpr (has_mapped) | 
|---|
| 361 | { | 
|---|
| 362 | auto & mapped = it->getMapped(); | 
|---|
| 363 | if (inserted) | 
|---|
| 364 | { | 
|---|
| 365 | new (&mapped) Mapped(); | 
|---|
| 366 | } | 
|---|
| 367 | mapped_cache[row] = mapped; | 
|---|
| 368 | return EmplaceResult(mapped, mapped_cache[row], inserted); | 
|---|
| 369 | } | 
|---|
| 370 | else | 
|---|
| 371 | return EmplaceResult(inserted); | 
|---|
| 372 | } | 
|---|
| 373 |  | 
|---|
| 374 | ALWAYS_INLINE bool isNullAt(size_t i) | 
|---|
| 375 | { | 
|---|
| 376 | if (!is_nullable) | 
|---|
| 377 | return false; | 
|---|
| 378 |  | 
|---|
| 379 | return getIndexAt(i) == 0; | 
|---|
| 380 | } | 
|---|
| 381 |  | 
|---|
| 382 | template <typename Data> | 
|---|
| 383 | ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool) | 
|---|
| 384 | { | 
|---|
| 385 | size_t row = getIndexAt(row_); | 
|---|
| 386 |  | 
|---|
| 387 | if (is_nullable && row == 0) | 
|---|
| 388 | { | 
|---|
| 389 | if constexpr (has_mapped) | 
|---|
| 390 | return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData()); | 
|---|
| 391 | else | 
|---|
| 392 | return FindResult(data.hasNullKeyData()); | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | if (visit_cache[row] != VisitValue::Empty) | 
|---|
| 396 | { | 
|---|
| 397 | if constexpr (has_mapped) | 
|---|
| 398 | return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found); | 
|---|
| 399 | else | 
|---|
| 400 | return FindResult(visit_cache[row] == VisitValue::Found); | 
|---|
| 401 | } | 
|---|
| 402 |  | 
|---|
| 403 | auto key_holder = getKeyHolder(row_, pool); | 
|---|
| 404 |  | 
|---|
| 405 | typename Data::iterator it; | 
|---|
| 406 | if (saved_hash) | 
|---|
| 407 | it = data.find(*key_holder, saved_hash[row]); | 
|---|
| 408 | else | 
|---|
| 409 | it = data.find(*key_holder); | 
|---|
| 410 |  | 
|---|
| 411 | bool found = it != data.end(); | 
|---|
| 412 | visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound; | 
|---|
| 413 |  | 
|---|
| 414 | if constexpr (has_mapped) | 
|---|
| 415 | { | 
|---|
| 416 | if (found) | 
|---|
| 417 | mapped_cache[row] = it->second; | 
|---|
| 418 | } | 
|---|
| 419 |  | 
|---|
| 420 | if constexpr (has_mapped) | 
|---|
| 421 | return FindResult(&mapped_cache[row], found); | 
|---|
| 422 | else | 
|---|
| 423 | return FindResult(found); | 
|---|
| 424 | } | 
|---|
| 425 |  | 
|---|
| 426 | template <typename Data> | 
|---|
| 427 | ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) | 
|---|
| 428 | { | 
|---|
| 429 | row = getIndexAt(row); | 
|---|
| 430 | if (saved_hash) | 
|---|
| 431 | return saved_hash[row]; | 
|---|
| 432 |  | 
|---|
| 433 | return Base::getHash(data, row, pool); | 
|---|
| 434 | } | 
|---|
| 435 | }; | 
|---|
| 436 |  | 
|---|
| 437 |  | 
|---|
| 438 | // Optional mask for low cardinality columns. | 
|---|
| 439 | template <bool has_low_cardinality> | 
|---|
| 440 | struct LowCardinalityKeys | 
|---|
| 441 | { | 
|---|
| 442 | ColumnRawPtrs nested_columns; | 
|---|
| 443 | ColumnRawPtrs positions; | 
|---|
| 444 | Sizes position_sizes; | 
|---|
| 445 | }; | 
|---|
| 446 |  | 
|---|
| 447 | template <> | 
|---|
| 448 | struct LowCardinalityKeys<false> {}; | 
|---|
| 449 |  | 
|---|
| 450 | /// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits. | 
|---|
| 451 | template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true> | 
|---|
| 452 | struct HashMethodKeysFixed | 
|---|
| 453 | : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_> | 
|---|
| 454 | , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache> | 
|---|
| 455 | { | 
|---|
| 456 | using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>; | 
|---|
| 457 | using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 458 | using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>; | 
|---|
| 459 |  | 
|---|
| 460 | static constexpr bool has_nullable_keys = has_nullable_keys_; | 
|---|
| 461 | static constexpr bool has_low_cardinality = has_low_cardinality_; | 
|---|
| 462 |  | 
|---|
| 463 | LowCardinalityKeys<has_low_cardinality> low_cardinality_keys; | 
|---|
| 464 | Sizes key_sizes; | 
|---|
| 465 | size_t keys_size; | 
|---|
| 466 |  | 
|---|
| 467 | HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &) | 
|---|
| 468 | : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size()) | 
|---|
| 469 | { | 
|---|
| 470 | if constexpr (has_low_cardinality) | 
|---|
| 471 | { | 
|---|
| 472 | low_cardinality_keys.nested_columns.resize(key_columns.size()); | 
|---|
| 473 | low_cardinality_keys.positions.assign(key_columns.size(), nullptr); | 
|---|
| 474 | low_cardinality_keys.position_sizes.resize(key_columns.size()); | 
|---|
| 475 | for (size_t i = 0; i < key_columns.size(); ++i) | 
|---|
| 476 | { | 
|---|
| 477 | if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i])) | 
|---|
| 478 | { | 
|---|
| 479 | low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get(); | 
|---|
| 480 | low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes(); | 
|---|
| 481 | low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType(); | 
|---|
| 482 | } | 
|---|
| 483 | else | 
|---|
| 484 | low_cardinality_keys.nested_columns[i] = key_columns[i]; | 
|---|
| 485 | } | 
|---|
| 486 | } | 
|---|
| 487 | } | 
|---|
| 488 |  | 
|---|
| 489 | ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const | 
|---|
| 490 | { | 
|---|
| 491 | if constexpr (has_nullable_keys) | 
|---|
| 492 | { | 
|---|
| 493 | auto bitmap = Base::createBitmap(row); | 
|---|
| 494 | return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap); | 
|---|
| 495 | } | 
|---|
| 496 | else | 
|---|
| 497 | { | 
|---|
| 498 | if constexpr (has_low_cardinality) | 
|---|
| 499 | return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes, | 
|---|
| 500 | &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes); | 
|---|
| 501 |  | 
|---|
| 502 | return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes); | 
|---|
| 503 | } | 
|---|
| 504 | } | 
|---|
| 505 | }; | 
|---|
| 506 |  | 
|---|
| 507 | /** Hash by concatenating serialized key values. | 
|---|
| 508 | * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. | 
|---|
| 509 | * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. | 
|---|
| 510 | * Therefore, when aggregating by several strings, there is no ambiguity. | 
|---|
| 511 | */ | 
|---|
| 512 | template <typename Value, typename Mapped> | 
|---|
| 513 | struct HashMethodSerialized | 
|---|
| 514 | : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false> | 
|---|
| 515 | { | 
|---|
| 516 | using Self = HashMethodSerialized<Value, Mapped>; | 
|---|
| 517 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; | 
|---|
| 518 |  | 
|---|
| 519 | ColumnRawPtrs key_columns; | 
|---|
| 520 | size_t keys_size; | 
|---|
| 521 |  | 
|---|
| 522 | HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) | 
|---|
| 523 | : key_columns(key_columns_), keys_size(key_columns_.size()) {} | 
|---|
| 524 |  | 
|---|
| 525 | protected: | 
|---|
| 526 | friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; | 
|---|
| 527 |  | 
|---|
| 528 | ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const | 
|---|
| 529 | { | 
|---|
| 530 | return SerializedKeyHolder{ | 
|---|
| 531 | serializeKeysToPoolContiguous(row, keys_size, key_columns, pool), | 
|---|
| 532 | pool}; | 
|---|
| 533 | } | 
|---|
| 534 | }; | 
|---|
| 535 |  | 
|---|
| 536 | /// For the case when there is one string key. | 
|---|
| 537 | template <typename Value, typename Mapped, bool use_cache = true> | 
|---|
| 538 | struct HashMethodHashed | 
|---|
| 539 | : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache> | 
|---|
| 540 | { | 
|---|
| 541 | using Key = UInt128; | 
|---|
| 542 | using Self = HashMethodHashed<Value, Mapped, use_cache>; | 
|---|
| 543 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; | 
|---|
| 544 |  | 
|---|
| 545 | ColumnRawPtrs key_columns; | 
|---|
| 546 |  | 
|---|
| 547 | HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &) | 
|---|
| 548 | : key_columns(std::move(key_columns_)) {} | 
|---|
| 549 |  | 
|---|
| 550 | ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const | 
|---|
| 551 | { | 
|---|
| 552 | return hash128(row, key_columns.size(), key_columns); | 
|---|
| 553 | } | 
|---|
| 554 | }; | 
|---|
| 555 |  | 
|---|
| 556 | } | 
|---|
| 557 | } | 
|---|
| 558 |  | 
|---|