| 1 | #pragma once |
| 2 | |
| 3 | |
| 4 | #include <Common/HashTable/HashTable.h> |
| 5 | #include <Common/HashTable/HashTableKeyHolder.h> |
| 6 | #include <Common/ColumnsHashingImpl.h> |
| 7 | #include <Common/Arena.h> |
| 8 | #include <Common/LRUCache.h> |
| 9 | #include <Common/assert_cast.h> |
| 10 | #include <common/unaligned.h> |
| 11 | |
| 12 | #include <Columns/ColumnString.h> |
| 13 | #include <Columns/ColumnFixedString.h> |
| 14 | #include <Columns/ColumnLowCardinality.h> |
| 15 | |
| 16 | #include <Core/Defines.h> |
| 17 | #include <memory> |
| 18 | |
| 19 | namespace DB |
| 20 | { |
| 21 | |
| 22 | namespace ColumnsHashing |
| 23 | { |
| 24 | |
| 25 | /// For the case when there is one numeric key. |
| 26 | /// UInt8/16/32/64 for any type with corresponding bit width. |
| 27 | template <typename Value, typename Mapped, typename FieldType, bool use_cache = true> |
| 28 | struct HashMethodOneNumber |
| 29 | : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache> |
| 30 | { |
| 31 | using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>; |
| 32 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 33 | |
| 34 | const char * vec; |
| 35 | |
| 36 | /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. |
| 37 | HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) |
| 38 | { |
| 39 | vec = key_columns[0]->getRawData().data; |
| 40 | } |
| 41 | |
| 42 | HashMethodOneNumber(const IColumn * column) |
| 43 | { |
| 44 | vec = column->getRawData().data; |
| 45 | } |
| 46 | |
| 47 | /// Creates context. Method is called once and result context is used in all threads. |
| 48 | using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr |
| 49 | |
| 50 | /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr. |
| 51 | /// Data is a HashTable where to insert key from column's row. |
| 52 | /// For Serialized method, key may be placed in pool. |
| 53 | using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult |
| 54 | |
| 55 | /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. |
| 56 | using Base::findKey; /// (Data & data, size_t row, Arena & pool) -> FindResult |
| 57 | |
| 58 | /// Get hash value of row. |
| 59 | using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t |
| 60 | |
| 61 | /// Is used for default implementation in HashMethodBase. |
| 62 | FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); } |
| 63 | }; |
| 64 | |
| 65 | |
| 66 | /// For the case when there is one string key. |
| 67 | template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true> |
| 68 | struct HashMethodString |
| 69 | : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache> |
| 70 | { |
| 71 | using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>; |
| 72 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 73 | |
| 74 | const IColumn::Offset * offsets; |
| 75 | const UInt8 * chars; |
| 76 | |
| 77 | HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) |
| 78 | { |
| 79 | const IColumn & column = *key_columns[0]; |
| 80 | const ColumnString & column_string = assert_cast<const ColumnString &>(column); |
| 81 | offsets = column_string.getOffsets().data(); |
| 82 | chars = column_string.getChars().data(); |
| 83 | } |
| 84 | |
| 85 | auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const |
| 86 | { |
| 87 | StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); |
| 88 | |
| 89 | if constexpr (place_string_to_arena) |
| 90 | { |
| 91 | return ArenaKeyHolder{key, pool}; |
| 92 | } |
| 93 | else |
| 94 | { |
| 95 | return key; |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | protected: |
| 100 | friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 101 | }; |
| 102 | |
| 103 | |
| 104 | /// For the case when there is one fixed-length string key. |
| 105 | template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true> |
| 106 | struct HashMethodFixedString |
| 107 | : public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache> |
| 108 | { |
| 109 | using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>; |
| 110 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 111 | |
| 112 | size_t n; |
| 113 | const ColumnFixedString::Chars * chars; |
| 114 | |
| 115 | HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) |
| 116 | { |
| 117 | const IColumn & column = *key_columns[0]; |
| 118 | const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column); |
| 119 | n = column_string.getN(); |
| 120 | chars = &column_string.getChars(); |
| 121 | } |
| 122 | |
| 123 | auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const |
| 124 | { |
| 125 | StringRef key(&(*chars)[row * n], n); |
| 126 | |
| 127 | if constexpr (place_string_to_arena) |
| 128 | { |
| 129 | return ArenaKeyHolder{key, pool}; |
| 130 | } |
| 131 | else |
| 132 | { |
| 133 | return key; |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | protected: |
| 138 | friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 139 | }; |
| 140 | |
| 141 | |
| 142 | /// Cache stores dictionaries and saved_hash per dictionary key. |
| 143 | class LowCardinalityDictionaryCache : public HashMethodContext |
| 144 | { |
| 145 | public: |
| 146 | /// Will assume that dictionaries with same hash has the same keys. |
| 147 | /// Just in case, check that they have also the same size. |
| 148 | struct DictionaryKey |
| 149 | { |
| 150 | UInt128 hash; |
| 151 | UInt64 size; |
| 152 | |
| 153 | bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; } |
| 154 | }; |
| 155 | |
| 156 | struct DictionaryKeyHash |
| 157 | { |
| 158 | size_t operator()(const DictionaryKey & key) const |
| 159 | { |
| 160 | SipHash hash; |
| 161 | hash.update(key.hash.low); |
| 162 | hash.update(key.hash.high); |
| 163 | hash.update(key.size); |
| 164 | return hash.get64(); |
| 165 | } |
| 166 | }; |
| 167 | |
| 168 | struct CachedValues |
| 169 | { |
| 170 | /// Store ptr to dictionary to be sure it won't be deleted. |
| 171 | ColumnPtr dictionary_holder; |
| 172 | /// Hashes for dictionary keys. |
| 173 | const UInt64 * saved_hash = nullptr; |
| 174 | }; |
| 175 | |
| 176 | using CachedValuesPtr = std::shared_ptr<CachedValues>; |
| 177 | |
| 178 | explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {} |
| 179 | |
| 180 | CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); } |
| 181 | void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); } |
| 182 | |
| 183 | private: |
| 184 | using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>; |
| 185 | Cache cache; |
| 186 | }; |
| 187 | |
| 188 | |
| 189 | /// Single low cardinality column. |
| 190 | template <typename SingleColumnMethod, typename Mapped, bool use_cache> |
| 191 | struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod |
| 192 | { |
| 193 | using Base = SingleColumnMethod; |
| 194 | |
| 195 | enum class VisitValue |
| 196 | { |
| 197 | Empty = 0, |
| 198 | Found = 1, |
| 199 | NotFound = 2, |
| 200 | }; |
| 201 | |
| 202 | static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; |
| 203 | using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>; |
| 204 | using FindResult = columns_hashing_impl::FindResultImpl<Mapped>; |
| 205 | |
| 206 | static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) |
| 207 | { |
| 208 | return std::make_shared<LowCardinalityDictionaryCache>(settings); |
| 209 | } |
| 210 | |
| 211 | ColumnRawPtrs key_columns; |
| 212 | const IColumn * positions = nullptr; |
| 213 | size_t size_of_index_type = 0; |
| 214 | |
| 215 | /// saved hash is from current column or from cache. |
| 216 | const UInt64 * saved_hash = nullptr; |
| 217 | /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted. |
| 218 | ColumnPtr dictionary_holder; |
| 219 | |
| 220 | /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. |
| 221 | columns_hashing_impl::MappedCache<Mapped> mapped_cache; |
| 222 | PaddedPODArray<VisitValue> visit_cache; |
| 223 | |
| 224 | /// If initialized column is nullable. |
| 225 | bool is_nullable = false; |
| 226 | |
| 227 | static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * low_cardinality_column) |
| 228 | { |
| 229 | auto column = typeid_cast<const ColumnLowCardinality *>(low_cardinality_column); |
| 230 | if (!column) |
| 231 | throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. " |
| 232 | "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR); |
| 233 | return *column; |
| 234 | } |
| 235 | |
| 236 | HashMethodSingleLowCardinalityColumn( |
| 237 | const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context) |
| 238 | : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context) |
| 239 | { |
| 240 | auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]); |
| 241 | |
| 242 | if (!context) |
| 243 | throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn" , |
| 244 | ErrorCodes::LOGICAL_ERROR); |
| 245 | |
| 246 | LowCardinalityDictionaryCache * lcd_cache; |
| 247 | if constexpr (use_cache) |
| 248 | { |
| 249 | lcd_cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get()); |
| 250 | if (!lcd_cache) |
| 251 | { |
| 252 | const auto & cached_val = *context; |
| 253 | throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: " |
| 254 | + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); |
| 259 | is_nullable = column->getDictionary().nestedColumnIsNullable(); |
| 260 | key_columns = {dict}; |
| 261 | bool is_shared_dict = column->isSharedDictionary(); |
| 262 | |
| 263 | typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key; |
| 264 | typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values; |
| 265 | |
| 266 | if (is_shared_dict) |
| 267 | { |
| 268 | dictionary_key = {column->getDictionary().getHash(), dict->size()}; |
| 269 | if constexpr (use_cache) |
| 270 | cached_values = lcd_cache->get(dictionary_key); |
| 271 | } |
| 272 | |
| 273 | if (cached_values) |
| 274 | { |
| 275 | saved_hash = cached_values->saved_hash; |
| 276 | dictionary_holder = cached_values->dictionary_holder; |
| 277 | } |
| 278 | else |
| 279 | { |
| 280 | saved_hash = column->getDictionary().tryGetSavedHash(); |
| 281 | dictionary_holder = column->getDictionaryPtr(); |
| 282 | |
| 283 | if constexpr (use_cache) |
| 284 | { |
| 285 | if (is_shared_dict) |
| 286 | { |
| 287 | cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>(); |
| 288 | cached_values->saved_hash = saved_hash; |
| 289 | cached_values->dictionary_holder = dictionary_holder; |
| 290 | |
| 291 | lcd_cache->set(dictionary_key, cached_values); |
| 292 | } |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | if constexpr (has_mapped) |
| 297 | mapped_cache.resize(key_columns[0]->size()); |
| 298 | |
| 299 | VisitValue empty(VisitValue::Empty); |
| 300 | visit_cache.assign(key_columns[0]->size(), empty); |
| 301 | |
| 302 | size_of_index_type = column->getSizeOfIndexType(); |
| 303 | positions = column->getIndexesPtr().get(); |
| 304 | } |
| 305 | |
| 306 | ALWAYS_INLINE size_t getIndexAt(size_t row) const |
| 307 | { |
| 308 | switch (size_of_index_type) |
| 309 | { |
| 310 | case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(positions)->getElement(row); |
| 311 | case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(positions)->getElement(row); |
| 312 | case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(positions)->getElement(row); |
| 313 | case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(positions)->getElement(row); |
| 314 | default: throw Exception("Unexpected size of index type for low cardinality column." , ErrorCodes::LOGICAL_ERROR); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | /// Get the key holder from the key columns for insertion into the hash table. |
| 319 | ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const |
| 320 | { |
| 321 | return Base::getKeyHolder(getIndexAt(row), pool); |
| 322 | } |
| 323 | |
| 324 | template <typename Data> |
| 325 | ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool) |
| 326 | { |
| 327 | size_t row = getIndexAt(row_); |
| 328 | |
| 329 | if (is_nullable && row == 0) |
| 330 | { |
| 331 | visit_cache[row] = VisitValue::Found; |
| 332 | bool has_null_key = data.hasNullKeyData(); |
| 333 | data.hasNullKeyData() = true; |
| 334 | |
| 335 | if constexpr (has_mapped) |
| 336 | return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key); |
| 337 | else |
| 338 | return EmplaceResult(!has_null_key); |
| 339 | } |
| 340 | |
| 341 | if (visit_cache[row] == VisitValue::Found) |
| 342 | { |
| 343 | if constexpr (has_mapped) |
| 344 | return EmplaceResult(mapped_cache[row], mapped_cache[row], false); |
| 345 | else |
| 346 | return EmplaceResult(false); |
| 347 | } |
| 348 | |
| 349 | auto key_holder = getKeyHolder(row_, pool); |
| 350 | |
| 351 | bool inserted = false; |
| 352 | typename Data::LookupResult it; |
| 353 | if (saved_hash) |
| 354 | data.emplace(key_holder, it, inserted, saved_hash[row]); |
| 355 | else |
| 356 | data.emplace(key_holder, it, inserted); |
| 357 | |
| 358 | visit_cache[row] = VisitValue::Found; |
| 359 | |
| 360 | if constexpr (has_mapped) |
| 361 | { |
| 362 | auto & mapped = it->getMapped(); |
| 363 | if (inserted) |
| 364 | { |
| 365 | new (&mapped) Mapped(); |
| 366 | } |
| 367 | mapped_cache[row] = mapped; |
| 368 | return EmplaceResult(mapped, mapped_cache[row], inserted); |
| 369 | } |
| 370 | else |
| 371 | return EmplaceResult(inserted); |
| 372 | } |
| 373 | |
| 374 | ALWAYS_INLINE bool isNullAt(size_t i) |
| 375 | { |
| 376 | if (!is_nullable) |
| 377 | return false; |
| 378 | |
| 379 | return getIndexAt(i) == 0; |
| 380 | } |
| 381 | |
| 382 | template <typename Data> |
| 383 | ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool) |
| 384 | { |
| 385 | size_t row = getIndexAt(row_); |
| 386 | |
| 387 | if (is_nullable && row == 0) |
| 388 | { |
| 389 | if constexpr (has_mapped) |
| 390 | return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData()); |
| 391 | else |
| 392 | return FindResult(data.hasNullKeyData()); |
| 393 | } |
| 394 | |
| 395 | if (visit_cache[row] != VisitValue::Empty) |
| 396 | { |
| 397 | if constexpr (has_mapped) |
| 398 | return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found); |
| 399 | else |
| 400 | return FindResult(visit_cache[row] == VisitValue::Found); |
| 401 | } |
| 402 | |
| 403 | auto key_holder = getKeyHolder(row_, pool); |
| 404 | |
| 405 | typename Data::iterator it; |
| 406 | if (saved_hash) |
| 407 | it = data.find(*key_holder, saved_hash[row]); |
| 408 | else |
| 409 | it = data.find(*key_holder); |
| 410 | |
| 411 | bool found = it != data.end(); |
| 412 | visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound; |
| 413 | |
| 414 | if constexpr (has_mapped) |
| 415 | { |
| 416 | if (found) |
| 417 | mapped_cache[row] = it->second; |
| 418 | } |
| 419 | |
| 420 | if constexpr (has_mapped) |
| 421 | return FindResult(&mapped_cache[row], found); |
| 422 | else |
| 423 | return FindResult(found); |
| 424 | } |
| 425 | |
| 426 | template <typename Data> |
| 427 | ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) |
| 428 | { |
| 429 | row = getIndexAt(row); |
| 430 | if (saved_hash) |
| 431 | return saved_hash[row]; |
| 432 | |
| 433 | return Base::getHash(data, row, pool); |
| 434 | } |
| 435 | }; |
| 436 | |
| 437 | |
| 438 | // Optional mask for low cardinality columns. |
| 439 | template <bool has_low_cardinality> |
| 440 | struct LowCardinalityKeys |
| 441 | { |
| 442 | ColumnRawPtrs nested_columns; |
| 443 | ColumnRawPtrs positions; |
| 444 | Sizes position_sizes; |
| 445 | }; |
| 446 | |
| 447 | template <> |
| 448 | struct LowCardinalityKeys<false> {}; |
| 449 | |
| 450 | /// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits. |
| 451 | template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true> |
| 452 | struct HashMethodKeysFixed |
| 453 | : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_> |
| 454 | , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache> |
| 455 | { |
| 456 | using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>; |
| 457 | using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 458 | using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>; |
| 459 | |
| 460 | static constexpr bool has_nullable_keys = has_nullable_keys_; |
| 461 | static constexpr bool has_low_cardinality = has_low_cardinality_; |
| 462 | |
| 463 | LowCardinalityKeys<has_low_cardinality> low_cardinality_keys; |
| 464 | Sizes key_sizes; |
| 465 | size_t keys_size; |
| 466 | |
| 467 | HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &) |
| 468 | : Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size()) |
| 469 | { |
| 470 | if constexpr (has_low_cardinality) |
| 471 | { |
| 472 | low_cardinality_keys.nested_columns.resize(key_columns.size()); |
| 473 | low_cardinality_keys.positions.assign(key_columns.size(), nullptr); |
| 474 | low_cardinality_keys.position_sizes.resize(key_columns.size()); |
| 475 | for (size_t i = 0; i < key_columns.size(); ++i) |
| 476 | { |
| 477 | if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i])) |
| 478 | { |
| 479 | low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get(); |
| 480 | low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes(); |
| 481 | low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType(); |
| 482 | } |
| 483 | else |
| 484 | low_cardinality_keys.nested_columns[i] = key_columns[i]; |
| 485 | } |
| 486 | } |
| 487 | } |
| 488 | |
| 489 | ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const |
| 490 | { |
| 491 | if constexpr (has_nullable_keys) |
| 492 | { |
| 493 | auto bitmap = Base::createBitmap(row); |
| 494 | return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap); |
| 495 | } |
| 496 | else |
| 497 | { |
| 498 | if constexpr (has_low_cardinality) |
| 499 | return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes, |
| 500 | &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes); |
| 501 | |
| 502 | return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes); |
| 503 | } |
| 504 | } |
| 505 | }; |
| 506 | |
| 507 | /** Hash by concatenating serialized key values. |
| 508 | * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. |
| 509 | * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. |
| 510 | * Therefore, when aggregating by several strings, there is no ambiguity. |
| 511 | */ |
| 512 | template <typename Value, typename Mapped> |
| 513 | struct HashMethodSerialized |
| 514 | : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false> |
| 515 | { |
| 516 | using Self = HashMethodSerialized<Value, Mapped>; |
| 517 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; |
| 518 | |
| 519 | ColumnRawPtrs key_columns; |
| 520 | size_t keys_size; |
| 521 | |
| 522 | HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) |
| 523 | : key_columns(key_columns_), keys_size(key_columns_.size()) {} |
| 524 | |
| 525 | protected: |
| 526 | friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; |
| 527 | |
| 528 | ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const |
| 529 | { |
| 530 | return SerializedKeyHolder{ |
| 531 | serializeKeysToPoolContiguous(row, keys_size, key_columns, pool), |
| 532 | pool}; |
| 533 | } |
| 534 | }; |
| 535 | |
| 536 | /// For the case when there is one string key. |
| 537 | template <typename Value, typename Mapped, bool use_cache = true> |
| 538 | struct HashMethodHashed |
| 539 | : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache> |
| 540 | { |
| 541 | using Key = UInt128; |
| 542 | using Self = HashMethodHashed<Value, Mapped, use_cache>; |
| 543 | using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; |
| 544 | |
| 545 | ColumnRawPtrs key_columns; |
| 546 | |
| 547 | HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &) |
| 548 | : key_columns(std::move(key_columns_)) {} |
| 549 | |
| 550 | ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const |
| 551 | { |
| 552 | return hash128(row, key_columns.size(), key_columns); |
| 553 | } |
| 554 | }; |
| 555 | |
| 556 | } |
| 557 | } |
| 558 | |