| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/common/radix_partitioning.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/fast_mem.hpp" |
| 12 | #include "duckdb/common/types/column/partitioned_column_data.hpp" |
| 13 | #include "duckdb/common/types/row/partitioned_tuple_data.hpp" |
| 14 | |
| 15 | namespace duckdb { |
| 16 | |
| 17 | class BufferManager; |
| 18 | class Vector; |
| 19 | struct UnifiedVectorFormat; |
| 20 | struct SelectionVector; |
| 21 | |
| 22 | //! Generic radix partitioning functions |
| 23 | struct RadixPartitioning { |
| 24 | public: |
| 25 | //! The number of partitions for a given number of radix bits |
| 26 | static inline constexpr idx_t NumberOfPartitions(idx_t radix_bits) { |
| 27 | return idx_t(1) << radix_bits; |
| 28 | } |
| 29 | |
| 30 | //! Inverse of NumberOfPartitions, given a number of partitions, get the number of radix bits |
| 31 | static inline idx_t RadixBits(idx_t n_partitions) { |
| 32 | D_ASSERT(IsPowerOfTwo(n_partitions)); |
| 33 | for (idx_t r = 0; r < sizeof(idx_t) * 8; r++) { |
| 34 | if (n_partitions == NumberOfPartitions(radix_bits: r)) { |
| 35 | return r; |
| 36 | } |
| 37 | } |
| 38 | throw InternalException("RadixPartitioning::RadixBits unable to find partition count!" ); |
| 39 | } |
| 40 | |
| 41 | static inline constexpr idx_t Shift(idx_t radix_bits) { |
| 42 | return 48 - radix_bits; |
| 43 | } |
| 44 | |
| 45 | static inline constexpr hash_t Mask(idx_t radix_bits) { |
| 46 | return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits); |
| 47 | } |
| 48 | |
| 49 | //! Select using a cutoff on the radix bits of the hash |
| 50 | static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff, |
| 51 | SelectionVector *true_sel, SelectionVector *false_sel); |
| 52 | |
| 53 | //! Convert hashes to bins |
| 54 | static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count); |
| 55 | }; |
| 56 | |
| 57 | //! Templated radix partitioning constants, can be templated to the number of radix bits |
| 58 | template <idx_t radix_bits> |
| 59 | struct RadixPartitioningConstants { |
| 60 | public: |
| 61 | //! Bitmask of the upper bits of the 5th byte |
| 62 | static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits); |
| 63 | static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits); |
| 64 | static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits); |
| 65 | |
| 66 | public: |
| 67 | //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS |
| 68 | static inline hash_t ApplyMask(hash_t hash) { |
| 69 | D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS); |
| 70 | return (hash & MASK) >> SHIFT; |
| 71 | } |
| 72 | }; |
| 73 | |
| 74 | //! RadixPartitionedColumnData is a PartitionedColumnData that partitions input based on the radix of a hash |
| 75 | class RadixPartitionedColumnData : public PartitionedColumnData { |
| 76 | public: |
| 77 | RadixPartitionedColumnData(ClientContext &context, vector<LogicalType> types, idx_t radix_bits, idx_t hash_col_idx); |
| 78 | RadixPartitionedColumnData(const RadixPartitionedColumnData &other); |
| 79 | ~RadixPartitionedColumnData() override; |
| 80 | |
| 81 | idx_t GetRadixBits() const { |
| 82 | return radix_bits; |
| 83 | } |
| 84 | |
| 85 | protected: |
| 86 | //===--------------------------------------------------------------------===// |
| 87 | // Radix Partitioning interface implementation |
| 88 | //===--------------------------------------------------------------------===// |
| 89 | idx_t BufferSize() const override { |
| 90 | switch (radix_bits) { |
| 91 | case 1: |
| 92 | case 2: |
| 93 | case 3: |
| 94 | case 4: |
| 95 | return GetBufferSize(div: 1 << 1); |
| 96 | case 5: |
| 97 | return GetBufferSize(div: 1 << 2); |
| 98 | case 6: |
| 99 | return GetBufferSize(div: 1 << 3); |
| 100 | default: |
| 101 | return GetBufferSize(div: 1 << 4); |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | void InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const override; |
| 106 | void ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) override; |
| 107 | |
| 108 | static constexpr idx_t GetBufferSize(idx_t div) { |
| 109 | return STANDARD_VECTOR_SIZE / div == 0 ? 1 : STANDARD_VECTOR_SIZE / div; |
| 110 | } |
| 111 | |
| 112 | private: |
| 113 | //! The number of radix bits |
| 114 | const idx_t radix_bits; |
| 115 | //! The index of the column holding the hashes |
| 116 | const idx_t hash_col_idx; |
| 117 | }; |
| 118 | |
| 119 | //! RadixPartitionedTupleData is a PartitionedTupleData that partitions input based on the radix of a hash |
| 120 | class RadixPartitionedTupleData : public PartitionedTupleData { |
| 121 | public: |
| 122 | RadixPartitionedTupleData(BufferManager &buffer_manager, const TupleDataLayout &layout, idx_t radix_bits_p, |
| 123 | idx_t hash_col_idx_p); |
| 124 | RadixPartitionedTupleData(const RadixPartitionedTupleData &other); |
| 125 | ~RadixPartitionedTupleData() override; |
| 126 | |
| 127 | idx_t GetRadixBits() const { |
| 128 | return radix_bits; |
| 129 | } |
| 130 | |
| 131 | private: |
| 132 | void Initialize(); |
| 133 | |
| 134 | protected: |
| 135 | //===--------------------------------------------------------------------===// |
| 136 | // Radix Partitioning interface implementation |
| 137 | //===--------------------------------------------------------------------===// |
| 138 | void InitializeAppendStateInternal(PartitionedTupleDataAppendState &state, |
| 139 | TupleDataPinProperties properties) const override; |
| 140 | void ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) override; |
| 141 | void ComputePartitionIndices(Vector &row_locations, idx_t count, Vector &partition_indices) const override; |
| 142 | idx_t MaxPartitionIndex() const override { |
| 143 | return RadixPartitioning::NumberOfPartitions(radix_bits) - 1; |
| 144 | } |
| 145 | |
| 146 | bool RepartitionReverseOrder() const override { |
| 147 | return true; |
| 148 | } |
| 149 | void RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data, |
| 150 | PartitionedTupleData &new_partitioned_data, PartitionedTupleDataAppendState &state, |
| 151 | idx_t finished_partition_idx) const override; |
| 152 | |
| 153 | private: |
| 154 | //! The number of radix bits |
| 155 | const idx_t radix_bits; |
| 156 | //! The index of the column holding the hashes |
| 157 | const idx_t hash_col_idx; |
| 158 | }; |
| 159 | |
| 160 | } // namespace duckdb |
| 161 | |