1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/common/radix_partitioning.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/fast_mem.hpp" |
12 | #include "duckdb/common/types/column/partitioned_column_data.hpp" |
13 | #include "duckdb/common/types/row/partitioned_tuple_data.hpp" |
14 | |
15 | namespace duckdb { |
16 | |
17 | class BufferManager; |
18 | class Vector; |
19 | struct UnifiedVectorFormat; |
20 | struct SelectionVector; |
21 | |
22 | //! Generic radix partitioning functions |
23 | struct RadixPartitioning { |
24 | public: |
25 | //! The number of partitions for a given number of radix bits |
26 | static inline constexpr idx_t NumberOfPartitions(idx_t radix_bits) { |
27 | return idx_t(1) << radix_bits; |
28 | } |
29 | |
30 | //! Inverse of NumberOfPartitions, given a number of partitions, get the number of radix bits |
31 | static inline idx_t RadixBits(idx_t n_partitions) { |
32 | D_ASSERT(IsPowerOfTwo(n_partitions)); |
33 | for (idx_t r = 0; r < sizeof(idx_t) * 8; r++) { |
34 | if (n_partitions == NumberOfPartitions(radix_bits: r)) { |
35 | return r; |
36 | } |
37 | } |
38 | throw InternalException("RadixPartitioning::RadixBits unable to find partition count!" ); |
39 | } |
40 | |
41 | static inline constexpr idx_t Shift(idx_t radix_bits) { |
42 | return 48 - radix_bits; |
43 | } |
44 | |
45 | static inline constexpr hash_t Mask(idx_t radix_bits) { |
46 | return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits); |
47 | } |
48 | |
49 | //! Select using a cutoff on the radix bits of the hash |
50 | static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff, |
51 | SelectionVector *true_sel, SelectionVector *false_sel); |
52 | |
53 | //! Convert hashes to bins |
54 | static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count); |
55 | }; |
56 | |
57 | //! Templated radix partitioning constants, can be templated to the number of radix bits |
58 | template <idx_t radix_bits> |
59 | struct RadixPartitioningConstants { |
60 | public: |
61 | //! Bitmask of the upper bits of the 5th byte |
62 | static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits); |
63 | static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits); |
64 | static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits); |
65 | |
66 | public: |
67 | //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS |
68 | static inline hash_t ApplyMask(hash_t hash) { |
69 | D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS); |
70 | return (hash & MASK) >> SHIFT; |
71 | } |
72 | }; |
73 | |
74 | //! RadixPartitionedColumnData is a PartitionedColumnData that partitions input based on the radix of a hash |
75 | class RadixPartitionedColumnData : public PartitionedColumnData { |
76 | public: |
77 | RadixPartitionedColumnData(ClientContext &context, vector<LogicalType> types, idx_t radix_bits, idx_t hash_col_idx); |
78 | RadixPartitionedColumnData(const RadixPartitionedColumnData &other); |
79 | ~RadixPartitionedColumnData() override; |
80 | |
81 | idx_t GetRadixBits() const { |
82 | return radix_bits; |
83 | } |
84 | |
85 | protected: |
86 | //===--------------------------------------------------------------------===// |
87 | // Radix Partitioning interface implementation |
88 | //===--------------------------------------------------------------------===// |
89 | idx_t BufferSize() const override { |
90 | switch (radix_bits) { |
91 | case 1: |
92 | case 2: |
93 | case 3: |
94 | case 4: |
95 | return GetBufferSize(div: 1 << 1); |
96 | case 5: |
97 | return GetBufferSize(div: 1 << 2); |
98 | case 6: |
99 | return GetBufferSize(div: 1 << 3); |
100 | default: |
101 | return GetBufferSize(div: 1 << 4); |
102 | } |
103 | } |
104 | |
105 | void InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const override; |
106 | void ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) override; |
107 | |
108 | static constexpr idx_t GetBufferSize(idx_t div) { |
109 | return STANDARD_VECTOR_SIZE / div == 0 ? 1 : STANDARD_VECTOR_SIZE / div; |
110 | } |
111 | |
112 | private: |
113 | //! The number of radix bits |
114 | const idx_t radix_bits; |
115 | //! The index of the column holding the hashes |
116 | const idx_t hash_col_idx; |
117 | }; |
118 | |
119 | //! RadixPartitionedTupleData is a PartitionedTupleData that partitions input based on the radix of a hash |
120 | class RadixPartitionedTupleData : public PartitionedTupleData { |
121 | public: |
122 | RadixPartitionedTupleData(BufferManager &buffer_manager, const TupleDataLayout &layout, idx_t radix_bits_p, |
123 | idx_t hash_col_idx_p); |
124 | RadixPartitionedTupleData(const RadixPartitionedTupleData &other); |
125 | ~RadixPartitionedTupleData() override; |
126 | |
127 | idx_t GetRadixBits() const { |
128 | return radix_bits; |
129 | } |
130 | |
131 | private: |
132 | void Initialize(); |
133 | |
134 | protected: |
135 | //===--------------------------------------------------------------------===// |
136 | // Radix Partitioning interface implementation |
137 | //===--------------------------------------------------------------------===// |
138 | void InitializeAppendStateInternal(PartitionedTupleDataAppendState &state, |
139 | TupleDataPinProperties properties) const override; |
140 | void ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) override; |
141 | void ComputePartitionIndices(Vector &row_locations, idx_t count, Vector &partition_indices) const override; |
142 | idx_t MaxPartitionIndex() const override { |
143 | return RadixPartitioning::NumberOfPartitions(radix_bits) - 1; |
144 | } |
145 | |
146 | bool RepartitionReverseOrder() const override { |
147 | return true; |
148 | } |
149 | void RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data, |
150 | PartitionedTupleData &new_partitioned_data, PartitionedTupleDataAppendState &state, |
151 | idx_t finished_partition_idx) const override; |
152 | |
153 | private: |
154 | //! The number of radix bits |
155 | const idx_t radix_bits; |
156 | //! The index of the column holding the hashes |
157 | const idx_t hash_col_idx; |
158 | }; |
159 | |
160 | } // namespace duckdb |
161 | |