1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/execution/partitionable_hashtable.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/execution/aggregate_hashtable.hpp" |
12 | |
13 | namespace duckdb { |
14 | |
15 | struct RadixPartitionInfo { |
16 | explicit RadixPartitionInfo(idx_t n_partitions_upper_bound); |
17 | const idx_t n_partitions; |
18 | const idx_t radix_bits; |
19 | const hash_t radix_mask; |
20 | const idx_t radix_shift; |
21 | |
22 | inline hash_t GetHashPartition(hash_t hash) const { |
23 | return (hash & radix_mask) >> radix_shift; |
24 | } |
25 | }; |
26 | |
27 | typedef vector<unique_ptr<GroupedAggregateHashTable>> HashTableList; // NOLINT |
28 | |
29 | class PartitionableHashTable { |
30 | public: |
31 | PartitionableHashTable(ClientContext &context, Allocator &allocator, RadixPartitionInfo &partition_info_p, |
32 | vector<LogicalType> group_types_p, vector<LogicalType> payload_types_p, |
33 | vector<BoundAggregateExpression *> bindings_p); |
34 | |
35 | idx_t AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition, const unsafe_vector<idx_t> &filter); |
36 | void Partition(); |
37 | bool IsPartitioned(); |
38 | |
39 | HashTableList GetPartition(idx_t partition); |
40 | HashTableList GetUnpartitioned(); |
41 | |
42 | void Finalize(); |
43 | |
44 | private: |
45 | ClientContext &context; |
46 | Allocator &allocator; |
47 | vector<LogicalType> group_types; |
48 | vector<LogicalType> payload_types; |
49 | vector<BoundAggregateExpression *> bindings; |
50 | |
51 | bool is_partitioned; |
52 | RadixPartitionInfo &partition_info; |
53 | vector<SelectionVector> sel_vectors; |
54 | unsafe_vector<idx_t> sel_vector_sizes; |
55 | DataChunk group_subset, payload_subset; |
56 | Vector hashes, hashes_subset; |
57 | AggregateHTAppendState append_state; |
58 | |
59 | HashTableList unpartitioned_hts; |
60 | vector<HashTableList> radix_partitioned_hts; |
61 | idx_t tuple_size; |
62 | |
63 | private: |
64 | idx_t ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes, DataChunk &payload, |
65 | const unsafe_vector<idx_t> &filter); |
66 | //! Returns the HT entry size used for intermediate hash tables |
67 | HtEntryType GetHTEntrySize(); |
68 | }; |
69 | } // namespace duckdb |
70 | |