| 1 | #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp" |
| 2 | |
| 3 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
| 4 | #include "duckdb/execution/expression_executor.hpp" |
| 5 | #include "duckdb/planner/expression/bound_aggregate_expression.hpp" |
| 6 | #include "duckdb/planner/expression/bound_constant_expression.hpp" |
| 7 | #include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp" |
| 8 | |
| 9 | using namespace duckdb; |
| 10 | using namespace std; |
| 11 | |
//! Operator state for PhysicalHashAggregate: owns the aggregate hash table and
//! the scratch chunks used while building the HT and while scanning results out.
class PhysicalHashAggregateState : public PhysicalOperatorState {
public:
	PhysicalHashAggregateState(PhysicalHashAggregate *parent, PhysicalOperator *child);

	//! Materialized GROUP BY expression
	DataChunk group_chunk;
	//! Materialized aggregates
	DataChunk aggregate_chunk;
	//! The current position to scan the HT for output tuples
	idx_t ht_scan_position;
	//! Total tuples consumed from the child; used to detect the empty-input
	//! special case for aggregation without GROUP BY
	idx_t tuples_scanned;
	//! The HT
	unique_ptr<SuperLargeHashTable> ht;
	//! The payload chunk, only used while filling the HT
	DataChunk payload_chunk;
	//! Expression executor for the GROUP BY chunk
	ExpressionExecutor group_executor;
	//! Expression state for the payload
	ExpressionExecutor payload_executor;
};
| 32 | |
| 33 | PhysicalHashAggregate::PhysicalHashAggregate(vector<TypeId> types, vector<unique_ptr<Expression>> expressions, |
| 34 | PhysicalOperatorType type) |
| 35 | : PhysicalHashAggregate(types, move(expressions), {}, type) { |
| 36 | } |
| 37 | |
| 38 | PhysicalHashAggregate::PhysicalHashAggregate(vector<TypeId> types, vector<unique_ptr<Expression>> expressions, |
| 39 | vector<unique_ptr<Expression>> groups, PhysicalOperatorType type) |
| 40 | : PhysicalOperator(type, types), groups(move(groups)) { |
| 41 | // get a list of all aggregates to be computed |
| 42 | // fake a single group with a constant value for aggregation without groups |
| 43 | if (this->groups.size() == 0) { |
| 44 | auto ce = make_unique<BoundConstantExpression>(Value::TINYINT(42)); |
| 45 | this->groups.push_back(move(ce)); |
| 46 | is_implicit_aggr = true; |
| 47 | } else { |
| 48 | is_implicit_aggr = false; |
| 49 | } |
| 50 | for (auto &expr : expressions) { |
| 51 | assert(expr->expression_class == ExpressionClass::BOUND_AGGREGATE); |
| 52 | assert(expr->IsAggregate()); |
| 53 | aggregates.push_back(move(expr)); |
| 54 | } |
| 55 | } |
| 56 | |
void PhysicalHashAggregate::GetChunkInternal(ClientContext &context, DataChunk &chunk, PhysicalOperatorState *state_) {
	auto state = reinterpret_cast<PhysicalHashAggregateState *>(state_);
	// Phase 1: exhaust the child operator, folding every chunk into the HT.
	// This runs to completion on the first call; later calls break immediately
	// because the child is exhausted, and only the HT scan below does work.
	do {
		// resolve the child chunk if there is one
		children[0]->GetChunk(context, state->child_chunk, state->child_state.get());
		if (state->child_chunk.size() == 0) {
			break;
		}
		// aggregation with groups
		DataChunk &group_chunk = state->group_chunk;
		DataChunk &payload_chunk = state->payload_chunk;
		// evaluate the GROUP BY expressions over the child chunk
		state->group_executor.Execute(state->child_chunk, group_chunk);
		state->payload_executor.SetChunk(state->child_chunk);

		payload_chunk.Reset();
		// payload_idx walks the payload chunk columns; payload_expr_idx walks
		// the registered payload expressions. They diverge for argument-less
		// aggregates (e.g. COUNT(*)), which occupy a payload column but have no
		// expression to evaluate.
		idx_t payload_idx = 0, payload_expr_idx = 0;
		payload_chunk.SetCardinality(group_chunk);
		for (idx_t i = 0; i < aggregates.size(); i++) {
			auto &aggr = (BoundAggregateExpression &)*aggregates[i];
			if (aggr.children.size()) {
				// evaluate one payload column per aggregate argument
				for (idx_t j = 0; j < aggr.children.size(); ++j) {
					state->payload_executor.ExecuteExpression(payload_expr_idx, payload_chunk.data[payload_idx]);
					payload_idx++;
					payload_expr_idx++;
				}
			} else {
				// argument-less aggregate: skip its placeholder payload column
				payload_idx++;
			}
		}

		group_chunk.Verify();
		payload_chunk.Verify();
		assert(payload_chunk.column_count() == 0 || group_chunk.size() == payload_chunk.size());

		// update the aggregate states in the HT for this batch of groups
		state->ht->AddChunk(group_chunk, payload_chunk);
		state->tuples_scanned += state->child_chunk.size();
	} while (state->child_chunk.size() > 0);

	// Phase 2: scan the HT, producing (at most) one output chunk per call;
	// ht_scan_position persists across calls to resume the scan
	state->group_chunk.Reset();
	state->aggregate_chunk.Reset();
	idx_t elements_found = state->ht->Scan(state->ht_scan_position, state->group_chunk, state->aggregate_chunk);

	// special case hack to sort out aggregating from empty intermediates
	// for aggregations without groups
	if (elements_found == 0 && state->tuples_scanned == 0 && is_implicit_aggr) {
		assert(chunk.column_count() == aggregates.size());
		// for each column in the aggregates, set to initial state
		chunk.SetCardinality(1);
		for (idx_t i = 0; i < chunk.column_count(); i++) {
			assert(aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
			auto &aggr = (BoundAggregateExpression &)*aggregates[i];
			// build a fresh aggregate state and finalize it directly, so e.g.
			// COUNT(*) over empty input yields 0 instead of no rows at all
			auto aggr_state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
			aggr.function.initialize(aggr_state.get());

			Vector state_vector(Value::POINTER((uintptr_t)aggr_state.get()));
			aggr.function.finalize(state_vector, chunk.data[i], 1);
		}
		state->finished = true;
		return;
	}
	// HT scan exhausted: emit an empty chunk and mark the operator done
	if (elements_found == 0 && !state->finished) {
		state->finished = true;
		return;
	}
	// we finished the child chunk
	// actually compute the final projection list now
	idx_t chunk_index = 0;
	chunk.SetCardinality(elements_found);
	// when the output schema includes the groups, reference them first;
	// otherwise (e.g. implicit aggregation where the group is the faked
	// constant) only the aggregate columns are emitted
	if (state->group_chunk.column_count() + state->aggregate_chunk.column_count() == chunk.column_count()) {
		for (idx_t col_idx = 0; col_idx < state->group_chunk.column_count(); col_idx++) {
			chunk.data[chunk_index++].Reference(state->group_chunk.data[col_idx]);
		}
	} else {
		assert(state->aggregate_chunk.column_count() == chunk.column_count());
	}

	for (idx_t col_idx = 0; col_idx < state->aggregate_chunk.column_count(); col_idx++) {
		chunk.data[chunk_index++].Reference(state->aggregate_chunk.data[col_idx]);
	}
}
| 137 | |
| 138 | unique_ptr<PhysicalOperatorState> PhysicalHashAggregate::GetOperatorState() { |
| 139 | assert(children.size() > 0); |
| 140 | auto state = make_unique<PhysicalHashAggregateState>(this, children[0].get()); |
| 141 | state->tuples_scanned = 0; |
| 142 | vector<TypeId> group_types, payload_types; |
| 143 | vector<BoundAggregateExpression *> aggregate_kind; |
| 144 | for (auto &expr : groups) { |
| 145 | group_types.push_back(expr->return_type); |
| 146 | } |
| 147 | for (auto &expr : aggregates) { |
| 148 | assert(expr->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE); |
| 149 | auto &aggr = (BoundAggregateExpression &)*expr; |
| 150 | aggregate_kind.push_back(&aggr); |
| 151 | if (aggr.children.size()) { |
| 152 | for (idx_t i = 0; i < aggr.children.size(); ++i) { |
| 153 | payload_types.push_back(aggr.children[i]->return_type); |
| 154 | state->payload_executor.AddExpression(*aggr.children[i]); |
| 155 | } |
| 156 | } else { |
| 157 | // COUNT(*) |
| 158 | payload_types.push_back(TypeId::INT64); |
| 159 | } |
| 160 | } |
| 161 | if (payload_types.size() > 0) { |
| 162 | state->payload_chunk.Initialize(payload_types); |
| 163 | } |
| 164 | |
| 165 | state->ht = make_unique<SuperLargeHashTable>(1024, group_types, payload_types, aggregate_kind); |
| 166 | return move(state); |
| 167 | } |
| 168 | |
| 169 | PhysicalHashAggregateState::PhysicalHashAggregateState(PhysicalHashAggregate *parent, PhysicalOperator *child) |
| 170 | : PhysicalOperatorState(child), ht_scan_position(0), tuples_scanned(0), group_executor(parent->groups) { |
| 171 | vector<TypeId> group_types, aggregate_types; |
| 172 | for (auto &expr : parent->groups) { |
| 173 | group_types.push_back(expr->return_type); |
| 174 | } |
| 175 | group_chunk.Initialize(group_types); |
| 176 | for (auto &expr : parent->aggregates) { |
| 177 | aggregate_types.push_back(expr->return_type); |
| 178 | } |
| 179 | if (aggregate_types.size() > 0) { |
| 180 | aggregate_chunk.Initialize(aggregate_types); |
| 181 | } |
| 182 | } |
| 183 | |