| 1 | #include "duckdb/execution/operator/scan/physical_positional_scan.hpp" |
| 2 | |
| 3 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" |
| 4 | #include "duckdb/common/string_util.hpp" |
| 5 | #include "duckdb/parallel/interrupt.hpp" |
| 6 | #include "duckdb/planner/expression/bound_conjunction_expression.hpp" |
| 7 | #include "duckdb/transaction/transaction.hpp" |
| 8 | |
| 9 | #include <utility> |
| 10 | |
| 11 | namespace duckdb { |
| 12 | |
| 13 | PhysicalPositionalScan::PhysicalPositionalScan(vector<LogicalType> types, unique_ptr<PhysicalOperator> left, |
| 14 | unique_ptr<PhysicalOperator> right) |
| 15 | : PhysicalOperator(PhysicalOperatorType::POSITIONAL_SCAN, std::move(types), |
| 16 | MaxValue(a: left->estimated_cardinality, b: right->estimated_cardinality)) { |
| 17 | |
| 18 | // Manage the children ourselves |
| 19 | if (left->type == PhysicalOperatorType::TABLE_SCAN) { |
| 20 | child_tables.emplace_back(args: std::move(left)); |
| 21 | } else if (left->type == PhysicalOperatorType::POSITIONAL_SCAN) { |
| 22 | auto &left_scan = left->Cast<PhysicalPositionalScan>(); |
| 23 | child_tables = std::move(left_scan.child_tables); |
| 24 | } else { |
| 25 | throw InternalException("Invalid left input for PhysicalPositionalScan" ); |
| 26 | } |
| 27 | |
| 28 | if (right->type == PhysicalOperatorType::TABLE_SCAN) { |
| 29 | child_tables.emplace_back(args: std::move(right)); |
| 30 | } else if (right->type == PhysicalOperatorType::POSITIONAL_SCAN) { |
| 31 | auto &right_scan = right->Cast<PhysicalPositionalScan>(); |
| 32 | auto &right_tables = right_scan.child_tables; |
| 33 | child_tables.reserve(n: child_tables.size() + right_tables.size()); |
| 34 | std::move(first: right_tables.begin(), last: right_tables.end(), result: std::back_inserter(x&: child_tables)); |
| 35 | } else { |
| 36 | throw InternalException("Invalid right input for PhysicalPositionalScan" ); |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | class PositionalScanGlobalSourceState : public GlobalSourceState { |
| 41 | public: |
| 42 | PositionalScanGlobalSourceState(ClientContext &context, const PhysicalPositionalScan &op) { |
| 43 | for (const auto &table : op.child_tables) { |
| 44 | global_states.emplace_back(args: table->GetGlobalSourceState(context)); |
| 45 | } |
| 46 | } |
| 47 | |
| 48 | vector<unique_ptr<GlobalSourceState>> global_states; |
| 49 | |
| 50 | idx_t MaxThreads() override { |
| 51 | return 1; |
| 52 | } |
| 53 | }; |
| 54 | |
| 55 | class PositionalTableScanner { |
| 56 | public: |
| 57 | PositionalTableScanner(ExecutionContext &context, PhysicalOperator &table_p, GlobalSourceState &gstate_p) |
| 58 | : table(table_p), global_state(gstate_p), source_offset(0), exhausted(false) { |
| 59 | local_state = table.GetLocalSourceState(context, gstate&: gstate_p); |
| 60 | source.Initialize(allocator&: Allocator::Get(context&: context.client), types: table.types); |
| 61 | } |
| 62 | |
| 63 | idx_t Refill(ExecutionContext &context) { |
| 64 | if (source_offset >= source.size()) { |
| 65 | if (!exhausted) { |
| 66 | source.Reset(); |
| 67 | |
| 68 | InterruptState interrupt_state; |
| 69 | OperatorSourceInput source_input {.global_state: global_state, .local_state: *local_state, .interrupt_state: interrupt_state}; |
| 70 | auto source_result = table.GetData(context, chunk&: source, input&: source_input); |
| 71 | if (source_result == SourceResultType::BLOCKED) { |
| 72 | throw NotImplementedException( |
| 73 | "Unexpected interrupt from table Source in PositionalTableScanner refill" ); |
| 74 | } |
| 75 | } |
| 76 | source_offset = 0; |
| 77 | } |
| 78 | |
| 79 | const auto available = source.size() - source_offset; |
| 80 | if (!available) { |
| 81 | if (!exhausted) { |
| 82 | source.Reset(); |
| 83 | for (idx_t i = 0; i < source.ColumnCount(); ++i) { |
| 84 | auto &vec = source.data[i]; |
| 85 | vec.SetVectorType(VectorType::CONSTANT_VECTOR); |
| 86 | ConstantVector::SetNull(vector&: vec, is_null: true); |
| 87 | } |
| 88 | exhausted = true; |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | return available; |
| 93 | } |
| 94 | |
| 95 | idx_t CopyData(ExecutionContext &context, DataChunk &output, const idx_t count, const idx_t col_offset) { |
| 96 | if (!source_offset && (source.size() >= count || exhausted)) { |
| 97 | // Fast track: aligned and has enough data |
| 98 | for (idx_t i = 0; i < source.ColumnCount(); ++i) { |
| 99 | output.data[col_offset + i].Reference(other&: source.data[i]); |
| 100 | } |
| 101 | source_offset += count; |
| 102 | } else { |
| 103 | // Copy data |
| 104 | for (idx_t target_offset = 0; target_offset < count;) { |
| 105 | const auto needed = count - target_offset; |
| 106 | const auto available = exhausted ? needed : (source.size() - source_offset); |
| 107 | const auto copy_size = MinValue(a: needed, b: available); |
| 108 | const auto source_count = source_offset + copy_size; |
| 109 | for (idx_t i = 0; i < source.ColumnCount(); ++i) { |
| 110 | VectorOperations::Copy(source: source.data[i], target&: output.data[col_offset + i], source_count, source_offset, |
| 111 | target_offset); |
| 112 | } |
| 113 | target_offset += copy_size; |
| 114 | source_offset += copy_size; |
| 115 | Refill(context); |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | return source.ColumnCount(); |
| 120 | } |
| 121 | |
| 122 | double GetProgress(ClientContext &context) { |
| 123 | return table.GetProgress(context, gstate&: global_state); |
| 124 | } |
| 125 | |
| 126 | PhysicalOperator &table; |
| 127 | GlobalSourceState &global_state; |
| 128 | unique_ptr<LocalSourceState> local_state; |
| 129 | DataChunk source; |
| 130 | idx_t source_offset; |
| 131 | bool exhausted; |
| 132 | }; |
| 133 | |
| 134 | class PositionalScanLocalSourceState : public LocalSourceState { |
| 135 | public: |
| 136 | PositionalScanLocalSourceState(ExecutionContext &context, PositionalScanGlobalSourceState &gstate, |
| 137 | const PhysicalPositionalScan &op) { |
| 138 | for (size_t i = 0; i < op.child_tables.size(); ++i) { |
| 139 | auto &child = *op.child_tables[i]; |
| 140 | auto &global_state = *gstate.global_states[i]; |
| 141 | scanners.emplace_back(args: make_uniq<PositionalTableScanner>(args&: context, args&: child, args&: global_state)); |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | vector<unique_ptr<PositionalTableScanner>> scanners; |
| 146 | }; |
| 147 | |
| 148 | unique_ptr<LocalSourceState> PhysicalPositionalScan::GetLocalSourceState(ExecutionContext &context, |
| 149 | GlobalSourceState &gstate) const { |
| 150 | return make_uniq<PositionalScanLocalSourceState>(args&: context, args&: gstate.Cast<PositionalScanGlobalSourceState>(), args: *this); |
| 151 | } |
| 152 | |
| 153 | unique_ptr<GlobalSourceState> PhysicalPositionalScan::GetGlobalSourceState(ClientContext &context) const { |
| 154 | return make_uniq<PositionalScanGlobalSourceState>(args&: context, args: *this); |
| 155 | } |
| 156 | |
| 157 | SourceResultType PhysicalPositionalScan::GetData(ExecutionContext &context, DataChunk &output, |
| 158 | OperatorSourceInput &input) const { |
| 159 | auto &lstate = input.local_state.Cast<PositionalScanLocalSourceState>(); |
| 160 | |
| 161 | // Find the longest source block |
| 162 | idx_t count = 0; |
| 163 | for (auto &scanner : lstate.scanners) { |
| 164 | count = MaxValue(a: count, b: scanner->Refill(context)); |
| 165 | } |
| 166 | |
| 167 | // All done? |
| 168 | if (!count) { |
| 169 | return SourceResultType::FINISHED; |
| 170 | } |
| 171 | |
| 172 | // Copy or reference the source columns |
| 173 | idx_t col_offset = 0; |
| 174 | for (auto &scanner : lstate.scanners) { |
| 175 | col_offset += scanner->CopyData(context, output, count, col_offset); |
| 176 | } |
| 177 | |
| 178 | output.SetCardinality(count); |
| 179 | return SourceResultType::HAVE_MORE_OUTPUT; |
| 180 | } |
| 181 | |
| 182 | double PhysicalPositionalScan::GetProgress(ClientContext &context, GlobalSourceState &gstate_p) const { |
| 183 | auto &gstate = gstate_p.Cast<PositionalScanGlobalSourceState>(); |
| 184 | |
| 185 | double result = child_tables[0]->GetProgress(context, gstate&: *gstate.global_states[0]); |
| 186 | for (size_t t = 1; t < child_tables.size(); ++t) { |
| 187 | result = MinValue(a: result, b: child_tables[t]->GetProgress(context, gstate&: *gstate.global_states[t])); |
| 188 | } |
| 189 | |
| 190 | return result; |
| 191 | } |
| 192 | |
| 193 | bool PhysicalPositionalScan::Equals(const PhysicalOperator &other_p) const { |
| 194 | if (type != other_p.type) { |
| 195 | return false; |
| 196 | } |
| 197 | |
| 198 | auto &other = other_p.Cast<PhysicalPositionalScan>(); |
| 199 | if (child_tables.size() != other.child_tables.size()) { |
| 200 | return false; |
| 201 | } |
| 202 | for (size_t i = 0; i < child_tables.size(); ++i) { |
| 203 | if (!child_tables[i]->Equals(other: *other.child_tables[i])) { |
| 204 | return false; |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | } // namespace duckdb |
| 212 | |