1 | #include "duckdb/storage/table/struct_column_data.hpp" |
2 | #include "duckdb/storage/statistics/struct_stats.hpp" |
3 | #include "duckdb/transaction/transaction.hpp" |
4 | #include "duckdb/storage/table/column_checkpoint_state.hpp" |
5 | #include "duckdb/storage/table/append_state.hpp" |
6 | #include "duckdb/storage/table/scan_state.hpp" |
7 | |
8 | namespace duckdb { |
9 | |
10 | StructColumnData::StructColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, |
11 | idx_t start_row, LogicalType type_p, optional_ptr<ColumnData> parent) |
12 | : ColumnData(block_manager, info, column_index, start_row, std::move(type_p), parent), |
13 | validity(block_manager, info, 0, start_row, *this) { |
14 | D_ASSERT(type.InternalType() == PhysicalType::STRUCT); |
15 | auto &child_types = StructType::GetChildTypes(type); |
16 | D_ASSERT(child_types.size() > 0); |
17 | // the sub column index, starting at 1 (0 is the validity mask) |
18 | idx_t sub_column_index = 1; |
19 | for (auto &child_type : child_types) { |
20 | sub_columns.push_back( |
21 | x: ColumnData::CreateColumnUnique(block_manager, info, column_index: sub_column_index, start_row, type: child_type.second, parent: this)); |
22 | sub_column_index++; |
23 | } |
24 | } |
25 | |
26 | void StructColumnData::SetStart(idx_t new_start) { |
27 | this->start = new_start; |
28 | for (auto &sub_column : sub_columns) { |
29 | sub_column->SetStart(new_start); |
30 | } |
31 | validity.SetStart(new_start); |
32 | } |
33 | |
34 | bool StructColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) { |
35 | // table filters are not supported yet for struct columns |
36 | return false; |
37 | } |
38 | |
39 | idx_t StructColumnData::GetMaxEntry() { |
40 | return sub_columns[0]->GetMaxEntry(); |
41 | } |
42 | |
43 | void StructColumnData::InitializeScan(ColumnScanState &state) { |
44 | D_ASSERT(state.child_states.size() == sub_columns.size() + 1); |
45 | state.row_index = 0; |
46 | state.current = nullptr; |
47 | |
48 | // initialize the validity segment |
49 | validity.InitializeScan(state&: state.child_states[0]); |
50 | |
51 | // initialize the sub-columns |
52 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
53 | sub_columns[i]->InitializeScan(state&: state.child_states[i + 1]); |
54 | } |
55 | } |
56 | |
57 | void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) { |
58 | D_ASSERT(state.child_states.size() == sub_columns.size() + 1); |
59 | state.row_index = row_idx; |
60 | state.current = nullptr; |
61 | |
62 | // initialize the validity segment |
63 | validity.InitializeScanWithOffset(state&: state.child_states[0], row_idx); |
64 | |
65 | // initialize the sub-columns |
66 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
67 | sub_columns[i]->InitializeScanWithOffset(state&: state.child_states[i + 1], row_idx); |
68 | } |
69 | } |
70 | |
71 | idx_t StructColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result) { |
72 | auto scan_count = validity.Scan(transaction, vector_index, state&: state.child_states[0], result); |
73 | auto &child_entries = StructVector::GetEntries(vector&: result); |
74 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
75 | sub_columns[i]->Scan(transaction, vector_index, state&: state.child_states[i + 1], result&: *child_entries[i]); |
76 | } |
77 | return scan_count; |
78 | } |
79 | |
80 | idx_t StructColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) { |
81 | auto scan_count = validity.ScanCommitted(vector_index, state&: state.child_states[0], result, allow_updates); |
82 | auto &child_entries = StructVector::GetEntries(vector&: result); |
83 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
84 | sub_columns[i]->ScanCommitted(vector_index, state&: state.child_states[i + 1], result&: *child_entries[i], allow_updates); |
85 | } |
86 | return scan_count; |
87 | } |
88 | |
89 | idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) { |
90 | auto scan_count = validity.ScanCount(state&: state.child_states[0], result, count); |
91 | auto &child_entries = StructVector::GetEntries(vector&: result); |
92 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
93 | sub_columns[i]->ScanCount(state&: state.child_states[i + 1], result&: *child_entries[i], count); |
94 | } |
95 | return scan_count; |
96 | } |
97 | |
98 | void StructColumnData::Skip(ColumnScanState &state, idx_t count) { |
99 | validity.Skip(state&: state.child_states[0], count); |
100 | |
101 | // skip inside the sub-columns |
102 | for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) { |
103 | sub_columns[child_idx]->Skip(state&: state.child_states[child_idx + 1], count); |
104 | } |
105 | } |
106 | |
107 | void StructColumnData::InitializeAppend(ColumnAppendState &state) { |
108 | ColumnAppendState validity_append; |
109 | validity.InitializeAppend(state&: validity_append); |
110 | state.child_appends.push_back(x: std::move(validity_append)); |
111 | |
112 | for (auto &sub_column : sub_columns) { |
113 | ColumnAppendState child_append; |
114 | sub_column->InitializeAppend(state&: child_append); |
115 | state.child_appends.push_back(x: std::move(child_append)); |
116 | } |
117 | } |
118 | |
119 | void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) { |
120 | vector.Flatten(count); |
121 | |
122 | // append the null values |
123 | validity.Append(stats, state&: state.child_appends[0], vector, count); |
124 | |
125 | auto &child_entries = StructVector::GetEntries(vector); |
126 | for (idx_t i = 0; i < child_entries.size(); i++) { |
127 | sub_columns[i]->Append(stats&: StructStats::GetChildStats(stats, i), state&: state.child_appends[i + 1], vector&: *child_entries[i], |
128 | count); |
129 | } |
130 | } |
131 | |
132 | void StructColumnData::RevertAppend(row_t start_row) { |
133 | validity.RevertAppend(start_row); |
134 | for (auto &sub_column : sub_columns) { |
135 | sub_column->RevertAppend(start_row); |
136 | } |
137 | } |
138 | |
139 | idx_t StructColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { |
140 | // fetch validity mask |
141 | auto &child_entries = StructVector::GetEntries(vector&: result); |
142 | // insert any child states that are required |
143 | for (idx_t i = state.child_states.size(); i < child_entries.size() + 1; i++) { |
144 | ColumnScanState child_state; |
145 | state.child_states.push_back(x: std::move(child_state)); |
146 | } |
147 | // fetch the validity state |
148 | idx_t scan_count = validity.Fetch(state&: state.child_states[0], row_id, result); |
149 | // fetch the sub-column states |
150 | for (idx_t i = 0; i < child_entries.size(); i++) { |
151 | sub_columns[i]->Fetch(state&: state.child_states[i + 1], row_id, result&: *child_entries[i]); |
152 | } |
153 | return scan_count; |
154 | } |
155 | |
156 | void StructColumnData::Update(TransactionData transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, |
157 | idx_t update_count) { |
158 | validity.Update(transaction, column_index, update_vector, row_ids, update_count); |
159 | auto &child_entries = StructVector::GetEntries(vector&: update_vector); |
160 | for (idx_t i = 0; i < child_entries.size(); i++) { |
161 | sub_columns[i]->Update(transaction, column_index, update_vector&: *child_entries[i], row_ids, update_count); |
162 | } |
163 | } |
164 | |
165 | void StructColumnData::UpdateColumn(TransactionData transaction, const vector<column_t> &column_path, |
166 | Vector &update_vector, row_t *row_ids, idx_t update_count, idx_t depth) { |
167 | // we can never DIRECTLY update a struct column |
168 | if (depth >= column_path.size()) { |
169 | throw InternalException("Attempting to directly update a struct column - this should not be possible" ); |
170 | } |
171 | auto update_column = column_path[depth]; |
172 | if (update_column == 0) { |
173 | // update the validity column |
174 | validity.UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, depth: depth + 1); |
175 | } else { |
176 | if (update_column > sub_columns.size()) { |
177 | throw InternalException("Update column_path out of range" ); |
178 | } |
179 | sub_columns[update_column - 1]->UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, |
180 | depth: depth + 1); |
181 | } |
182 | } |
183 | |
184 | unique_ptr<BaseStatistics> StructColumnData::GetUpdateStatistics() { |
185 | // check if any child column has updates |
186 | auto stats = BaseStatistics::CreateEmpty(type); |
187 | auto validity_stats = validity.GetUpdateStatistics(); |
188 | if (validity_stats) { |
189 | stats.Merge(other: *validity_stats); |
190 | } |
191 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
192 | auto child_stats = sub_columns[i]->GetUpdateStatistics(); |
193 | if (child_stats) { |
194 | StructStats::SetChildStats(stats, i, new_stats: std::move(child_stats)); |
195 | } |
196 | } |
197 | return stats.ToUnique(); |
198 | } |
199 | |
200 | void StructColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result, |
201 | idx_t result_idx) { |
202 | // fetch validity mask |
203 | auto &child_entries = StructVector::GetEntries(vector&: result); |
204 | // insert any child states that are required |
205 | for (idx_t i = state.child_states.size(); i < child_entries.size() + 1; i++) { |
206 | auto child_state = make_uniq<ColumnFetchState>(); |
207 | state.child_states.push_back(x: std::move(child_state)); |
208 | } |
209 | // fetch the validity state |
210 | validity.FetchRow(transaction, state&: *state.child_states[0], row_id, result, result_idx); |
211 | // fetch the sub-column states |
212 | for (idx_t i = 0; i < child_entries.size(); i++) { |
213 | sub_columns[i]->FetchRow(transaction, state&: *state.child_states[i + 1], row_id, result&: *child_entries[i], result_idx); |
214 | } |
215 | } |
216 | |
217 | void StructColumnData::CommitDropColumn() { |
218 | validity.CommitDropColumn(); |
219 | for (auto &sub_column : sub_columns) { |
220 | sub_column->CommitDropColumn(); |
221 | } |
222 | } |
223 | |
224 | struct StructColumnCheckpointState : public ColumnCheckpointState { |
225 | StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, |
226 | PartialBlockManager &partial_block_manager) |
227 | : ColumnCheckpointState(row_group, column_data, partial_block_manager) { |
228 | global_stats = StructStats::CreateEmpty(type: column_data.type).ToUnique(); |
229 | } |
230 | |
231 | unique_ptr<ColumnCheckpointState> validity_state; |
232 | vector<unique_ptr<ColumnCheckpointState>> child_states; |
233 | |
234 | public: |
235 | unique_ptr<BaseStatistics> GetStatistics() override { |
236 | auto stats = StructStats::CreateEmpty(type: column_data.type); |
237 | for (idx_t i = 0; i < child_states.size(); i++) { |
238 | StructStats::SetChildStats(stats, i, new_stats: child_states[i]->GetStatistics()); |
239 | } |
240 | return stats.ToUnique(); |
241 | } |
242 | |
243 | void WriteDataPointers(RowGroupWriter &writer) override { |
244 | validity_state->WriteDataPointers(writer); |
245 | for (auto &state : child_states) { |
246 | state->WriteDataPointers(writer); |
247 | } |
248 | } |
249 | }; |
250 | |
251 | unique_ptr<ColumnCheckpointState> StructColumnData::CreateCheckpointState(RowGroup &row_group, |
252 | PartialBlockManager &partial_block_manager) { |
253 | return make_uniq<StructColumnCheckpointState>(args&: row_group, args&: *this, args&: partial_block_manager); |
254 | } |
255 | |
256 | unique_ptr<ColumnCheckpointState> StructColumnData::Checkpoint(RowGroup &row_group, |
257 | PartialBlockManager &partial_block_manager, |
258 | ColumnCheckpointInfo &checkpoint_info) { |
259 | auto checkpoint_state = make_uniq<StructColumnCheckpointState>(args&: row_group, args&: *this, args&: partial_block_manager); |
260 | checkpoint_state->validity_state = validity.Checkpoint(row_group, partial_block_manager, checkpoint_info); |
261 | for (auto &sub_column : sub_columns) { |
262 | checkpoint_state->child_states.push_back( |
263 | x: sub_column->Checkpoint(row_group, partial_block_manager, checkpoint_info)); |
264 | } |
265 | return std::move(checkpoint_state); |
266 | } |
267 | |
268 | void StructColumnData::DeserializeColumn(Deserializer &source) { |
269 | validity.DeserializeColumn(source); |
270 | for (auto &sub_column : sub_columns) { |
271 | sub_column->DeserializeColumn(source); |
272 | } |
273 | this->count = validity.count; |
274 | } |
275 | |
276 | void StructColumnData::GetColumnSegmentInfo(duckdb::idx_t row_group_index, vector<duckdb::idx_t> col_path, |
277 | vector<duckdb::ColumnSegmentInfo> &result) { |
278 | col_path.push_back(x: 0); |
279 | validity.GetColumnSegmentInfo(row_group_index, col_path, result); |
280 | for (idx_t i = 0; i < sub_columns.size(); i++) { |
281 | col_path.back() = i + 1; |
282 | sub_columns[i]->GetColumnSegmentInfo(row_group_index, col_path, result); |
283 | } |
284 | } |
285 | |
286 | void StructColumnData::Verify(RowGroup &parent) { |
287 | #ifdef DEBUG |
288 | ColumnData::Verify(parent); |
289 | validity.Verify(parent); |
290 | for (auto &sub_column : sub_columns) { |
291 | sub_column->Verify(parent); |
292 | } |
293 | #endif |
294 | } |
295 | |
296 | } // namespace duckdb |
297 | |