1#include "duckdb/storage/table/struct_column_data.hpp"
2#include "duckdb/storage/statistics/struct_stats.hpp"
3#include "duckdb/transaction/transaction.hpp"
4#include "duckdb/storage/table/column_checkpoint_state.hpp"
5#include "duckdb/storage/table/append_state.hpp"
6#include "duckdb/storage/table/scan_state.hpp"
7
8namespace duckdb {
9
10StructColumnData::StructColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
11 idx_t start_row, LogicalType type_p, optional_ptr<ColumnData> parent)
12 : ColumnData(block_manager, info, column_index, start_row, std::move(type_p), parent),
13 validity(block_manager, info, 0, start_row, *this) {
14 D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
15 auto &child_types = StructType::GetChildTypes(type);
16 D_ASSERT(child_types.size() > 0);
17 // the sub column index, starting at 1 (0 is the validity mask)
18 idx_t sub_column_index = 1;
19 for (auto &child_type : child_types) {
20 sub_columns.push_back(
21 x: ColumnData::CreateColumnUnique(block_manager, info, column_index: sub_column_index, start_row, type: child_type.second, parent: this));
22 sub_column_index++;
23 }
24}
25
26void StructColumnData::SetStart(idx_t new_start) {
27 this->start = new_start;
28 for (auto &sub_column : sub_columns) {
29 sub_column->SetStart(new_start);
30 }
31 validity.SetStart(new_start);
32}
33
34bool StructColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) {
35 // table filters are not supported yet for struct columns
36 return false;
37}
38
39idx_t StructColumnData::GetMaxEntry() {
40 return sub_columns[0]->GetMaxEntry();
41}
42
43void StructColumnData::InitializeScan(ColumnScanState &state) {
44 D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
45 state.row_index = 0;
46 state.current = nullptr;
47
48 // initialize the validity segment
49 validity.InitializeScan(state&: state.child_states[0]);
50
51 // initialize the sub-columns
52 for (idx_t i = 0; i < sub_columns.size(); i++) {
53 sub_columns[i]->InitializeScan(state&: state.child_states[i + 1]);
54 }
55}
56
57void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
58 D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
59 state.row_index = row_idx;
60 state.current = nullptr;
61
62 // initialize the validity segment
63 validity.InitializeScanWithOffset(state&: state.child_states[0], row_idx);
64
65 // initialize the sub-columns
66 for (idx_t i = 0; i < sub_columns.size(); i++) {
67 sub_columns[i]->InitializeScanWithOffset(state&: state.child_states[i + 1], row_idx);
68 }
69}
70
71idx_t StructColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result) {
72 auto scan_count = validity.Scan(transaction, vector_index, state&: state.child_states[0], result);
73 auto &child_entries = StructVector::GetEntries(vector&: result);
74 for (idx_t i = 0; i < sub_columns.size(); i++) {
75 sub_columns[i]->Scan(transaction, vector_index, state&: state.child_states[i + 1], result&: *child_entries[i]);
76 }
77 return scan_count;
78}
79
80idx_t StructColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) {
81 auto scan_count = validity.ScanCommitted(vector_index, state&: state.child_states[0], result, allow_updates);
82 auto &child_entries = StructVector::GetEntries(vector&: result);
83 for (idx_t i = 0; i < sub_columns.size(); i++) {
84 sub_columns[i]->ScanCommitted(vector_index, state&: state.child_states[i + 1], result&: *child_entries[i], allow_updates);
85 }
86 return scan_count;
87}
88
89idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) {
90 auto scan_count = validity.ScanCount(state&: state.child_states[0], result, count);
91 auto &child_entries = StructVector::GetEntries(vector&: result);
92 for (idx_t i = 0; i < sub_columns.size(); i++) {
93 sub_columns[i]->ScanCount(state&: state.child_states[i + 1], result&: *child_entries[i], count);
94 }
95 return scan_count;
96}
97
98void StructColumnData::Skip(ColumnScanState &state, idx_t count) {
99 validity.Skip(state&: state.child_states[0], count);
100
101 // skip inside the sub-columns
102 for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) {
103 sub_columns[child_idx]->Skip(state&: state.child_states[child_idx + 1], count);
104 }
105}
106
107void StructColumnData::InitializeAppend(ColumnAppendState &state) {
108 ColumnAppendState validity_append;
109 validity.InitializeAppend(state&: validity_append);
110 state.child_appends.push_back(x: std::move(validity_append));
111
112 for (auto &sub_column : sub_columns) {
113 ColumnAppendState child_append;
114 sub_column->InitializeAppend(state&: child_append);
115 state.child_appends.push_back(x: std::move(child_append));
116 }
117}
118
119void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
120 vector.Flatten(count);
121
122 // append the null values
123 validity.Append(stats, state&: state.child_appends[0], vector, count);
124
125 auto &child_entries = StructVector::GetEntries(vector);
126 for (idx_t i = 0; i < child_entries.size(); i++) {
127 sub_columns[i]->Append(stats&: StructStats::GetChildStats(stats, i), state&: state.child_appends[i + 1], vector&: *child_entries[i],
128 count);
129 }
130}
131
132void StructColumnData::RevertAppend(row_t start_row) {
133 validity.RevertAppend(start_row);
134 for (auto &sub_column : sub_columns) {
135 sub_column->RevertAppend(start_row);
136 }
137}
138
139idx_t StructColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
140 // fetch validity mask
141 auto &child_entries = StructVector::GetEntries(vector&: result);
142 // insert any child states that are required
143 for (idx_t i = state.child_states.size(); i < child_entries.size() + 1; i++) {
144 ColumnScanState child_state;
145 state.child_states.push_back(x: std::move(child_state));
146 }
147 // fetch the validity state
148 idx_t scan_count = validity.Fetch(state&: state.child_states[0], row_id, result);
149 // fetch the sub-column states
150 for (idx_t i = 0; i < child_entries.size(); i++) {
151 sub_columns[i]->Fetch(state&: state.child_states[i + 1], row_id, result&: *child_entries[i]);
152 }
153 return scan_count;
154}
155
156void StructColumnData::Update(TransactionData transaction, idx_t column_index, Vector &update_vector, row_t *row_ids,
157 idx_t update_count) {
158 validity.Update(transaction, column_index, update_vector, row_ids, update_count);
159 auto &child_entries = StructVector::GetEntries(vector&: update_vector);
160 for (idx_t i = 0; i < child_entries.size(); i++) {
161 sub_columns[i]->Update(transaction, column_index, update_vector&: *child_entries[i], row_ids, update_count);
162 }
163}
164
165void StructColumnData::UpdateColumn(TransactionData transaction, const vector<column_t> &column_path,
166 Vector &update_vector, row_t *row_ids, idx_t update_count, idx_t depth) {
167 // we can never DIRECTLY update a struct column
168 if (depth >= column_path.size()) {
169 throw InternalException("Attempting to directly update a struct column - this should not be possible");
170 }
171 auto update_column = column_path[depth];
172 if (update_column == 0) {
173 // update the validity column
174 validity.UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, depth: depth + 1);
175 } else {
176 if (update_column > sub_columns.size()) {
177 throw InternalException("Update column_path out of range");
178 }
179 sub_columns[update_column - 1]->UpdateColumn(transaction, column_path, update_vector, row_ids, update_count,
180 depth: depth + 1);
181 }
182}
183
184unique_ptr<BaseStatistics> StructColumnData::GetUpdateStatistics() {
185 // check if any child column has updates
186 auto stats = BaseStatistics::CreateEmpty(type);
187 auto validity_stats = validity.GetUpdateStatistics();
188 if (validity_stats) {
189 stats.Merge(other: *validity_stats);
190 }
191 for (idx_t i = 0; i < sub_columns.size(); i++) {
192 auto child_stats = sub_columns[i]->GetUpdateStatistics();
193 if (child_stats) {
194 StructStats::SetChildStats(stats, i, new_stats: std::move(child_stats));
195 }
196 }
197 return stats.ToUnique();
198}
199
200void StructColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
201 idx_t result_idx) {
202 // fetch validity mask
203 auto &child_entries = StructVector::GetEntries(vector&: result);
204 // insert any child states that are required
205 for (idx_t i = state.child_states.size(); i < child_entries.size() + 1; i++) {
206 auto child_state = make_uniq<ColumnFetchState>();
207 state.child_states.push_back(x: std::move(child_state));
208 }
209 // fetch the validity state
210 validity.FetchRow(transaction, state&: *state.child_states[0], row_id, result, result_idx);
211 // fetch the sub-column states
212 for (idx_t i = 0; i < child_entries.size(); i++) {
213 sub_columns[i]->FetchRow(transaction, state&: *state.child_states[i + 1], row_id, result&: *child_entries[i], result_idx);
214 }
215}
216
217void StructColumnData::CommitDropColumn() {
218 validity.CommitDropColumn();
219 for (auto &sub_column : sub_columns) {
220 sub_column->CommitDropColumn();
221 }
222}
223
224struct StructColumnCheckpointState : public ColumnCheckpointState {
225 StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
226 PartialBlockManager &partial_block_manager)
227 : ColumnCheckpointState(row_group, column_data, partial_block_manager) {
228 global_stats = StructStats::CreateEmpty(type: column_data.type).ToUnique();
229 }
230
231 unique_ptr<ColumnCheckpointState> validity_state;
232 vector<unique_ptr<ColumnCheckpointState>> child_states;
233
234public:
235 unique_ptr<BaseStatistics> GetStatistics() override {
236 auto stats = StructStats::CreateEmpty(type: column_data.type);
237 for (idx_t i = 0; i < child_states.size(); i++) {
238 StructStats::SetChildStats(stats, i, new_stats: child_states[i]->GetStatistics());
239 }
240 return stats.ToUnique();
241 }
242
243 void WriteDataPointers(RowGroupWriter &writer) override {
244 validity_state->WriteDataPointers(writer);
245 for (auto &state : child_states) {
246 state->WriteDataPointers(writer);
247 }
248 }
249};
250
251unique_ptr<ColumnCheckpointState> StructColumnData::CreateCheckpointState(RowGroup &row_group,
252 PartialBlockManager &partial_block_manager) {
253 return make_uniq<StructColumnCheckpointState>(args&: row_group, args&: *this, args&: partial_block_manager);
254}
255
256unique_ptr<ColumnCheckpointState> StructColumnData::Checkpoint(RowGroup &row_group,
257 PartialBlockManager &partial_block_manager,
258 ColumnCheckpointInfo &checkpoint_info) {
259 auto checkpoint_state = make_uniq<StructColumnCheckpointState>(args&: row_group, args&: *this, args&: partial_block_manager);
260 checkpoint_state->validity_state = validity.Checkpoint(row_group, partial_block_manager, checkpoint_info);
261 for (auto &sub_column : sub_columns) {
262 checkpoint_state->child_states.push_back(
263 x: sub_column->Checkpoint(row_group, partial_block_manager, checkpoint_info));
264 }
265 return std::move(checkpoint_state);
266}
267
268void StructColumnData::DeserializeColumn(Deserializer &source) {
269 validity.DeserializeColumn(source);
270 for (auto &sub_column : sub_columns) {
271 sub_column->DeserializeColumn(source);
272 }
273 this->count = validity.count;
274}
275
276void StructColumnData::GetColumnSegmentInfo(duckdb::idx_t row_group_index, vector<duckdb::idx_t> col_path,
277 vector<duckdb::ColumnSegmentInfo> &result) {
278 col_path.push_back(x: 0);
279 validity.GetColumnSegmentInfo(row_group_index, col_path, result);
280 for (idx_t i = 0; i < sub_columns.size(); i++) {
281 col_path.back() = i + 1;
282 sub_columns[i]->GetColumnSegmentInfo(row_group_index, col_path, result);
283 }
284}
285
286void StructColumnData::Verify(RowGroup &parent) {
287#ifdef DEBUG
288 ColumnData::Verify(parent);
289 validity.Verify(parent);
290 for (auto &sub_column : sub_columns) {
291 sub_column->Verify(parent);
292 }
293#endif
294}
295
296} // namespace duckdb
297