| 1 | #include "duckdb/storage/table/standard_column_data.hpp" |
| 2 | #include "duckdb/storage/table/scan_state.hpp" |
| 3 | #include "duckdb/storage/table/update_segment.hpp" |
| 4 | #include "duckdb/storage/table/append_state.hpp" |
| 5 | #include "duckdb/storage/data_table.hpp" |
| 6 | #include "duckdb/planner/table_filter.hpp" |
| 7 | #include "duckdb/transaction/transaction.hpp" |
| 8 | #include "duckdb/storage/table/column_checkpoint_state.hpp" |
| 9 | |
| 10 | namespace duckdb { |
| 11 | |
| 12 | StandardColumnData::StandardColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, |
| 13 | idx_t start_row, LogicalType type, optional_ptr<ColumnData> parent) |
| 14 | : ColumnData(block_manager, info, column_index, start_row, std::move(type), parent), |
| 15 | validity(block_manager, info, 0, start_row, *this) { |
| 16 | } |
| 17 | |
| 18 | void StandardColumnData::SetStart(idx_t new_start) { |
| 19 | ColumnData::SetStart(new_start); |
| 20 | validity.SetStart(new_start); |
| 21 | } |
| 22 | |
| 23 | bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) { |
| 24 | if (!state.segment_checked) { |
| 25 | if (!state.current) { |
| 26 | return true; |
| 27 | } |
| 28 | state.segment_checked = true; |
| 29 | auto prune_result = filter.CheckStatistics(stats&: state.current->stats.statistics); |
| 30 | if (prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE) { |
| 31 | return true; |
| 32 | } |
| 33 | if (updates) { |
| 34 | auto update_stats = updates->GetStatistics(); |
| 35 | prune_result = filter.CheckStatistics(stats&: *update_stats); |
| 36 | return prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE; |
| 37 | } else { |
| 38 | return false; |
| 39 | } |
| 40 | } else { |
| 41 | return true; |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | void StandardColumnData::InitializeScan(ColumnScanState &state) { |
| 46 | ColumnData::InitializeScan(state); |
| 47 | |
| 48 | // initialize the validity segment |
| 49 | D_ASSERT(state.child_states.size() == 1); |
| 50 | validity.InitializeScan(state&: state.child_states[0]); |
| 51 | } |
| 52 | |
| 53 | void StandardColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) { |
| 54 | ColumnData::InitializeScanWithOffset(state, row_idx); |
| 55 | |
| 56 | // initialize the validity segment |
| 57 | D_ASSERT(state.child_states.size() == 1); |
| 58 | validity.InitializeScanWithOffset(state&: state.child_states[0], row_idx); |
| 59 | } |
| 60 | |
| 61 | idx_t StandardColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, |
| 62 | Vector &result) { |
| 63 | D_ASSERT(state.row_index == state.child_states[0].row_index); |
| 64 | auto scan_count = ColumnData::Scan(transaction, vector_index, state, result); |
| 65 | validity.Scan(transaction, vector_index, state&: state.child_states[0], result); |
| 66 | return scan_count; |
| 67 | } |
| 68 | |
| 69 | idx_t StandardColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, |
| 70 | bool allow_updates) { |
| 71 | D_ASSERT(state.row_index == state.child_states[0].row_index); |
| 72 | auto scan_count = ColumnData::ScanCommitted(vector_index, state, result, allow_updates); |
| 73 | validity.ScanCommitted(vector_index, state&: state.child_states[0], result, allow_updates); |
| 74 | return scan_count; |
| 75 | } |
| 76 | |
| 77 | idx_t StandardColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count) { |
| 78 | auto scan_count = ColumnData::ScanCount(state, result, count); |
| 79 | validity.ScanCount(state&: state.child_states[0], result, count); |
| 80 | return scan_count; |
| 81 | } |
| 82 | |
| 83 | void StandardColumnData::InitializeAppend(ColumnAppendState &state) { |
| 84 | ColumnData::InitializeAppend(state); |
| 85 | |
| 86 | ColumnAppendState child_append; |
| 87 | validity.InitializeAppend(state&: child_append); |
| 88 | state.child_appends.push_back(x: std::move(child_append)); |
| 89 | } |
| 90 | |
| 91 | void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, |
| 92 | idx_t count) { |
| 93 | ColumnData::AppendData(stats, state, vdata, count); |
| 94 | validity.AppendData(stats, state&: state.child_appends[0], vdata, count); |
| 95 | } |
| 96 | |
| 97 | void StandardColumnData::RevertAppend(row_t start_row) { |
| 98 | ColumnData::RevertAppend(start_row); |
| 99 | |
| 100 | validity.RevertAppend(start_row); |
| 101 | } |
| 102 | |
| 103 | idx_t StandardColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { |
| 104 | // fetch validity mask |
| 105 | if (state.child_states.empty()) { |
| 106 | ColumnScanState child_state; |
| 107 | state.child_states.push_back(x: std::move(child_state)); |
| 108 | } |
| 109 | auto scan_count = ColumnData::Fetch(state, row_id, result); |
| 110 | validity.Fetch(state&: state.child_states[0], row_id, result); |
| 111 | return scan_count; |
| 112 | } |
| 113 | |
| 114 | void StandardColumnData::Update(TransactionData transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, |
| 115 | idx_t update_count) { |
| 116 | ColumnData::Update(transaction, column_index, update_vector, row_ids, update_count); |
| 117 | validity.Update(transaction, column_index, update_vector, row_ids, update_count); |
| 118 | } |
| 119 | |
| 120 | void StandardColumnData::UpdateColumn(TransactionData transaction, const vector<column_t> &column_path, |
| 121 | Vector &update_vector, row_t *row_ids, idx_t update_count, idx_t depth) { |
| 122 | if (depth >= column_path.size()) { |
| 123 | // update this column |
| 124 | ColumnData::Update(transaction, column_index: column_path[0], update_vector, row_ids, update_count); |
| 125 | } else { |
| 126 | // update the child column (i.e. the validity column) |
| 127 | validity.UpdateColumn(transaction, column_path, update_vector, row_ids, update_count, depth: depth + 1); |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | unique_ptr<BaseStatistics> StandardColumnData::GetUpdateStatistics() { |
| 132 | auto stats = updates ? updates->GetStatistics() : nullptr; |
| 133 | auto validity_stats = validity.GetUpdateStatistics(); |
| 134 | if (!stats && !validity_stats) { |
| 135 | return nullptr; |
| 136 | } |
| 137 | if (!stats) { |
| 138 | stats = BaseStatistics::CreateEmpty(type).ToUnique(); |
| 139 | } |
| 140 | if (validity_stats) { |
| 141 | stats->Merge(other: *validity_stats); |
| 142 | } |
| 143 | return stats; |
| 144 | } |
| 145 | |
| 146 | void StandardColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result, |
| 147 | idx_t result_idx) { |
| 148 | // find the segment the row belongs to |
| 149 | if (state.child_states.empty()) { |
| 150 | auto child_state = make_uniq<ColumnFetchState>(); |
| 151 | state.child_states.push_back(x: std::move(child_state)); |
| 152 | } |
| 153 | validity.FetchRow(transaction, state&: *state.child_states[0], row_id, result, result_idx); |
| 154 | ColumnData::FetchRow(transaction, state, row_id, result, result_idx); |
| 155 | } |
| 156 | |
| 157 | void StandardColumnData::CommitDropColumn() { |
| 158 | ColumnData::CommitDropColumn(); |
| 159 | validity.CommitDropColumn(); |
| 160 | } |
| 161 | |
| 162 | struct StandardColumnCheckpointState : public ColumnCheckpointState { |
| 163 | StandardColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, |
| 164 | PartialBlockManager &partial_block_manager) |
| 165 | : ColumnCheckpointState(row_group, column_data, partial_block_manager) { |
| 166 | } |
| 167 | |
| 168 | unique_ptr<ColumnCheckpointState> validity_state; |
| 169 | |
| 170 | public: |
| 171 | unique_ptr<BaseStatistics> GetStatistics() override { |
| 172 | D_ASSERT(global_stats); |
| 173 | return std::move(global_stats); |
| 174 | } |
| 175 | |
| 176 | void WriteDataPointers(RowGroupWriter &writer) override { |
| 177 | ColumnCheckpointState::WriteDataPointers(writer); |
| 178 | validity_state->WriteDataPointers(writer); |
| 179 | } |
| 180 | }; |
| 181 | |
| 182 | unique_ptr<ColumnCheckpointState> |
| 183 | StandardColumnData::CreateCheckpointState(RowGroup &row_group, PartialBlockManager &partial_block_manager) { |
| 184 | return make_uniq<StandardColumnCheckpointState>(args&: row_group, args&: *this, args&: partial_block_manager); |
| 185 | } |
| 186 | |
| 187 | unique_ptr<ColumnCheckpointState> StandardColumnData::Checkpoint(RowGroup &row_group, |
| 188 | PartialBlockManager &partial_block_manager, |
| 189 | ColumnCheckpointInfo &checkpoint_info) { |
| 190 | auto validity_state = validity.Checkpoint(row_group, partial_block_manager, checkpoint_info); |
| 191 | auto base_state = ColumnData::Checkpoint(row_group, partial_block_manager, checkpoint_info); |
| 192 | auto &checkpoint_state = base_state->Cast<StandardColumnCheckpointState>(); |
| 193 | checkpoint_state.validity_state = std::move(validity_state); |
| 194 | return base_state; |
| 195 | } |
| 196 | |
| 197 | void StandardColumnData::CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start, |
| 198 | idx_t count, Vector &scan_vector) { |
| 199 | ColumnData::CheckpointScan(segment, state, row_group_start, count, scan_vector); |
| 200 | |
| 201 | idx_t offset_in_row_group = state.row_index - row_group_start; |
| 202 | validity.ScanCommittedRange(row_group_start, offset_in_row_group, count, result&: scan_vector); |
| 203 | } |
| 204 | |
| 205 | void StandardColumnData::DeserializeColumn(Deserializer &source) { |
| 206 | ColumnData::DeserializeColumn(source); |
| 207 | validity.DeserializeColumn(source); |
| 208 | } |
| 209 | |
| 210 | void StandardColumnData::GetColumnSegmentInfo(duckdb::idx_t row_group_index, vector<duckdb::idx_t> col_path, |
| 211 | vector<duckdb::ColumnSegmentInfo> &result) { |
| 212 | ColumnData::GetColumnSegmentInfo(row_group_index, col_path, result); |
| 213 | col_path.push_back(x: 0); |
| 214 | validity.GetColumnSegmentInfo(row_group_index, col_path: std::move(col_path), result); |
| 215 | } |
| 216 | |
| 217 | void StandardColumnData::Verify(RowGroup &parent) { |
| 218 | #ifdef DEBUG |
| 219 | ColumnData::Verify(parent); |
| 220 | validity.Verify(parent); |
| 221 | #endif |
| 222 | } |
| 223 | |
| 224 | } // namespace duckdb |
| 225 | |