| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/storage/table/column_data.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/types/data_chunk.hpp" |
| 12 | #include "duckdb/storage/statistics/base_statistics.hpp" |
| 13 | #include "duckdb/storage/data_pointer.hpp" |
| 14 | #include "duckdb/storage/table/persistent_table_data.hpp" |
| 15 | #include "duckdb/storage/statistics/segment_statistics.hpp" |
| 16 | #include "duckdb/storage/table/segment_tree.hpp" |
| 17 | #include "duckdb/storage/table/column_segment_tree.hpp" |
| 18 | #include "duckdb/common/mutex.hpp" |
| 19 | |
| 20 | namespace duckdb { |
| 21 | class ColumnData; |
| 22 | class ColumnSegment; |
| 23 | class DatabaseInstance; |
| 24 | class RowGroup; |
| 25 | class RowGroupWriter; |
| 26 | class TableDataWriter; |
| 27 | class TableStorageInfo; |
| 28 | struct TransactionData; |
| 29 | |
| 30 | struct DataTableInfo; |
| 31 | |
| 32 | struct ColumnCheckpointInfo { |
| 33 | explicit ColumnCheckpointInfo(CompressionType compression_type_p) : compression_type(compression_type_p) {}; |
| 34 | CompressionType compression_type; |
| 35 | }; |
| 36 | |
| 37 | class ColumnData { |
| 38 | friend class ColumnDataCheckpointer; |
| 39 | |
| 40 | public: |
| 41 | ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type, |
| 42 | optional_ptr<ColumnData> parent); |
| 43 | virtual ~ColumnData(); |
| 44 | |
| 45 | //! The start row |
| 46 | idx_t start; |
| 47 | //! The count of the column data |
| 48 | idx_t count; |
| 49 | //! The block manager |
| 50 | BlockManager &block_manager; |
| 51 | //! Table info for the column |
| 52 | DataTableInfo &info; |
| 53 | //! The column index of the column, either within the parent table or within the parent |
| 54 | idx_t column_index; |
| 55 | //! The type of the column |
| 56 | LogicalType type; |
| 57 | //! The parent column (if any) |
| 58 | optional_ptr<ColumnData> parent; |
| 59 | |
| 60 | public: |
| 61 | virtual bool CheckZonemap(ColumnScanState &state, TableFilter &filter) = 0; |
| 62 | |
| 63 | BlockManager &GetBlockManager() { |
| 64 | return block_manager; |
| 65 | } |
| 66 | DatabaseInstance &GetDatabase() const; |
| 67 | DataTableInfo &GetTableInfo() const; |
| 68 | virtual idx_t GetMaxEntry(); |
| 69 | |
| 70 | void IncrementVersion(); |
| 71 | |
| 72 | virtual void SetStart(idx_t new_start); |
| 73 | //! The root type of the column |
| 74 | const LogicalType &RootType() const; |
| 75 | |
| 76 | //! Initialize a scan of the column |
| 77 | virtual void InitializeScan(ColumnScanState &state); |
| 78 | //! Initialize a scan starting at the specified offset |
| 79 | virtual void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx); |
| 80 | //! Scan the next vector from the column |
| 81 | virtual idx_t Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result); |
| 82 | virtual idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates); |
| 83 | virtual void ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_group, idx_t count, Vector &result); |
| 84 | virtual idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count); |
| 85 | //! Select |
| 86 | virtual void Select(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result, |
| 87 | SelectionVector &sel, idx_t &count, const TableFilter &filter); |
| 88 | virtual void FilterScan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result, |
| 89 | SelectionVector &sel, idx_t count); |
| 90 | virtual void FilterScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, SelectionVector &sel, |
| 91 | idx_t count, bool allow_updates); |
| 92 | |
| 93 | //! Skip the scan forward by "count" rows |
| 94 | virtual void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE); |
| 95 | |
| 96 | //! Initialize an appending phase for this column |
| 97 | virtual void InitializeAppend(ColumnAppendState &state); |
| 98 | //! Append a vector of type [type] to the end of the column |
| 99 | virtual void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count); |
| 100 | //! Append a vector of type [type] to the end of the column |
| 101 | void Append(ColumnAppendState &state, Vector &vector, idx_t count); |
| 102 | virtual void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count); |
| 103 | //! Revert a set of appends to the ColumnData |
| 104 | virtual void RevertAppend(row_t start_row); |
| 105 | |
| 106 | //! Fetch the vector from the column data that belongs to this specific row |
| 107 | virtual idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result); |
| 108 | //! Fetch a specific row id and append it to the vector |
| 109 | virtual void FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result, |
| 110 | idx_t result_idx); |
| 111 | |
| 112 | virtual void Update(TransactionData transaction, idx_t column_index, Vector &update_vector, row_t *row_ids, |
| 113 | idx_t update_count); |
| 114 | virtual void UpdateColumn(TransactionData transaction, const vector<column_t> &column_path, Vector &update_vector, |
| 115 | row_t *row_ids, idx_t update_count, idx_t depth); |
| 116 | virtual unique_ptr<BaseStatistics> GetUpdateStatistics(); |
| 117 | |
| 118 | virtual void CommitDropColumn(); |
| 119 | |
| 120 | virtual unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group, |
| 121 | PartialBlockManager &partial_block_manager); |
| 122 | virtual unique_ptr<ColumnCheckpointState> |
| 123 | Checkpoint(RowGroup &row_group, PartialBlockManager &partial_block_manager, ColumnCheckpointInfo &checkpoint_info); |
| 124 | |
| 125 | virtual void CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start, idx_t count, |
| 126 | Vector &scan_vector); |
| 127 | |
| 128 | virtual void DeserializeColumn(Deserializer &source); |
| 129 | static shared_ptr<ColumnData> Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, |
| 130 | idx_t start_row, Deserializer &source, const LogicalType &type, |
| 131 | optional_ptr<ColumnData> parent); |
| 132 | |
| 133 | virtual void GetColumnSegmentInfo(idx_t row_group_index, vector<idx_t> col_path, vector<ColumnSegmentInfo> &result); |
| 134 | virtual void Verify(RowGroup &parent); |
| 135 | |
| 136 | bool CheckZonemap(TableFilter &filter); |
| 137 | |
| 138 | static shared_ptr<ColumnData> CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, |
| 139 | idx_t start_row, const LogicalType &type, |
| 140 | optional_ptr<ColumnData> parent = nullptr); |
| 141 | static unique_ptr<ColumnData> CreateColumnUnique(BlockManager &block_manager, DataTableInfo &info, |
| 142 | idx_t column_index, idx_t start_row, const LogicalType &type, |
| 143 | optional_ptr<ColumnData> parent = nullptr); |
| 144 | |
| 145 | void MergeStatistics(const BaseStatistics &other); |
| 146 | void MergeIntoStatistics(BaseStatistics &other); |
| 147 | unique_ptr<BaseStatistics> GetStatistics(); |
| 148 | |
| 149 | protected: |
| 150 | //! Append a transient segment |
| 151 | void AppendTransientSegment(SegmentLock &l, idx_t start_row); |
| 152 | |
| 153 | //! Scans a base vector from the column |
| 154 | idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining); |
| 155 | //! Scans a vector from the column merged with any potential updates |
| 156 | //! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found |
| 157 | template <bool SCAN_COMMITTED, bool ALLOW_UPDATES> |
| 158 | idx_t ScanVector(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result); |
| 159 | |
| 160 | protected: |
| 161 | //! The segments holding the data of this column segment |
| 162 | ColumnSegmentTree data; |
| 163 | //! The lock for the updates |
| 164 | mutex update_lock; |
| 165 | //! The updates for this column segment |
| 166 | unique_ptr<UpdateSegment> updates; |
| 167 | //! The internal version of the column data |
| 168 | idx_t version; |
| 169 | //! The stats of the root segment |
| 170 | unique_ptr<SegmentStatistics> stats; |
| 171 | }; |
| 172 | |
| 173 | } // namespace duckdb |
| 174 | |