1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/storage/table/column_data.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/types/data_chunk.hpp"
12#include "duckdb/storage/statistics/base_statistics.hpp"
13#include "duckdb/storage/data_pointer.hpp"
14#include "duckdb/storage/table/persistent_table_data.hpp"
15#include "duckdb/storage/statistics/segment_statistics.hpp"
16#include "duckdb/storage/table/segment_tree.hpp"
17#include "duckdb/storage/table/column_segment_tree.hpp"
18#include "duckdb/common/mutex.hpp"
19
20namespace duckdb {
// Forward declarations of types that this header refers to by name only.
// Grouped by class-key (structs first, then classes) and sorted alphabetically within each group.
struct DataTableInfo;
struct TransactionData;

class ColumnData;
class ColumnSegment;
class DatabaseInstance;
class RowGroup;
class RowGroupWriter;
class TableDataWriter;
class TableStorageInfo;
31
32struct ColumnCheckpointInfo {
33 explicit ColumnCheckpointInfo(CompressionType compression_type_p) : compression_type(compression_type_p) {};
34 CompressionType compression_type;
35};
36
37class ColumnData {
38 friend class ColumnDataCheckpointer;
39
40public:
41 ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type,
42 optional_ptr<ColumnData> parent);
43 virtual ~ColumnData();
44
45 //! The start row
46 idx_t start;
47 //! The count of the column data
48 idx_t count;
49 //! The block manager
50 BlockManager &block_manager;
51 //! Table info for the column
52 DataTableInfo &info;
53 //! The column index of the column, either within the parent table or within the parent
54 idx_t column_index;
55 //! The type of the column
56 LogicalType type;
57 //! The parent column (if any)
58 optional_ptr<ColumnData> parent;
59
60public:
61 virtual bool CheckZonemap(ColumnScanState &state, TableFilter &filter) = 0;
62
63 BlockManager &GetBlockManager() {
64 return block_manager;
65 }
66 DatabaseInstance &GetDatabase() const;
67 DataTableInfo &GetTableInfo() const;
68 virtual idx_t GetMaxEntry();
69
70 void IncrementVersion();
71
72 virtual void SetStart(idx_t new_start);
73 //! The root type of the column
74 const LogicalType &RootType() const;
75
76 //! Initialize a scan of the column
77 virtual void InitializeScan(ColumnScanState &state);
78 //! Initialize a scan starting at the specified offset
79 virtual void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx);
80 //! Scan the next vector from the column
81 virtual idx_t Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result);
82 virtual idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates);
83 virtual void ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_group, idx_t count, Vector &result);
84 virtual idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count);
85 //! Select
86 virtual void Select(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result,
87 SelectionVector &sel, idx_t &count, const TableFilter &filter);
88 virtual void FilterScan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result,
89 SelectionVector &sel, idx_t count);
90 virtual void FilterScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, SelectionVector &sel,
91 idx_t count, bool allow_updates);
92
93 //! Skip the scan forward by "count" rows
94 virtual void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE);
95
96 //! Initialize an appending phase for this column
97 virtual void InitializeAppend(ColumnAppendState &state);
98 //! Append a vector of type [type] to the end of the column
99 virtual void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count);
100 //! Append a vector of type [type] to the end of the column
101 void Append(ColumnAppendState &state, Vector &vector, idx_t count);
102 virtual void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count);
103 //! Revert a set of appends to the ColumnData
104 virtual void RevertAppend(row_t start_row);
105
106 //! Fetch the vector from the column data that belongs to this specific row
107 virtual idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result);
108 //! Fetch a specific row id and append it to the vector
109 virtual void FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
110 idx_t result_idx);
111
112 virtual void Update(TransactionData transaction, idx_t column_index, Vector &update_vector, row_t *row_ids,
113 idx_t update_count);
114 virtual void UpdateColumn(TransactionData transaction, const vector<column_t> &column_path, Vector &update_vector,
115 row_t *row_ids, idx_t update_count, idx_t depth);
116 virtual unique_ptr<BaseStatistics> GetUpdateStatistics();
117
118 virtual void CommitDropColumn();
119
120 virtual unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group,
121 PartialBlockManager &partial_block_manager);
122 virtual unique_ptr<ColumnCheckpointState>
123 Checkpoint(RowGroup &row_group, PartialBlockManager &partial_block_manager, ColumnCheckpointInfo &checkpoint_info);
124
125 virtual void CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
126 Vector &scan_vector);
127
128 virtual void DeserializeColumn(Deserializer &source);
129 static shared_ptr<ColumnData> Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
130 idx_t start_row, Deserializer &source, const LogicalType &type,
131 optional_ptr<ColumnData> parent);
132
133 virtual void GetColumnSegmentInfo(idx_t row_group_index, vector<idx_t> col_path, vector<ColumnSegmentInfo> &result);
134 virtual void Verify(RowGroup &parent);
135
136 bool CheckZonemap(TableFilter &filter);
137
138 static shared_ptr<ColumnData> CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
139 idx_t start_row, const LogicalType &type,
140 optional_ptr<ColumnData> parent = nullptr);
141 static unique_ptr<ColumnData> CreateColumnUnique(BlockManager &block_manager, DataTableInfo &info,
142 idx_t column_index, idx_t start_row, const LogicalType &type,
143 optional_ptr<ColumnData> parent = nullptr);
144
145 void MergeStatistics(const BaseStatistics &other);
146 void MergeIntoStatistics(BaseStatistics &other);
147 unique_ptr<BaseStatistics> GetStatistics();
148
149protected:
150 //! Append a transient segment
151 void AppendTransientSegment(SegmentLock &l, idx_t start_row);
152
153 //! Scans a base vector from the column
154 idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining);
155 //! Scans a vector from the column merged with any potential updates
156 //! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found
157 template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
158 idx_t ScanVector(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result);
159
160protected:
161 //! The segments holding the data of this column segment
162 ColumnSegmentTree data;
163 //! The lock for the updates
164 mutex update_lock;
165 //! The updates for this column segment
166 unique_ptr<UpdateSegment> updates;
167 //! The internal version of the column data
168 idx_t version;
169 //! The stats of the root segment
170 unique_ptr<SegmentStatistics> stats;
171};
172
173} // namespace duckdb
174