//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/storage/data_table.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/common/enums/index_type.hpp"
#include "duckdb/common/enums/scan_options.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "duckdb/storage/index.hpp"
#include "duckdb/storage/table/table_statistics.hpp"
#include "duckdb/storage/block.hpp"
#include "duckdb/storage/statistics/column_statistics.hpp"
#include "duckdb/storage/table/column_segment.hpp"
#include "duckdb/storage/table/persistent_table_data.hpp"
#include "duckdb/storage/table/row_group_collection.hpp"
#include "duckdb/storage/table/row_group.hpp"
#include "duckdb/transaction/local_storage.hpp"
#include "duckdb/storage/table/data_table_info.hpp"
#include "duckdb/common/unique_ptr.hpp"

namespace duckdb {
class BoundForeignKeyConstraint;
class ClientContext;
class ColumnDataCollection;
class ColumnDefinition;
class DataTable;
class DuckTransaction;
class OptimisticDataWriter;
class RowGroup;
class StorageManager;
class TableCatalogEntry;
class TableIOManager;
class Transaction;
class WriteAheadLog;
class TableDataWriter;
class ConflictManager;
class TableScanState;
enum class VerifyExistenceType : uint8_t;

//! DataTable represents a physical table on disk
class DataTable {
public:
	//! Constructs a new data table from an (optional) set of persistent segments
	DataTable(AttachedDatabase &db, shared_ptr<TableIOManager> table_io_manager, const string &schema,
	          const string &table, vector<ColumnDefinition> column_definitions_p,
	          unique_ptr<PersistentTableData> data = nullptr);
	//! Constructs a DataTable as a delta on an existing data table with a newly added column
	DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value);
	//! Constructs a DataTable as a delta on an existing data table but with one column removed
	DataTable(ClientContext &context, DataTable &parent, idx_t removed_column);
	//! Constructs a DataTable as a delta on an existing data table but with the type of one column changed
	DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type,
	          const vector<column_t> &bound_columns, Expression &cast_expr);
	//! Constructs a DataTable as a delta on an existing data table but with a newly added constraint
	explicit DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint);

	//! The table info
	shared_ptr<DataTableInfo> info;
	//! The set of physical columns stored by this DataTable
	vector<ColumnDefinition> column_definitions;
	//! A reference to the database instance
	AttachedDatabase &db;

public:
	//! Returns a list of the column types of the table
	vector<LogicalType> GetTypes();

	void InitializeScan(TableScanState &state, const vector<column_t> &column_ids,
	                    TableFilterSet *table_filter = nullptr);
	void InitializeScan(DuckTransaction &transaction, TableScanState &state, const vector<column_t> &column_ids,
	                    TableFilterSet *table_filters = nullptr);

	//! Returns the maximum number of threads that should be assigned to scan this data table
	idx_t MaxThreads(ClientContext &context);
	void InitializeParallelScan(ClientContext &context, ParallelTableScanState &state);
	bool NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state);

	//! Scans up to STANDARD_VECTOR_SIZE rows from the table, starting from the offset tracked in
	//! the scan state, and stores them in result. The offset is advanced by the number of rows
	//! returned. Filters that were pushed down into the scan state are applied during the scan
	//! where possible.
	void Scan(DuckTransaction &transaction, DataChunk &result, TableScanState &state);
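	// A rough usage sketch of InitializeScan + Scan (hedged; `context`, `transaction` and `table`
	// are assumed to be an active ClientContext, a DuckTransaction on this table's database, and a
	// DataTable reference, respectively):
	//
	//   vector<column_t> column_ids;
	//   for (column_t i = 0; i < table.column_definitions.size(); i++) {
	//       column_ids.push_back(i); // scan every physical column
	//   }
	//   TableScanState state;
	//   table.InitializeScan(transaction, state, column_ids);
	//   DataChunk result;
	//   result.Initialize(Allocator::Get(context), table.GetTypes());
	//   do {
	//       result.Reset();
	//       table.Scan(transaction, result, state);
	//       // process up to STANDARD_VECTOR_SIZE rows in `result`
	//   } while (result.size() > 0);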

	//! Fetch data for the specified row identifiers from the base table
	void Fetch(DuckTransaction &transaction, DataChunk &result, const vector<column_t> &column_ids,
	           const Vector &row_ids, idx_t fetch_count, ColumnFetchState &state);

	//! Initializes an append to transaction-local storage
	void InitializeLocalAppend(LocalAppendState &state, ClientContext &context);
	//! Append a DataChunk to the transaction-local storage of the table
	void LocalAppend(LocalAppendState &state, TableCatalogEntry &table, ClientContext &context, DataChunk &chunk,
	                 bool unsafe = false);
	//! Finalizes a transaction-local append
	void FinalizeLocalAppend(LocalAppendState &state);
	//! Append a chunk to the transaction-local storage of this table
	void LocalAppend(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
	//! Append a column data collection to the transaction-local storage of this table
	void LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection);
	//! Merge a row group collection into the transaction-local storage
	void LocalMerge(ClientContext &context, RowGroupCollection &collection);
	//! Creates an optimistic writer for this table - used for optimistically writing parallel appends
	OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context);
	void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer);
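	// A rough sketch of the transaction-local append flow (hedged; `table_entry` is assumed to be
	// the TableCatalogEntry of this table and `chunk` a DataChunk whose types match the table):
	//
	//   LocalAppendState append_state;
	//   table.InitializeLocalAppend(append_state, context);
	//   table.LocalAppend(append_state, table_entry, context, chunk);
	//   // ... append further chunks with the same state ...
	//   table.FinalizeLocalAppend(append_state);
	//
	// The single-chunk LocalAppend(table_entry, context, chunk) overload above effectively wraps
	// these three calls for callers that only have one chunk to append.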

	//! Delete the entries with the specified row identifiers from the table
	idx_t Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count);
	//! Update the entries with the specified row identifiers in the table
	void Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
	            const vector<PhysicalIndex> &column_ids, DataChunk &data);
	//! Update a single (sub-)column along a column path
	//! The column_path vector is a *path* towards a column within the table
	//! i.e. if we have a table with a single column S STRUCT(A INT, B INT)
	//! and we update the validity mask of "S.B"
	//! the column path is:
	//! 0 (first column of table)
	//! -> 1 (second subcolumn of struct)
	//! -> 0 (first subcolumn of INT, i.e. its validity mask)
	//! This method should only be used during WAL replay. It does not verify update constraints.
	void UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
	                  const vector<column_t> &column_path, DataChunk &updates);
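	// A rough sketch of the struct example above (hedged; `row_ids` and `updates` are assumed to
	// be prepared by the caller, as they would be during WAL replay):
	//
	//   vector<column_t> column_path;
	//   column_path.push_back(0); // first column of the table: S
	//   column_path.push_back(1); // second subcolumn of the struct: B
	//   column_path.push_back(0); // first subcolumn of the INT column: its validity mask
	//   table.UpdateColumn(table_entry, context, row_ids, column_path, updates);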

	//! Add an index to the DataTable. NOTE: for CREATE (UNIQUE) INDEX statements, we use the PhysicalCreateIndex
	//! operator. This function is only used during WAL replay, and is a much less performant index creation
	//! approach.
	void WALAddIndex(ClientContext &context, unique_ptr<Index> index,
	                 const vector<unique_ptr<Expression>> &expressions);

	//! Obtains an append lock on this table
	void AppendLock(TableAppendState &state);
	//! Begin appending to this table, obtaining the necessary locks, etc.
	void InitializeAppend(DuckTransaction &transaction, TableAppendState &state, idx_t append_count);
	//! Append a chunk to the table using the AppendState obtained from InitializeAppend
	void Append(DataChunk &chunk, TableAppendState &state);
	//! Commit the append
	void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
	//! Write a segment of the table to the WAL
	void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count);
	//! Revert a set of appended rows (identified by their start row and count), used to undo appends in the event of
	//! an error during commit (e.g. because of an I/O exception)
	void RevertAppend(idx_t start_row, idx_t count);
	void RevertAppendInternal(idx_t start_row, idx_t count);
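	// A rough sketch of the direct append flow (hedged; in DuckDB this path is normally driven by
	// the transaction-local storage when it is flushed, rather than by external callers, and
	// `first_row` is assumed to be the row id at which the append started):
	//
	//   TableAppendState append_state;
	//   table.InitializeAppend(transaction, append_state, total_append_count);
	//   table.Append(chunk, append_state); // repeated once per chunk
	//   table.CommitAppend(commit_id, first_row, total_append_count);
	//   // if an error occurs before the append is committed, RevertAppend(first_row,
	//   // total_append_count) undoes the appended rows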

	void ScanTableSegment(idx_t start_row, idx_t count, const std::function<void(DataChunk &chunk)> &function);

	//! Merge a row group collection directly into this table - appending it to the end of the table without copying
	void MergeStorage(RowGroupCollection &data, TableIndexList &indexes);

	//! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table;
	//! returns an error if the append failed for any index
	PreservedError AppendToIndexes(DataChunk &chunk, row_t row_start);
	static PreservedError AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start);
	//! Remove a chunk with the row ids [row_start, ..., row_start + chunk.size()] from all indexes of the table
	void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start);
	//! Remove the chunk with the specified set of row identifiers from all indexes of the table
	void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers);
	//! Remove the row identifiers from all the indexes of the table
	void RemoveFromIndexes(Vector &row_identifiers, idx_t count);
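	// A rough sketch of keeping the indexes in sync with an append (hedged; this mirrors how the
	// append paths use these methods internally, with `row_start` as the first appended row id):
	//
	//   auto error = table.AppendToIndexes(chunk, row_start);
	//   if (error) {
	//       // e.g. a UNIQUE constraint was violated - surface the error to the caller
	//       error.Throw();
	//   }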

	void SetAsRoot() {
		this->is_root = true;
	}
	bool IsRoot() {
		return this->is_root;
	}

	//! Get statistics of a physical column within the table
	unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
	//! Sets the distinct statistics of a physical column within the table
	void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);

	//! Checkpoint the table to the specified table data writer
	void Checkpoint(TableDataWriter &writer);
	void CommitDropTable();
	void CommitDropColumn(idx_t index);

	idx_t GetTotalRows();

	vector<ColumnSegmentInfo> GetColumnSegmentInfo();
	static bool IsForeignKeyIndex(const vector<PhysicalIndex> &fk_keys, Index &index, ForeignKeyType fk_type);

	//! Initializes a special scan that is used to create an index on the table; it keeps locks on the table
	void InitializeWALCreateIndexScan(CreateIndexScanState &state, const vector<column_t> &column_ids);
	//! Scans the next chunk for the CREATE INDEX operator
	bool CreateIndexScan(TableScanState &state, DataChunk &result, TableScanType type);
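	// A rough sketch of the WAL-replay index build loop (hedged; initializing `chunk` with the
	// types of the scanned columns and feeding it into the new index are elided):
	//
	//   CreateIndexScanState state;
	//   table.InitializeWALCreateIndexScan(state, column_ids);
	//   while (true) {
	//       chunk.Reset();
	//       table.CreateIndexScan(state, chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
	//       if (chunk.size() == 0) {
	//           break; // finished scanning the table
	//       }
	//       // insert the scanned rows into the index being built
	//   }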

	//! Verify constraints with a chunk from the Append containing all columns of the table
	void VerifyAppendConstraints(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk,
	                             ConflictManager *conflict_manager = nullptr);

public:
	static void VerifyUniqueIndexes(TableIndexList &indexes, ClientContext &context, DataChunk &chunk,
	                                ConflictManager *conflict_manager);

private:
	//! Verify the newly added constraint against the current persistent and local data
	void VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint);
	//! Verify constraints with a chunk from the Update containing only the specified column_ids
	void VerifyUpdateConstraints(ClientContext &context, TableCatalogEntry &table, DataChunk &chunk,
	                             const vector<PhysicalIndex> &column_ids);
	//! Verify constraints with a chunk from the Delete containing all columns of the table
	void VerifyDeleteConstraints(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);

	void InitializeScanWithOffset(TableScanState &state, const vector<column_t> &column_ids, idx_t start_row,
	                              idx_t end_row);

	void VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context, DataChunk &chunk,
	                                VerifyExistenceType verify_type);
	void VerifyAppendForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context,
	                                      DataChunk &chunk);
	void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context,
	                                      DataChunk &chunk);

private:
	//! Lock for appending entries to the table
	mutex append_lock;
	//! The row groups of the table
	shared_ptr<RowGroupCollection> row_groups;
	//! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version
	//! that can be appended to
	atomic<bool> is_root;
};
} // namespace duckdb
229 | |