1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/storage/data_table.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/enums/index_type.hpp"
12#include "duckdb/common/enums/scan_options.hpp"
13#include "duckdb/common/mutex.hpp"
14#include "duckdb/common/types/data_chunk.hpp"
15#include "duckdb/storage/index.hpp"
16#include "duckdb/storage/table/table_statistics.hpp"
17#include "duckdb/storage/block.hpp"
18#include "duckdb/storage/statistics/column_statistics.hpp"
19#include "duckdb/storage/table/column_segment.hpp"
20#include "duckdb/storage/table/persistent_table_data.hpp"
21#include "duckdb/storage/table/row_group_collection.hpp"
22#include "duckdb/storage/table/row_group.hpp"
23#include "duckdb/transaction/local_storage.hpp"
24#include "duckdb/storage/table/data_table_info.hpp"
25#include "duckdb/common/unique_ptr.hpp"
26
27namespace duckdb {
// Forward declarations of types that this header only refers to by
// reference or pointer; kept in alphabetical order.
class BoundForeignKeyConstraint;
class ClientContext;
class ColumnDataCollection;
class ColumnDefinition;
class ConflictManager;
class DataTable;
class DuckTransaction;
class OptimisticDataWriter;
class RowGroup;
class StorageManager;
class TableCatalogEntry;
class TableDataWriter;
class TableIOManager;
class TableScanState;
class Transaction;
class WriteAheadLog;

// Opaque declaration of the enum; its enumerators are defined elsewhere.
enum class VerifyExistenceType : uint8_t;
45
46//! DataTable represents a physical table on disk
47class DataTable {
48public:
49 //! Constructs a new data table from an (optional) set of persistent segments
50 DataTable(AttachedDatabase &db, shared_ptr<TableIOManager> table_io_manager, const string &schema,
51 const string &table, vector<ColumnDefinition> column_definitions_p,
52 unique_ptr<PersistentTableData> data = nullptr);
53 //! Constructs a DataTable as a delta on an existing data table with a newly added column
54 DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value);
55 //! Constructs a DataTable as a delta on an existing data table but with one column removed
56 DataTable(ClientContext &context, DataTable &parent, idx_t removed_column);
57 //! Constructs a DataTable as a delta on an existing data table but with one column changed type
58 DataTable(ClientContext &context, DataTable &parent, idx_t changed_idx, const LogicalType &target_type,
59 const vector<column_t> &bound_columns, Expression &cast_expr);
60 //! Constructs a DataTable as a delta on an existing data table but with one column added new constraint
61 explicit DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint);
62
63 //! The table info
64 shared_ptr<DataTableInfo> info;
65 //! The set of physical columns stored by this DataTable
66 vector<ColumnDefinition> column_definitions;
67 //! A reference to the database instance
68 AttachedDatabase &db;
69
70public:
71 //! Returns a list of types of the table
72 vector<LogicalType> GetTypes();
73
74 void InitializeScan(TableScanState &state, const vector<column_t> &column_ids,
75 TableFilterSet *table_filter = nullptr);
76 void InitializeScan(DuckTransaction &transaction, TableScanState &state, const vector<column_t> &column_ids,
77 TableFilterSet *table_filters = nullptr);
78
79 //! Returns the maximum amount of threads that should be assigned to scan this data table
80 idx_t MaxThreads(ClientContext &context);
81 void InitializeParallelScan(ClientContext &context, ParallelTableScanState &state);
82 bool NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state);
83
84 //! Scans up to STANDARD_VECTOR_SIZE elements from the table starting
85 //! from offset and store them in result. Offset is incremented with how many
86 //! elements were returned.
87 //! Returns true if all pushed down filters were executed during data fetching
88 void Scan(DuckTransaction &transaction, DataChunk &result, TableScanState &state);
89
90 //! Fetch data from the specific row identifiers from the base table
91 void Fetch(DuckTransaction &transaction, DataChunk &result, const vector<column_t> &column_ids,
92 const Vector &row_ids, idx_t fetch_count, ColumnFetchState &state);
93
94 //! Initializes an append to transaction-local storage
95 void InitializeLocalAppend(LocalAppendState &state, ClientContext &context);
96 //! Append a DataChunk to the transaction-local storage of the table.
97 void LocalAppend(LocalAppendState &state, TableCatalogEntry &table, ClientContext &context, DataChunk &chunk,
98 bool unsafe = false);
99 //! Finalizes a transaction-local append
100 void FinalizeLocalAppend(LocalAppendState &state);
101 //! Append a chunk to the transaction-local storage of this table
102 void LocalAppend(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
103 //! Append a column data collection to the transaction-local storage of this table
104 void LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection);
105 //! Merge a row group collection into the transaction-local storage
106 void LocalMerge(ClientContext &context, RowGroupCollection &collection);
107 //! Creates an optimistic writer for this table - used for optimistically writing parallel appends
108 OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context);
109 void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer);
110
111 //! Delete the entries with the specified row identifier from the table
112 idx_t Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count);
113 //! Update the entries with the specified row identifier from the table
114 void Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
115 const vector<PhysicalIndex> &column_ids, DataChunk &data);
116 //! Update a single (sub-)column along a column path
117 //! The column_path vector is a *path* towards a column within the table
118 //! i.e. if we have a table with a single column S STRUCT(A INT, B INT)
119 //! and we update the validity mask of "S.B"
120 //! the column path is:
121 //! 0 (first column of table)
122 //! -> 1 (second subcolumn of struct)
123 //! -> 0 (first subcolumn of INT)
124 //! This method should only be used from the WAL replay. It does not verify update constraints.
125 void UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
126 const vector<column_t> &column_path, DataChunk &updates);
127
128 //! Add an index to the DataTable. NOTE: for CREATE (UNIQUE) INDEX statements, we use the PhysicalCreateIndex
129 //! operator. This function is only used during the WAL replay, and is a much less performant index creation
130 //! approach.
131 void WALAddIndex(ClientContext &context, unique_ptr<Index> index,
132 const vector<unique_ptr<Expression>> &expressions);
133
134 //! Fetches an append lock
135 void AppendLock(TableAppendState &state);
136 //! Begin appending structs to this table, obtaining necessary locks, etc
137 void InitializeAppend(DuckTransaction &transaction, TableAppendState &state, idx_t append_count);
138 //! Append a chunk to the table using the AppendState obtained from InitializeAppend
139 void Append(DataChunk &chunk, TableAppendState &state);
140 //! Commit the append
141 void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
142 //! Write a segment of the table to the WAL
143 void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count);
144 //! Revert a set of appends made by the given AppendState, used to revert appends in the event of an error during
145 //! commit (e.g. because of an I/O exception)
146 void RevertAppend(idx_t start_row, idx_t count);
147 void RevertAppendInternal(idx_t start_row, idx_t count);
148
149 void ScanTableSegment(idx_t start_row, idx_t count, const std::function<void(DataChunk &chunk)> &function);
150
151 //! Merge a row group collection directly into this table - appending it to the end of the table without copying
152 void MergeStorage(RowGroupCollection &data, TableIndexList &indexes);
153
154 //! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table, returns
155 //! whether or not the append succeeded
156 PreservedError AppendToIndexes(DataChunk &chunk, row_t row_start);
157 static PreservedError AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start);
158 //! Remove a chunk with the row ids [row_start, ..., row_start + chunk.size()] from all indexes of the table
159 void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start);
160 //! Remove the chunk with the specified set of row identifiers from all indexes of the table
161 void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers);
162 //! Remove the row identifiers from all the indexes of the table
163 void RemoveFromIndexes(Vector &row_identifiers, idx_t count);
164
165 void SetAsRoot() {
166 this->is_root = true;
167 }
168 bool IsRoot() {
169 return this->is_root;
170 }
171
172 //! Get statistics of a physical column within the table
173 unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
174 //! Sets statistics of a physical column within the table
175 void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
176
177 //! Checkpoint the table to the specified table data writer
178 void Checkpoint(TableDataWriter &writer);
179 void CommitDropTable();
180 void CommitDropColumn(idx_t index);
181
182 idx_t GetTotalRows();
183
184 vector<ColumnSegmentInfo> GetColumnSegmentInfo();
185 static bool IsForeignKeyIndex(const vector<PhysicalIndex> &fk_keys, Index &index, ForeignKeyType fk_type);
186
187 //! Initializes a special scan that is used to create an index on the table, it keeps locks on the table
188 void InitializeWALCreateIndexScan(CreateIndexScanState &state, const vector<column_t> &column_ids);
189 //! Scans the next chunk for the CREATE INDEX operator
190 bool CreateIndexScan(TableScanState &state, DataChunk &result, TableScanType type);
191
192 //! Verify constraints with a chunk from the Append containing all columns of the table
193 void VerifyAppendConstraints(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk,
194 ConflictManager *conflict_manager = nullptr);
195
196public:
197 static void VerifyUniqueIndexes(TableIndexList &indexes, ClientContext &context, DataChunk &chunk,
198 ConflictManager *conflict_manager);
199
200private:
201 //! Verify the new added constraints against current persistent&local data
202 void VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint);
203 //! Verify constraints with a chunk from the Update containing only the specified column_ids
204 void VerifyUpdateConstraints(ClientContext &context, TableCatalogEntry &table, DataChunk &chunk,
205 const vector<PhysicalIndex> &column_ids);
206 //! Verify constraints with a chunk from the Delete containing all columns of the table
207 void VerifyDeleteConstraints(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
208
209 void InitializeScanWithOffset(TableScanState &state, const vector<column_t> &column_ids, idx_t start_row,
210 idx_t end_row);
211
212 void VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context, DataChunk &chunk,
213 VerifyExistenceType verify_type);
214 void VerifyAppendForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context,
215 DataChunk &chunk);
216 void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bfk, ClientContext &context,
217 DataChunk &chunk);
218
219private:
220 //! Lock for appending entries to the table
221 mutex append_lock;
222 //! The row groups of the table
223 shared_ptr<RowGroupCollection> row_groups;
224 //! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version
225 //! that can be appended to
226 atomic<bool> is_root;
227};
228} // namespace duckdb
229