//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/storage/table/row_group.hpp
//
//
//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/vector_size.hpp"
12#include "duckdb/storage/table/chunk_info.hpp"
13#include "duckdb/storage/statistics/segment_statistics.hpp"
14#include "duckdb/common/types/data_chunk.hpp"
15#include "duckdb/common/enums/scan_options.hpp"
16#include "duckdb/common/mutex.hpp"
17#include "duckdb/parser/column_list.hpp"
18#include "duckdb/storage/table/segment_base.hpp"
19#include "duckdb/storage/block.hpp"
20
21namespace duckdb {
// Forward declarations of types that this header only refers to by reference,
// pointer or smart pointer, so their full definitions are not needed here.
class AttachedDatabase;
class BlockManager;
class ColumnData;
class DatabaseInstance;
class DataTable;
class PartialBlockManager;
struct DataTableInfo;
class ExpressionExecutor;
class RowGroupCollection;
class RowGroupWriter;
class UpdateSegment;
class TableStatistics;
struct ColumnSegmentInfo;
class Vector;
struct ColumnCheckpointState;
struct RowGroupPointer;
struct TransactionData;
struct VersionNode;
class CollectionScanState;
class TableFilterSet;
struct ColumnFetchState;
struct RowGroupAppendState;
44
45struct RowGroupWriteData {
46 vector<unique_ptr<ColumnCheckpointState>> states;
47 vector<BaseStatistics> statistics;
48};
49
50class RowGroup : public SegmentBase<RowGroup> {
51public:
52 friend class ColumnData;
53 friend class VersionDeleteState;
54
55public:
56 static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
57 static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
58
59public:
60 RowGroup(RowGroupCollection &collection, idx_t start, idx_t count);
61 RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer);
62 ~RowGroup();
63
64private:
65 //! The RowGroupCollection this row-group is a part of
66 reference<RowGroupCollection> collection;
67 //! The version info of the row_group (inserted and deleted tuple info)
68 shared_ptr<VersionNode> version_info;
69 //! The column data of the row_group
70 vector<shared_ptr<ColumnData>> columns;
71
72public:
73 void MoveToCollection(RowGroupCollection &collection, idx_t new_start);
74 RowGroupCollection &GetCollection() {
75 return collection.get();
76 }
77 DatabaseInstance &GetDatabase();
78 BlockManager &GetBlockManager();
79 DataTableInfo &GetTableInfo();
80
81 unique_ptr<RowGroup> AlterType(RowGroupCollection &collection, const LogicalType &target_type, idx_t changed_idx,
82 ExpressionExecutor &executor, CollectionScanState &scan_state,
83 DataChunk &scan_chunk);
84 unique_ptr<RowGroup> AddColumn(RowGroupCollection &collection, ColumnDefinition &new_column,
85 ExpressionExecutor &executor, Expression *default_value, Vector &intermediate);
86 unique_ptr<RowGroup> RemoveColumn(RowGroupCollection &collection, idx_t removed_column);
87
88 void CommitDrop();
89 void CommitDropColumn(idx_t index);
90
91 void InitializeEmpty(const vector<LogicalType> &types);
92
93 //! Initialize a scan over this row_group
94 bool InitializeScan(CollectionScanState &state);
95 bool InitializeScanWithOffset(CollectionScanState &state, idx_t vector_offset);
96 //! Checks the given set of table filters against the row-group statistics. Returns false if the entire row group
97 //! can be skipped.
98 bool CheckZonemap(TableFilterSet &filters, const vector<column_t> &column_ids);
99 //! Checks the given set of table filters against the per-segment statistics. Returns false if any segments were
100 //! skipped.
101 bool CheckZonemapSegments(CollectionScanState &state);
102 void Scan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
103 void ScanCommitted(CollectionScanState &state, DataChunk &result, TableScanType type);
104
105 idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count);
106 idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx,
107 SelectionVector &sel_vector, idx_t max_count);
108
109 //! For a specific row, returns true if it should be used for the transaction and false otherwise.
110 bool Fetch(TransactionData transaction, idx_t row);
111 //! Fetch a specific row from the row_group and insert it into the result at the specified index
112 void FetchRow(TransactionData transaction, ColumnFetchState &state, const vector<column_t> &column_ids,
113 row_t row_id, DataChunk &result, idx_t result_idx);
114
115 //! Append count rows to the version info
116 void AppendVersionInfo(TransactionData transaction, idx_t count);
117 //! Commit a previous append made by RowGroup::AppendVersionInfo
118 void CommitAppend(transaction_t commit_id, idx_t start, idx_t count);
119 //! Revert a previous append made by RowGroup::AppendVersionInfo
120 void RevertAppend(idx_t start);
121
122 //! Delete the given set of rows in the version manager
123 idx_t Delete(TransactionData transaction, DataTable &table, row_t *row_ids, idx_t count);
124
125 RowGroupWriteData WriteToDisk(PartialBlockManager &manager, const vector<CompressionType> &compression_types);
126 RowGroupPointer Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats);
127 static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
128 static RowGroupPointer Deserialize(Deserializer &source, const vector<LogicalType> &columns);
129
130 void InitializeAppend(RowGroupAppendState &append_state);
131 void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count);
132
133 void Update(TransactionData transaction, DataChunk &updates, row_t *ids, idx_t offset, idx_t count,
134 const vector<PhysicalIndex> &column_ids);
135 //! Update a single column; corresponds to DataTable::UpdateColumn
136 //! This method should only be called from the WAL
137 void UpdateColumn(TransactionData transaction, DataChunk &updates, Vector &row_ids,
138 const vector<column_t> &column_path);
139
140 void MergeStatistics(idx_t column_idx, const BaseStatistics &other);
141 void MergeIntoStatistics(idx_t column_idx, BaseStatistics &other);
142 unique_ptr<BaseStatistics> GetStatistics(idx_t column_idx);
143
144 void GetColumnSegmentInfo(idx_t row_group_index, vector<ColumnSegmentInfo> &result);
145
146 void Verify();
147
148 void NextVector(CollectionScanState &state);
149
150private:
151 ChunkInfo *GetChunkInfo(idx_t vector_idx);
152 ColumnData &GetColumn(storage_t c);
153 idx_t GetColumnCount() const;
154 vector<shared_ptr<ColumnData>> &GetColumns();
155
156 template <TableScanType TYPE>
157 void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
158
159 static void CheckpointDeletes(VersionNode *versions, Serializer &serializer);
160 static shared_ptr<VersionNode> DeserializeDeletes(Deserializer &source);
161
162private:
163 mutex row_group_lock;
164 mutex stats_lock;
165 vector<BlockPointer> column_pointers;
166 unique_ptr<atomic<bool>[]> is_loaded;
167};
168
169struct VersionNode {
170 unique_ptr<ChunkInfo> info[RowGroup::ROW_GROUP_VECTOR_COUNT];
171
172 void SetStart(idx_t start);
173};
174
175} // namespace duckdb
176