| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/storage/table/row_group.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/vector_size.hpp" |
| 12 | #include "duckdb/storage/table/chunk_info.hpp" |
| 13 | #include "duckdb/storage/statistics/segment_statistics.hpp" |
| 14 | #include "duckdb/common/types/data_chunk.hpp" |
| 15 | #include "duckdb/common/enums/scan_options.hpp" |
| 16 | #include "duckdb/common/mutex.hpp" |
| 17 | #include "duckdb/parser/column_list.hpp" |
| 18 | #include "duckdb/storage/table/segment_base.hpp" |
| 19 | #include "duckdb/storage/block.hpp" |
| 20 | |
| 21 | namespace duckdb { |
// Forward declarations (types declared with the `class` key)
class AttachedDatabase;
class BlockManager;
class CollectionScanState;
class ColumnData;
class DatabaseInstance;
class DataTable;
class ExpressionExecutor;
class PartialBlockManager;
class RowGroupCollection;
class RowGroupWriter;
class TableFilterSet;
class TableStatistics;
class UpdateSegment;
class Vector;

// Forward declarations (types declared with the `struct` key)
struct ColumnCheckpointState;
struct ColumnFetchState;
struct ColumnSegmentInfo;
struct DataTableInfo;
struct RowGroupAppendState;
struct RowGroupPointer;
struct TransactionData;
struct VersionNode;
| 44 | |
| 45 | struct RowGroupWriteData { |
| 46 | vector<unique_ptr<ColumnCheckpointState>> states; |
| 47 | vector<BaseStatistics> statistics; |
| 48 | }; |
| 49 | |
| 50 | class RowGroup : public SegmentBase<RowGroup> { |
| 51 | public: |
| 52 | friend class ColumnData; |
| 53 | friend class VersionDeleteState; |
| 54 | |
| 55 | public: |
| 56 | static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE; |
| 57 | static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE; |
| 58 | |
| 59 | public: |
| 60 | RowGroup(RowGroupCollection &collection, idx_t start, idx_t count); |
| 61 | RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer); |
| 62 | ~RowGroup(); |
| 63 | |
| 64 | private: |
| 65 | //! The RowGroupCollection this row-group is a part of |
| 66 | reference<RowGroupCollection> collection; |
| 67 | //! The version info of the row_group (inserted and deleted tuple info) |
| 68 | shared_ptr<VersionNode> version_info; |
| 69 | //! The column data of the row_group |
| 70 | vector<shared_ptr<ColumnData>> columns; |
| 71 | |
| 72 | public: |
| 73 | void MoveToCollection(RowGroupCollection &collection, idx_t new_start); |
| 74 | RowGroupCollection &GetCollection() { |
| 75 | return collection.get(); |
| 76 | } |
| 77 | DatabaseInstance &GetDatabase(); |
| 78 | BlockManager &GetBlockManager(); |
| 79 | DataTableInfo &GetTableInfo(); |
| 80 | |
| 81 | unique_ptr<RowGroup> AlterType(RowGroupCollection &collection, const LogicalType &target_type, idx_t changed_idx, |
| 82 | ExpressionExecutor &executor, CollectionScanState &scan_state, |
| 83 | DataChunk &scan_chunk); |
| 84 | unique_ptr<RowGroup> AddColumn(RowGroupCollection &collection, ColumnDefinition &new_column, |
| 85 | ExpressionExecutor &executor, Expression *default_value, Vector &intermediate); |
| 86 | unique_ptr<RowGroup> RemoveColumn(RowGroupCollection &collection, idx_t removed_column); |
| 87 | |
| 88 | void CommitDrop(); |
| 89 | void CommitDropColumn(idx_t index); |
| 90 | |
| 91 | void InitializeEmpty(const vector<LogicalType> &types); |
| 92 | |
| 93 | //! Initialize a scan over this row_group |
| 94 | bool InitializeScan(CollectionScanState &state); |
| 95 | bool InitializeScanWithOffset(CollectionScanState &state, idx_t vector_offset); |
| 96 | //! Checks the given set of table filters against the row-group statistics. Returns false if the entire row group |
| 97 | //! can be skipped. |
| 98 | bool CheckZonemap(TableFilterSet &filters, const vector<column_t> &column_ids); |
| 99 | //! Checks the given set of table filters against the per-segment statistics. Returns false if any segments were |
| 100 | //! skipped. |
| 101 | bool CheckZonemapSegments(CollectionScanState &state); |
| 102 | void Scan(TransactionData transaction, CollectionScanState &state, DataChunk &result); |
| 103 | void ScanCommitted(CollectionScanState &state, DataChunk &result, TableScanType type); |
| 104 | |
| 105 | idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); |
| 106 | idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx, |
| 107 | SelectionVector &sel_vector, idx_t max_count); |
| 108 | |
| 109 | //! For a specific row, returns true if it should be used for the transaction and false otherwise. |
| 110 | bool Fetch(TransactionData transaction, idx_t row); |
| 111 | //! Fetch a specific row from the row_group and insert it into the result at the specified index |
| 112 | void FetchRow(TransactionData transaction, ColumnFetchState &state, const vector<column_t> &column_ids, |
| 113 | row_t row_id, DataChunk &result, idx_t result_idx); |
| 114 | |
| 115 | //! Append count rows to the version info |
| 116 | void AppendVersionInfo(TransactionData transaction, idx_t count); |
| 117 | //! Commit a previous append made by RowGroup::AppendVersionInfo |
| 118 | void CommitAppend(transaction_t commit_id, idx_t start, idx_t count); |
| 119 | //! Revert a previous append made by RowGroup::AppendVersionInfo |
| 120 | void RevertAppend(idx_t start); |
| 121 | |
| 122 | //! Delete the given set of rows in the version manager |
| 123 | idx_t Delete(TransactionData transaction, DataTable &table, row_t *row_ids, idx_t count); |
| 124 | |
| 125 | RowGroupWriteData WriteToDisk(PartialBlockManager &manager, const vector<CompressionType> &compression_types); |
| 126 | RowGroupPointer Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats); |
| 127 | static void Serialize(RowGroupPointer &pointer, Serializer &serializer); |
| 128 | static RowGroupPointer Deserialize(Deserializer &source, const vector<LogicalType> &columns); |
| 129 | |
| 130 | void InitializeAppend(RowGroupAppendState &append_state); |
| 131 | void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count); |
| 132 | |
| 133 | void Update(TransactionData transaction, DataChunk &updates, row_t *ids, idx_t offset, idx_t count, |
| 134 | const vector<PhysicalIndex> &column_ids); |
| 135 | //! Update a single column; corresponds to DataTable::UpdateColumn |
| 136 | //! This method should only be called from the WAL |
| 137 | void UpdateColumn(TransactionData transaction, DataChunk &updates, Vector &row_ids, |
| 138 | const vector<column_t> &column_path); |
| 139 | |
| 140 | void MergeStatistics(idx_t column_idx, const BaseStatistics &other); |
| 141 | void MergeIntoStatistics(idx_t column_idx, BaseStatistics &other); |
| 142 | unique_ptr<BaseStatistics> GetStatistics(idx_t column_idx); |
| 143 | |
| 144 | void GetColumnSegmentInfo(idx_t row_group_index, vector<ColumnSegmentInfo> &result); |
| 145 | |
| 146 | void Verify(); |
| 147 | |
| 148 | void NextVector(CollectionScanState &state); |
| 149 | |
| 150 | private: |
| 151 | ChunkInfo *GetChunkInfo(idx_t vector_idx); |
| 152 | ColumnData &GetColumn(storage_t c); |
| 153 | idx_t GetColumnCount() const; |
| 154 | vector<shared_ptr<ColumnData>> &GetColumns(); |
| 155 | |
| 156 | template <TableScanType TYPE> |
| 157 | void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result); |
| 158 | |
| 159 | static void CheckpointDeletes(VersionNode *versions, Serializer &serializer); |
| 160 | static shared_ptr<VersionNode> DeserializeDeletes(Deserializer &source); |
| 161 | |
| 162 | private: |
| 163 | mutex row_group_lock; |
| 164 | mutex stats_lock; |
| 165 | vector<BlockPointer> column_pointers; |
| 166 | unique_ptr<atomic<bool>[]> is_loaded; |
| 167 | }; |
| 168 | |
| 169 | struct VersionNode { |
| 170 | unique_ptr<ChunkInfo> info[RowGroup::ROW_GROUP_VECTOR_COUNT]; |
| 171 | |
| 172 | void SetStart(idx_t start); |
| 173 | }; |
| 174 | |
| 175 | } // namespace duckdb |
| 176 | |