1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/storage/table/column_segment.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/storage/block.hpp" |
12 | #include "duckdb/common/types.hpp" |
13 | #include "duckdb/common/types/vector.hpp" |
14 | #include "duckdb/storage/buffer_manager.hpp" |
15 | #include "duckdb/storage/statistics/segment_statistics.hpp" |
16 | #include "duckdb/storage/storage_lock.hpp" |
17 | #include "duckdb/function/compression_function.hpp" |
18 | #include "duckdb/storage/table/segment_base.hpp" |
19 | #include "duckdb/storage/buffer/block_handle.hpp" |
20 | |
21 | namespace duckdb { |
//! Forward declarations (ColumnSegment itself is defined further down in this header).
//! Each name is declared exactly once.
class ColumnSegment;
class BlockManager;
class ColumnData;
class DatabaseInstance;
class Transaction;
class BaseStatistics;
class UpdateSegment;
class TableFilter;
struct ColumnFetchState;
struct ColumnScanState;
struct ColumnAppendState;
34 | |
//! The kind of a column segment
enum class ColumnSegmentType : uint8_t {
	//! Transient: in-memory segment (the only kind that supports appends)
	TRANSIENT = 0,
	//! Persistent: segment backed by an on-disk block
	PERSISTENT = 1
};
36 | //! TableFilter represents a filter pushed down into the table scan. |
37 | |
38 | class ColumnSegment : public SegmentBase<ColumnSegment> { |
39 | public: |
40 | ~ColumnSegment(); |
41 | |
42 | //! The database instance |
43 | DatabaseInstance &db; |
44 | //! The type stored in the column |
45 | LogicalType type; |
46 | //! The size of the type |
47 | idx_t type_size; |
48 | //! The column segment type (transient or persistent) |
49 | ColumnSegmentType segment_type; |
50 | //! The compression function |
51 | reference<CompressionFunction> function; |
52 | //! The statistics for the segment |
53 | SegmentStatistics stats; |
54 | //! The block that this segment relates to |
55 | shared_ptr<BlockHandle> block; |
56 | |
57 | static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager, |
58 | block_id_t id, idx_t offset, const LogicalType &type_p, |
59 | idx_t start, idx_t count, CompressionType compression_type, |
60 | BaseStatistics statistics); |
61 | static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start, |
62 | idx_t segment_size = Storage::BLOCK_SIZE); |
63 | static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start); |
64 | |
65 | public: |
66 | void InitializeScan(ColumnScanState &state); |
67 | //! Scan one vector from this segment |
68 | void Scan(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset, bool entire_vector); |
69 | //! Fetch a value of the specific row id and append it to the result |
70 | void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx); |
71 | |
72 | static idx_t FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter, |
73 | idx_t &approved_tuple_count, ValidityMask &mask); |
74 | |
75 | //! Skip a scan forward to the row_index specified in the scan state |
76 | void Skip(ColumnScanState &state); |
77 | |
78 | // The maximum size of the buffer (in bytes) |
79 | idx_t SegmentSize() const; |
80 | //! Resize the block |
81 | void Resize(idx_t segment_size); |
82 | |
83 | //! Initialize an append of this segment. Appends are only supported on transient segments. |
84 | void InitializeAppend(ColumnAppendState &state); |
85 | //! Appends a (part of) vector to the segment, returns the amount of entries successfully appended |
86 | idx_t Append(ColumnAppendState &state, UnifiedVectorFormat &data, idx_t offset, idx_t count); |
87 | //! Finalize the segment for appending - no more appends can follow on this segment |
88 | //! The segment should be compacted as much as possible |
89 | //! Returns the number of bytes occupied within the segment |
90 | idx_t FinalizeAppend(ColumnAppendState &state); |
91 | //! Revert an append made to this segment |
92 | void RevertAppend(idx_t start_row); |
93 | |
94 | //! Convert a transient in-memory segment into a persistent segment blocked by an on-disk block. |
95 | //! Only used during checkpointing. |
96 | void ConvertToPersistent(optional_ptr<BlockManager> block_manager, block_id_t block_id); |
97 | //! Updates pointers to refer to the given block and offset. This is only used |
98 | //! when sharing a block among segments. This is invoked only AFTER the block is written. |
99 | void MarkAsPersistent(shared_ptr<BlockHandle> block, uint32_t offset_in_block); |
100 | |
101 | block_id_t GetBlockId() { |
102 | D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT); |
103 | return block_id; |
104 | } |
105 | |
106 | BlockManager &GetBlockManager() const { |
107 | return block->block_manager; |
108 | } |
109 | |
110 | idx_t GetBlockOffset() { |
111 | D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT || offset == 0); |
112 | return offset; |
113 | } |
114 | |
115 | idx_t GetRelativeIndex(idx_t row_index) { |
116 | D_ASSERT(row_index >= this->start); |
117 | D_ASSERT(row_index <= this->start + this->count); |
118 | return row_index - this->start; |
119 | } |
120 | |
121 | CompressedSegmentState *GetSegmentState() { |
122 | return segment_state.get(); |
123 | } |
124 | |
125 | public: |
126 | ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type, |
127 | idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics, |
128 | block_id_t block_id, idx_t offset, idx_t segment_size); |
129 | ColumnSegment(ColumnSegment &other, idx_t start); |
130 | |
131 | private: |
132 | void Scan(ColumnScanState &state, idx_t scan_count, Vector &result); |
133 | void ScanPartial(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset); |
134 | |
135 | private: |
136 | //! The block id that this segment relates to (persistent segment only) |
137 | block_id_t block_id; |
138 | //! The offset into the block (persistent segment only) |
139 | idx_t offset; |
140 | //! The allocated segment size |
141 | idx_t segment_size; |
142 | //! Storage associated with the compressed segment |
143 | unique_ptr<CompressedSegmentState> segment_state; |
144 | }; |
145 | |
146 | } // namespace duckdb |
147 | |