1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/storage/table/column_segment.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/storage/block.hpp"
12#include "duckdb/common/types.hpp"
13#include "duckdb/common/types/vector.hpp"
14#include "duckdb/storage/buffer_manager.hpp"
15#include "duckdb/storage/statistics/segment_statistics.hpp"
16#include "duckdb/storage/storage_lock.hpp"
17#include "duckdb/function/compression_function.hpp"
18#include "duckdb/storage/table/segment_base.hpp"
19#include "duckdb/storage/buffer/block_handle.hpp"
20
21namespace duckdb {
// Forward declarations of types that this header only uses by reference, by pointer or as a
// template argument. Each name is declared exactly once.
class ColumnSegment;
class BlockManager;
class ColumnData;
class DatabaseInstance;
class Transaction;
class BaseStatistics;
class UpdateSegment;
class TableFilter;
struct ColumnFetchState;
struct ColumnScanState;
struct ColumnAppendState;
34
//! The kind of storage behind a column segment
enum class ColumnSegmentType : uint8_t {
	//! In-memory segment; per the ColumnSegment comments this is the only kind that supports appends
	TRANSIENT,
	//! Segment that refers to an on-disk block (it has a block id and an offset into that block)
	PERSISTENT
};
// Note: TableFilter (forward-declared above) represents a filter pushed down into the table scan.
37
38class ColumnSegment : public SegmentBase<ColumnSegment> {
39public:
40 ~ColumnSegment();
41
42 //! The database instance
43 DatabaseInstance &db;
44 //! The type stored in the column
45 LogicalType type;
46 //! The size of the type
47 idx_t type_size;
48 //! The column segment type (transient or persistent)
49 ColumnSegmentType segment_type;
50 //! The compression function
51 reference<CompressionFunction> function;
52 //! The statistics for the segment
53 SegmentStatistics stats;
54 //! The block that this segment relates to
55 shared_ptr<BlockHandle> block;
56
57 static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
58 block_id_t id, idx_t offset, const LogicalType &type_p,
59 idx_t start, idx_t count, CompressionType compression_type,
60 BaseStatistics statistics);
61 static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
62 idx_t segment_size = Storage::BLOCK_SIZE);
63 static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
64
65public:
66 void InitializeScan(ColumnScanState &state);
67 //! Scan one vector from this segment
68 void Scan(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset, bool entire_vector);
69 //! Fetch a value of the specific row id and append it to the result
70 void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx);
71
72 static idx_t FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter,
73 idx_t &approved_tuple_count, ValidityMask &mask);
74
75 //! Skip a scan forward to the row_index specified in the scan state
76 void Skip(ColumnScanState &state);
77
78 // The maximum size of the buffer (in bytes)
79 idx_t SegmentSize() const;
80 //! Resize the block
81 void Resize(idx_t segment_size);
82
83 //! Initialize an append of this segment. Appends are only supported on transient segments.
84 void InitializeAppend(ColumnAppendState &state);
85 //! Appends a (part of) vector to the segment, returns the amount of entries successfully appended
86 idx_t Append(ColumnAppendState &state, UnifiedVectorFormat &data, idx_t offset, idx_t count);
87 //! Finalize the segment for appending - no more appends can follow on this segment
88 //! The segment should be compacted as much as possible
89 //! Returns the number of bytes occupied within the segment
90 idx_t FinalizeAppend(ColumnAppendState &state);
91 //! Revert an append made to this segment
92 void RevertAppend(idx_t start_row);
93
94 //! Convert a transient in-memory segment into a persistent segment blocked by an on-disk block.
95 //! Only used during checkpointing.
96 void ConvertToPersistent(optional_ptr<BlockManager> block_manager, block_id_t block_id);
97 //! Updates pointers to refer to the given block and offset. This is only used
98 //! when sharing a block among segments. This is invoked only AFTER the block is written.
99 void MarkAsPersistent(shared_ptr<BlockHandle> block, uint32_t offset_in_block);
100
101 block_id_t GetBlockId() {
102 D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT);
103 return block_id;
104 }
105
106 BlockManager &GetBlockManager() const {
107 return block->block_manager;
108 }
109
110 idx_t GetBlockOffset() {
111 D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT || offset == 0);
112 return offset;
113 }
114
115 idx_t GetRelativeIndex(idx_t row_index) {
116 D_ASSERT(row_index >= this->start);
117 D_ASSERT(row_index <= this->start + this->count);
118 return row_index - this->start;
119 }
120
121 CompressedSegmentState *GetSegmentState() {
122 return segment_state.get();
123 }
124
125public:
126 ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
127 idx_t start, idx_t count, CompressionFunction &function, BaseStatistics statistics,
128 block_id_t block_id, idx_t offset, idx_t segment_size);
129 ColumnSegment(ColumnSegment &other, idx_t start);
130
131private:
132 void Scan(ColumnScanState &state, idx_t scan_count, Vector &result);
133 void ScanPartial(ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset);
134
135private:
136 //! The block id that this segment relates to (persistent segment only)
137 block_id_t block_id;
138 //! The offset into the block (persistent segment only)
139 idx_t offset;
140 //! The allocated segment size
141 idx_t segment_size;
142 //! Storage associated with the compressed segment
143 unique_ptr<CompressedSegmentState> segment_state;
144};
145
146} // namespace duckdb
147