1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/storage/table/scan_state.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/common.hpp"
12#include "duckdb/storage/buffer/buffer_handle.hpp"
13#include "duckdb/storage/storage_lock.hpp"
14#include "duckdb/common/enums/scan_options.hpp"
15#include "duckdb/execution/adaptive_filter.hpp"
16#include "duckdb/storage/table/segment_lock.hpp"
17
18namespace duckdb {
// Forward declarations (alphabetical, one entry per type).
// These types are only named through pointers/references in this header, so no definition is required here.
class CollectionScanState;
class ColumnData;
class ColumnSegment;
class ColumnSegmentTree;
class DuckTransaction;
class Index;
class LocalTableStorage;
class RowGroup;
class RowGroupCollection;
class RowGroupSegmentTree;
class TableFilterSet;
class TableScanState;
class UpdateSegment;
class ValiditySegment;
34
//! Polymorphic base type for the scan state attached to a segment.
//! The concrete state is held through this base (see ColumnScanState::scan_state) and recovered with Cast<TARGET>().
struct SegmentScanState {
	virtual ~SegmentScanState() {
	}

	//! Downcast this state to the concrete type TARGET.
	//! D_ASSERT checks that the dynamic type really is TARGET. static_cast is used instead of
	//! reinterpret_cast so that the compiler applies any base-to-derived adjustment and rejects a TARGET
	//! that is not derived from SegmentScanState at compile time.
	template <class TARGET>
	TARGET &Cast() {
		D_ASSERT(dynamic_cast<TARGET *>(this));
		return static_cast<TARGET &>(*this);
	}
	//! Const overload of Cast(): same check, returns a const reference
	template <class TARGET>
	const TARGET &Cast() const {
		D_ASSERT(dynamic_cast<const TARGET *>(this));
		return static_cast<const TARGET &>(*this);
	}
};
50
//! Polymorphic base type for index scan states; the concrete state is recovered with Cast<TARGET>().
struct IndexScanState {
	virtual ~IndexScanState() {
	}

	//! Downcast this state to the concrete type TARGET.
	//! D_ASSERT checks that the dynamic type really is TARGET. static_cast is used instead of
	//! reinterpret_cast so that the compiler applies any base-to-derived adjustment and rejects a TARGET
	//! that is not derived from IndexScanState at compile time.
	template <class TARGET>
	TARGET &Cast() {
		D_ASSERT(dynamic_cast<TARGET *>(this));
		return static_cast<TARGET &>(*this);
	}
	//! Const overload of Cast(): same check, returns a const reference
	template <class TARGET>
	const TARGET &Cast() const {
		D_ASSERT(dynamic_cast<const TARGET *>(this));
		return static_cast<const TARGET &>(*this);
	}
};
66
67typedef unordered_map<block_id_t, BufferHandle> buffer_handle_set_t;
68
69struct ColumnScanState {
70 //! The column segment that is currently being scanned
71 ColumnSegment *current = nullptr;
72 //! Column segment tree
73 ColumnSegmentTree *segment_tree = nullptr;
74 //! The current row index of the scan
75 idx_t row_index = 0;
76 //! The internal row index (i.e. the position of the SegmentScanState)
77 idx_t internal_index = 0;
78 //! Segment scan state
79 unique_ptr<SegmentScanState> scan_state;
80 //! Child states of the vector
81 vector<ColumnScanState> child_states;
82 //! Whether or not InitializeState has been called for this segment
83 bool initialized = false;
84 //! If this segment has already been checked for skipping purposes
85 bool segment_checked = false;
86 //! The version of the column data that we are scanning.
87 //! This is used to detect if the ColumnData has been changed out from under us during a scan
88 //! If this is the case, we re-initialize the scan
89 idx_t version = 0;
90 //! We initialize one SegmentScanState per segment, however, if scanning a DataChunk requires us to scan over more
91 //! than one Segment, we need to keep the scan states of the previous segments around
92 vector<unique_ptr<SegmentScanState>> previous_states;
93 //! The last read offset in the child state (used for LIST columns only)
94 idx_t last_offset = 0;
95
96public:
97 void Initialize(const LogicalType &type);
98 //! Move the scan state forward by "count" rows (including all child states)
99 void Next(idx_t count);
100 //! Move ONLY this state forward by "count" rows (i.e. not the child states)
101 void NextInternal(idx_t count);
102};
103
104struct ColumnFetchState {
105 //! The set of pinned block handles for this set of fetches
106 buffer_handle_set_t handles;
107 //! Any child states of the fetch
108 vector<unique_ptr<ColumnFetchState>> child_states;
109
110 BufferHandle &GetOrInsertHandle(ColumnSegment &segment);
111};
112
113class CollectionScanState {
114public:
115 CollectionScanState(TableScanState &parent_p);
116
117 //! The current row_group we are scanning
118 RowGroup *row_group;
119 //! The vector index within the row_group
120 idx_t vector_index;
121 //! The maximum row within the row group
122 idx_t max_row_group_row;
123 //! Child column scans
124 unsafe_unique_array<ColumnScanState> column_scans;
125 //! Row group segment tree
126 RowGroupSegmentTree *row_groups;
127 //! The total maximum row index
128 idx_t max_row;
129 //! The current batch index
130 idx_t batch_index;
131
132public:
133 void Initialize(const vector<LogicalType> &types);
134 const vector<storage_t> &GetColumnIds();
135 TableFilterSet *GetFilters();
136 AdaptiveFilter *GetAdaptiveFilter();
137 bool Scan(DuckTransaction &transaction, DataChunk &result);
138 bool ScanCommitted(DataChunk &result, TableScanType type);
139 bool ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type);
140
141private:
142 TableScanState &parent;
143};
144
145class TableScanState {
146public:
147 TableScanState() : table_state(*this), local_state(*this), table_filters(nullptr) {};
148
149 //! The underlying table scan state
150 CollectionScanState table_state;
151 //! Transaction-local scan state
152 CollectionScanState local_state;
153
154public:
155 void Initialize(vector<storage_t> column_ids, TableFilterSet *table_filters = nullptr);
156
157 const vector<storage_t> &GetColumnIds();
158 TableFilterSet *GetFilters();
159 AdaptiveFilter *GetAdaptiveFilter();
160
161private:
162 //! The column identifiers of the scan
163 vector<storage_t> column_ids;
164 //! The table filters (if any)
165 TableFilterSet *table_filters;
166 //! Adaptive filter info (if any)
167 unique_ptr<AdaptiveFilter> adaptive_filter;
168};
169
170struct ParallelCollectionScanState {
171 ParallelCollectionScanState();
172
173 //! The row group collection we are scanning
174 RowGroupCollection *collection;
175 RowGroup *current_row_group;
176 idx_t vector_index;
177 idx_t max_row;
178 idx_t batch_index;
179 atomic<idx_t> processed_rows;
180 mutex lock;
181};
182
183struct ParallelTableScanState {
184 //! Parallel scan state for the table
185 ParallelCollectionScanState scan_state;
186 //! Parallel scan state for the transaction-local state
187 ParallelCollectionScanState local_state;
188};
189
190class CreateIndexScanState : public TableScanState {
191public:
192 vector<unique_ptr<StorageLockKey>> locks;
193 unique_lock<mutex> append_lock;
194 SegmentLock segment_lock;
195};
196
197} // namespace duckdb
198