1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/storage/table/scan_state.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/common.hpp" |
12 | #include "duckdb/storage/buffer/buffer_handle.hpp" |
13 | #include "duckdb/storage/storage_lock.hpp" |
14 | #include "duckdb/common/enums/scan_options.hpp" |
15 | #include "duckdb/execution/adaptive_filter.hpp" |
16 | #include "duckdb/storage/table/segment_lock.hpp" |
17 | |
18 | namespace duckdb { |
//! Forward declarations; every type is declared exactly once
class ColumnSegment;
class LocalTableStorage;
class CollectionScanState;
class Index;
class RowGroup;
class RowGroupCollection;
class UpdateSegment;
class TableScanState;
class ColumnSegmentTree;
class ValiditySegment;
class TableFilterSet;
class ColumnData;
class DuckTransaction;
class RowGroupSegmentTree;
34 | |
//! Polymorphic base for segment scan states.
//! Derived states are accessed through Cast<TARGET>().
struct SegmentScanState {
	//! Virtual so that a derived state is destroyed correctly through a pointer to this base
	virtual ~SegmentScanState() = default;

	//! Returns this object as a TARGET reference.
	//! TARGET must be SegmentScanState or a class derived from it; the dynamic type is checked with D_ASSERT.
	template <class TARGET>
	TARGET &Cast() {
		D_ASSERT(dynamic_cast<TARGET *>(this));
		// static_cast performs the base-to-derived adjustment that reinterpret_cast skips, and it refuses to
		// compile when TARGET is unrelated to SegmentScanState
		return static_cast<TARGET &>(*this);
	}

	//! Const overload of Cast(): returns this object as a const TARGET reference
	template <class TARGET>
	const TARGET &Cast() const {
		D_ASSERT(dynamic_cast<const TARGET *>(this));
		return static_cast<const TARGET &>(*this);
	}
};
50 | |
//! Polymorphic base for index scan states.
//! Derived states are accessed through Cast<TARGET>().
struct IndexScanState {
	//! Virtual so that a derived state is destroyed correctly through a pointer to this base
	virtual ~IndexScanState() = default;

	//! Returns this object as a TARGET reference.
	//! TARGET must be IndexScanState or a class derived from it; the dynamic type is checked with D_ASSERT.
	template <class TARGET>
	TARGET &Cast() {
		D_ASSERT(dynamic_cast<TARGET *>(this));
		// static_cast performs the base-to-derived adjustment that reinterpret_cast skips, and it refuses to
		// compile when TARGET is unrelated to IndexScanState
		return static_cast<TARGET &>(*this);
	}

	//! Const overload of Cast(): returns this object as a const TARGET reference
	template <class TARGET>
	const TARGET &Cast() const {
		D_ASSERT(dynamic_cast<const TARGET *>(this));
		return static_cast<const TARGET &>(*this);
	}
};
66 | |
67 | typedef unordered_map<block_id_t, BufferHandle> buffer_handle_set_t; |
68 | |
69 | struct ColumnScanState { |
70 | //! The column segment that is currently being scanned |
71 | ColumnSegment *current = nullptr; |
72 | //! Column segment tree |
73 | ColumnSegmentTree *segment_tree = nullptr; |
74 | //! The current row index of the scan |
75 | idx_t row_index = 0; |
76 | //! The internal row index (i.e. the position of the SegmentScanState) |
77 | idx_t internal_index = 0; |
78 | //! Segment scan state |
79 | unique_ptr<SegmentScanState> scan_state; |
80 | //! Child states of the vector |
81 | vector<ColumnScanState> child_states; |
82 | //! Whether or not InitializeState has been called for this segment |
83 | bool initialized = false; |
84 | //! If this segment has already been checked for skipping purposes |
85 | bool segment_checked = false; |
86 | //! The version of the column data that we are scanning. |
87 | //! This is used to detect if the ColumnData has been changed out from under us during a scan |
88 | //! If this is the case, we re-initialize the scan |
89 | idx_t version = 0; |
90 | //! We initialize one SegmentScanState per segment, however, if scanning a DataChunk requires us to scan over more |
91 | //! than one Segment, we need to keep the scan states of the previous segments around |
92 | vector<unique_ptr<SegmentScanState>> previous_states; |
93 | //! The last read offset in the child state (used for LIST columns only) |
94 | idx_t last_offset = 0; |
95 | |
96 | public: |
97 | void Initialize(const LogicalType &type); |
98 | //! Move the scan state forward by "count" rows (including all child states) |
99 | void Next(idx_t count); |
100 | //! Move ONLY this state forward by "count" rows (i.e. not the child states) |
101 | void NextInternal(idx_t count); |
102 | }; |
103 | |
104 | struct ColumnFetchState { |
105 | //! The set of pinned block handles for this set of fetches |
106 | buffer_handle_set_t handles; |
107 | //! Any child states of the fetch |
108 | vector<unique_ptr<ColumnFetchState>> child_states; |
109 | |
110 | BufferHandle &GetOrInsertHandle(ColumnSegment &segment); |
111 | }; |
112 | |
113 | class CollectionScanState { |
114 | public: |
115 | CollectionScanState(TableScanState &parent_p); |
116 | |
117 | //! The current row_group we are scanning |
118 | RowGroup *row_group; |
119 | //! The vector index within the row_group |
120 | idx_t vector_index; |
121 | //! The maximum row within the row group |
122 | idx_t max_row_group_row; |
123 | //! Child column scans |
124 | unsafe_unique_array<ColumnScanState> column_scans; |
125 | //! Row group segment tree |
126 | RowGroupSegmentTree *row_groups; |
127 | //! The total maximum row index |
128 | idx_t max_row; |
129 | //! The current batch index |
130 | idx_t batch_index; |
131 | |
132 | public: |
133 | void Initialize(const vector<LogicalType> &types); |
134 | const vector<storage_t> &GetColumnIds(); |
135 | TableFilterSet *GetFilters(); |
136 | AdaptiveFilter *GetAdaptiveFilter(); |
137 | bool Scan(DuckTransaction &transaction, DataChunk &result); |
138 | bool ScanCommitted(DataChunk &result, TableScanType type); |
139 | bool ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type); |
140 | |
141 | private: |
142 | TableScanState &parent; |
143 | }; |
144 | |
145 | class TableScanState { |
146 | public: |
147 | TableScanState() : table_state(*this), local_state(*this), table_filters(nullptr) {}; |
148 | |
149 | //! The underlying table scan state |
150 | CollectionScanState table_state; |
151 | //! Transaction-local scan state |
152 | CollectionScanState local_state; |
153 | |
154 | public: |
155 | void Initialize(vector<storage_t> column_ids, TableFilterSet *table_filters = nullptr); |
156 | |
157 | const vector<storage_t> &GetColumnIds(); |
158 | TableFilterSet *GetFilters(); |
159 | AdaptiveFilter *GetAdaptiveFilter(); |
160 | |
161 | private: |
162 | //! The column identifiers of the scan |
163 | vector<storage_t> column_ids; |
164 | //! The table filters (if any) |
165 | TableFilterSet *table_filters; |
166 | //! Adaptive filter info (if any) |
167 | unique_ptr<AdaptiveFilter> adaptive_filter; |
168 | }; |
169 | |
170 | struct ParallelCollectionScanState { |
171 | ParallelCollectionScanState(); |
172 | |
173 | //! The row group collection we are scanning |
174 | RowGroupCollection *collection; |
175 | RowGroup *current_row_group; |
176 | idx_t vector_index; |
177 | idx_t max_row; |
178 | idx_t batch_index; |
179 | atomic<idx_t> processed_rows; |
180 | mutex lock; |
181 | }; |
182 | |
183 | struct ParallelTableScanState { |
184 | //! Parallel scan state for the table |
185 | ParallelCollectionScanState scan_state; |
186 | //! Parallel scan state for the transaction-local state |
187 | ParallelCollectionScanState local_state; |
188 | }; |
189 | |
190 | class CreateIndexScanState : public TableScanState { |
191 | public: |
192 | vector<unique_ptr<StorageLockKey>> locks; |
193 | unique_lock<mutex> append_lock; |
194 | SegmentLock segment_lock; |
195 | }; |
196 | |
197 | } // namespace duckdb |
198 | |