1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/storage/table/segment_tree.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/constants.hpp"
12#include "duckdb/storage/storage_lock.hpp"
13#include "duckdb/storage/table/segment_lock.hpp"
14#include "duckdb/common/vector.hpp"
15#include "duckdb/common/mutex.hpp"
16#include "duckdb/common/string_util.hpp"
17
18namespace duckdb {
19
20template <class T>
21struct SegmentNode {
22 idx_t row_start;
23 unique_ptr<T> node;
24};
25
26//! The SegmentTree maintains a list of all segments of a specific column in a table, and allows searching for a segment
27//! by row number
28template <class T, bool SUPPORTS_LAZY_LOADING = false>
29class SegmentTree {
30private:
31 class SegmentIterationHelper;
32
33public:
34 explicit SegmentTree() : finished_loading(true) {
35 }
36 virtual ~SegmentTree() {
37 }
38
39 //! Locks the segment tree. All methods to the segment tree either lock the segment tree, or take an already
40 //! obtained lock.
41 SegmentLock Lock() {
42 return SegmentLock(node_lock);
43 }
44
45 bool IsEmpty(SegmentLock &l) {
46 return GetRootSegment(l) == nullptr;
47 }
48
49 //! Gets a pointer to the first segment. Useful for scans.
50 T *GetRootSegment() {
51 auto l = Lock();
52 return GetRootSegment(l);
53 }
54
55 T *GetRootSegment(SegmentLock &l) {
56 if (nodes.empty()) {
57 LoadNextSegment(l);
58 }
59 return GetRootSegmentInternal();
60 }
61 //! Obtains ownership of the data of the segment tree
62 vector<SegmentNode<T>> MoveSegments(SegmentLock &l) {
63 LoadAllSegments(l);
64 return std::move(nodes);
65 }
66 vector<SegmentNode<T>> MoveSegments() {
67 auto l = Lock();
68 return MoveSegments(l);
69 }
70 idx_t GetSegmentCount() {
71 auto l = Lock();
72 return nodes.size();
73 }
74 //! Gets a pointer to the nth segment. Negative numbers start from the back.
75 T *GetSegmentByIndex(int64_t index) {
76 auto l = Lock();
77 return GetSegmentByIndex(l, index);
78 }
79 T *GetSegmentByIndex(SegmentLock &l, int64_t index) {
80 if (index < 0) {
81 // load all segments
82 LoadAllSegments(l);
83 index = nodes.size() + index;
84 if (index < 0) {
85 return nullptr;
86 }
87 return nodes[index].node.get();
88 } else {
89 // lazily load segments until we reach the specific segment
90 while (idx_t(index) >= nodes.size() && LoadNextSegment(l)) {
91 }
92 if (idx_t(index) >= nodes.size()) {
93 return nullptr;
94 }
95 return nodes[index].node.get();
96 }
97 }
98 //! Gets the next segment
99 T *GetNextSegment(T *segment) {
100 if (!SUPPORTS_LAZY_LOADING) {
101 return segment->Next();
102 }
103 if (finished_loading) {
104 return segment->Next();
105 }
106 auto l = Lock();
107 return GetNextSegment(l, segment);
108 }
109 T *GetNextSegment(SegmentLock &l, T *segment) {
110 if (!segment) {
111 return nullptr;
112 }
113#ifdef DEBUG
114 D_ASSERT(nodes[segment->index].node.get() == segment);
115#endif
116 return GetSegmentByIndex(l, segment->index + 1);
117 }
118
119 //! Gets a pointer to the last segment. Useful for appends.
120 T *GetLastSegment(SegmentLock &l) {
121 LoadAllSegments(l);
122 if (nodes.empty()) {
123 return nullptr;
124 }
125 return nodes.back().node.get();
126 }
127 //! Gets a pointer to a specific column segment for the given row
128 T *GetSegment(idx_t row_number) {
129 auto l = Lock();
130 return GetSegment(l, row_number);
131 }
132 T *GetSegment(SegmentLock &l, idx_t row_number) {
133 return nodes[GetSegmentIndex(l, row_number)].node.get();
134 }
135
136 //! Append a column segment to the tree
137 void AppendSegmentInternal(SegmentLock &l, unique_ptr<T> segment) {
138 D_ASSERT(segment);
139 // add the node to the list of nodes
140 if (!nodes.empty()) {
141 nodes.back().node->next = segment.get();
142 }
143 SegmentNode<T> node;
144 segment->index = nodes.size();
145 node.row_start = segment->start;
146 node.node = std::move(segment);
147 nodes.push_back(std::move(node));
148 }
149 void AppendSegment(unique_ptr<T> segment) {
150 auto l = Lock();
151 AppendSegment(l, std::move(segment));
152 }
153 void AppendSegment(SegmentLock &l, unique_ptr<T> segment) {
154 LoadAllSegments(l);
155 AppendSegmentInternal(l, segment: std::move(segment));
156 }
157 //! Debug method, check whether the segment is in the segment tree
158 bool HasSegment(T *segment) {
159 auto l = Lock();
160 return HasSegment(l, segment);
161 }
162 bool HasSegment(SegmentLock &, T *segment) {
163 return segment->index < nodes.size() && nodes[segment->index].node.get() == segment;
164 }
165
166 //! Replace this tree with another tree, taking over its nodes in-place
167 void Replace(SegmentTree<T> &other) {
168 auto l = Lock();
169 Replace(l, other);
170 }
171 void Replace(SegmentLock &l, SegmentTree<T> &other) {
172 other.LoadAllSegments(l);
173 nodes = std::move(other.nodes);
174 }
175
176 //! Erase all segments after a specific segment
177 void EraseSegments(SegmentLock &l, idx_t segment_start) {
178 LoadAllSegments(l);
179 if (segment_start >= nodes.size() - 1) {
180 return;
181 }
182 nodes.erase(nodes.begin() + segment_start + 1, nodes.end());
183 }
184
185 //! Get the segment index of the column segment for the given row
186 idx_t GetSegmentIndex(SegmentLock &l, idx_t row_number) {
187 idx_t segment_index;
188 if (TryGetSegmentIndex(l, row_number, result&: segment_index)) {
189 return segment_index;
190 }
191 string error;
192 error = StringUtil::Format("Attempting to find row number \"%lld\" in %lld nodes\n", row_number, nodes.size());
193 for (idx_t i = 0; i < nodes.size(); i++) {
194 error += StringUtil::Format("Node %lld: Start %lld, Count %lld", i, nodes[i].row_start,
195 nodes[i].node->count.load());
196 }
197 throw InternalException("Could not find node in column segment tree!\n%s%s", error, Exception::GetStackTrace());
198 }
199
200 bool TryGetSegmentIndex(SegmentLock &l, idx_t row_number, idx_t &result) {
201 // load segments until the row number is within bounds
202 while (nodes.empty() || (row_number >= (nodes.back().row_start + nodes.back().node->count))) {
203 if (!LoadNextSegment(l)) {
204 break;
205 }
206 }
207 if (nodes.empty()) {
208 return false;
209 }
210 D_ASSERT(!nodes.empty());
211 D_ASSERT(row_number >= nodes[0].row_start);
212 D_ASSERT(row_number < nodes.back().row_start + nodes.back().node->count);
213 idx_t lower = 0;
214 idx_t upper = nodes.size() - 1;
215 // binary search to find the node
216 while (lower <= upper) {
217 idx_t index = (lower + upper) / 2;
218 D_ASSERT(index < nodes.size());
219 auto &entry = nodes[index];
220 D_ASSERT(entry.row_start == entry.node->start);
221 if (row_number < entry.row_start) {
222 upper = index - 1;
223 } else if (row_number >= entry.row_start + entry.node->count) {
224 lower = index + 1;
225 } else {
226 result = index;
227 return true;
228 }
229 }
230 return false;
231 }
232
233 void Verify(SegmentLock &) {
234#ifdef DEBUG
235 idx_t base_start = nodes.empty() ? 0 : nodes[0].node->start;
236 for (idx_t i = 0; i < nodes.size(); i++) {
237 D_ASSERT(nodes[i].row_start == nodes[i].node->start);
238 D_ASSERT(nodes[i].node->start == base_start);
239 base_start += nodes[i].node->count;
240 }
241#endif
242 }
243 void Verify() {
244#ifdef DEBUG
245 auto l = Lock();
246 Verify(l);
247#endif
248 }
249
250 SegmentIterationHelper Segments() {
251 return SegmentIterationHelper(*this);
252 }
253
254 void Reinitialize() {
255 if (nodes.empty()) {
256 return;
257 }
258 idx_t offset = nodes[0].node->start;
259 for (auto &entry : nodes) {
260 if (entry.node->start != offset) {
261 throw InternalException("In SegmentTree::Reinitialize - gap found between nodes!");
262 }
263 entry.row_start = offset;
264 offset += entry.node->count;
265 }
266 }
267
268protected:
269 atomic<bool> finished_loading;
270
271 //! Load the next segment - only used when lazily loading
272 virtual unique_ptr<T> LoadSegment() {
273 return nullptr;
274 }
275
276private:
277 //! The nodes in the tree, can be binary searched
278 vector<SegmentNode<T>> nodes;
279 //! Lock to access or modify the nodes
280 mutex node_lock;
281
282private:
283 T *GetRootSegmentInternal() {
284 return nodes.empty() ? nullptr : nodes[0].node.get();
285 }
286
287 class SegmentIterationHelper {
288 public:
289 explicit SegmentIterationHelper(SegmentTree &tree) : tree(tree) {
290 }
291
292 private:
293 SegmentTree &tree;
294
295 private:
296 class SegmentIterator {
297 public:
298 SegmentIterator(SegmentTree &tree_p, T *current_p) : tree(tree_p), current(current_p) {
299 }
300
301 SegmentTree &tree;
302 T *current;
303
304 public:
305 void Next() {
306 current = tree.GetNextSegment(current);
307 }
308
309 SegmentIterator &operator++() {
310 Next();
311 return *this;
312 }
313 bool operator!=(const SegmentIterator &other) const {
314 return current != other.current;
315 }
316 T &operator*() const {
317 D_ASSERT(current);
318 return *current;
319 }
320 };
321
322 public:
323 SegmentIterator begin() {
324 return SegmentIterator(tree, tree.GetRootSegment());
325 }
326 SegmentIterator end() {
327 return SegmentIterator(tree, nullptr);
328 }
329 };
330
331 //! Load the next segment, if there are any left to load
332 bool LoadNextSegment(SegmentLock &l) {
333 if (!SUPPORTS_LAZY_LOADING) {
334 return false;
335 }
336 if (finished_loading) {
337 return false;
338 }
339 auto result = LoadSegment();
340 if (result) {
341 AppendSegmentInternal(l, segment: std::move(result));
342 return true;
343 }
344 return false;
345 }
346
347 //! Load all segments, if there are any left to load
348 void LoadAllSegments(SegmentLock &l) {
349 if (!SUPPORTS_LAZY_LOADING) {
350 return;
351 }
352 while (LoadNextSegment(l))
353 ;
354 }
355};
356
357} // namespace duckdb
358