1 | #include "duckdb/execution/index/art/leaf.hpp" |
2 | |
3 | #include "duckdb/execution/index/art/art.hpp" |
4 | #include "duckdb/execution/index/art/art_key.hpp" |
5 | #include "duckdb/execution/index/art/leaf_segment.hpp" |
6 | #include "duckdb/execution/index/art/node.hpp" |
7 | #include "duckdb/storage/meta_block_reader.hpp" |
8 | #include "duckdb/storage/meta_block_writer.hpp" |
9 | |
10 | namespace duckdb { |
11 | |
12 | Leaf &Leaf::New(ART &art, Node &node, const ARTKey &key, const uint32_t depth, const row_t row_id) { |
13 | |
14 | node.SetPtr(Node::GetAllocator(art, type: NType::LEAF).New()); |
15 | node.type = (uint8_t)NType::LEAF; |
16 | auto &leaf = Leaf::Get(art, ptr: node); |
17 | |
18 | // set the fields of the leaf |
19 | leaf.count = 1; |
20 | leaf.row_ids.inlined = row_id; |
21 | |
22 | // initialize the prefix |
23 | D_ASSERT(key.len >= depth); |
24 | leaf.prefix.Initialize(art, key, depth, count_p: key.len - depth); |
25 | |
26 | return leaf; |
27 | } |
28 | |
29 | Leaf &Leaf::New(ART &art, Node &node, const ARTKey &key, const uint32_t depth, const row_t *row_ids, |
30 | const idx_t count) { |
31 | |
32 | // inlined leaf |
33 | D_ASSERT(count >= 1); |
34 | if (count == 1) { |
35 | return Leaf::New(art, node, key, depth, row_id: row_ids[0]); |
36 | } |
37 | |
38 | node.SetPtr(Node::GetAllocator(art, type: NType::LEAF).New()); |
39 | node.type = (uint8_t)NType::LEAF; |
40 | auto &leaf = Leaf::Get(art, ptr: node); |
41 | |
42 | // set the fields of the leaf |
43 | leaf.count = 0; |
44 | |
45 | // copy the row IDs |
46 | reference<LeafSegment> segment(LeafSegment::New(art, node&: leaf.row_ids.ptr)); |
47 | for (idx_t i = 0; i < count; i++) { |
48 | segment = segment.get().Append(art, count&: leaf.count, row_id: row_ids[i]); |
49 | } |
50 | |
51 | // set the prefix |
52 | D_ASSERT(key.len >= depth); |
53 | leaf.prefix.Initialize(art, key, depth, count_p: key.len - depth); |
54 | |
55 | return leaf; |
56 | } |
57 | |
58 | void Leaf::Free(ART &art, Node &node) { |
59 | |
60 | D_ASSERT(node.IsSet()); |
61 | D_ASSERT(!node.IsSwizzled()); |
62 | |
63 | // free leaf segments |
64 | auto &leaf = Leaf::Get(art, ptr: node); |
65 | if (!leaf.IsInlined()) { |
66 | Node::Free(art, node&: leaf.row_ids.ptr); |
67 | } |
68 | } |
69 | |
70 | void Leaf::InitializeMerge(const ART &art, const idx_t buffer_count) { |
71 | |
72 | if (IsInlined()) { |
73 | return; |
74 | } |
75 | |
76 | reference<LeafSegment> segment(LeafSegment::Get(art, ptr: row_ids.ptr)); |
77 | row_ids.ptr.buffer_id += buffer_count; |
78 | |
79 | auto ptr = segment.get().next; |
80 | while (ptr.IsSet()) { |
81 | segment.get().next.buffer_id += buffer_count; |
82 | segment = LeafSegment::Get(art, ptr); |
83 | ptr = segment.get().next; |
84 | } |
85 | } |
86 | |
87 | void Leaf::Merge(ART &art, Node &other) { |
88 | |
89 | auto &other_leaf = Leaf::Get(art, ptr: other); |
90 | |
91 | // copy inlined row ID |
92 | if (other_leaf.IsInlined()) { |
93 | Insert(art, row_id: other_leaf.row_ids.inlined); |
94 | Node::Free(art, node&: other); |
95 | return; |
96 | } |
97 | |
98 | // row ID was inlined, move to a new segment |
99 | if (IsInlined()) { |
100 | auto row_id = row_ids.inlined; |
101 | auto &segment = LeafSegment::New(art, node&: row_ids.ptr); |
102 | segment.row_ids[0] = row_id; |
103 | } |
104 | |
105 | // get the first segment to copy to |
106 | reference<LeafSegment> segment(LeafSegment::Get(art, ptr: row_ids.ptr).GetTail(art)); |
107 | |
108 | // initialize loop variables |
109 | auto other_ptr = other_leaf.row_ids.ptr; |
110 | auto remaining = other_leaf.count; |
111 | |
112 | // copy row IDs |
113 | while (other_ptr.IsSet()) { |
114 | auto &other_segment = LeafSegment::Get(art, ptr: other_ptr); |
115 | auto copy_count = MinValue(a: Node::LEAF_SEGMENT_SIZE, b: remaining); |
116 | |
117 | // copy the data |
118 | for (idx_t i = 0; i < copy_count; i++) { |
119 | segment = segment.get().Append(art, count, row_id: other_segment.row_ids[i]); |
120 | } |
121 | |
122 | // adjust the loop variables |
123 | other_ptr = other_segment.next; |
124 | remaining -= copy_count; |
125 | } |
126 | D_ASSERT(remaining == 0); |
127 | |
128 | Node::Free(art, node&: other); |
129 | } |
130 | |
131 | void Leaf::Insert(ART &art, const row_t row_id) { |
132 | |
133 | if (count == 0) { |
134 | row_ids.inlined = row_id; |
135 | count++; |
136 | return; |
137 | } |
138 | |
139 | if (count == 1) { |
140 | MoveInlinedToSegment(art); |
141 | } |
142 | |
143 | // append to the tail |
144 | auto &first_segment = LeafSegment::Get(art, ptr: row_ids.ptr); |
145 | auto &tail = first_segment.GetTail(art); |
146 | tail.Append(art, count, row_id); |
147 | } |
148 | |
149 | void Leaf::Remove(ART &art, const row_t row_id) { |
150 | |
151 | if (count == 0) { |
152 | return; |
153 | } |
154 | |
155 | if (IsInlined()) { |
156 | if (row_ids.inlined == row_id) { |
157 | count--; |
158 | } |
159 | return; |
160 | } |
161 | |
162 | // possibly inline the row ID |
163 | if (count == 2) { |
164 | auto &segment = LeafSegment::Get(art, ptr: row_ids.ptr); |
165 | if (segment.row_ids[0] != row_id && segment.row_ids[1] != row_id) { |
166 | return; |
167 | } |
168 | |
169 | auto remaining_row_id = segment.row_ids[0] == row_id ? segment.row_ids[1] : segment.row_ids[0]; |
170 | Node::Free(art, node&: row_ids.ptr); |
171 | row_ids.inlined = remaining_row_id; |
172 | count--; |
173 | return; |
174 | } |
175 | |
176 | // find the row ID, and the segment containing that row ID (stored in ptr) |
177 | auto ptr = row_ids.ptr; |
178 | auto copy_idx = FindRowId(art, ptr, row_id); |
179 | if (copy_idx == (uint32_t)DConstants::INVALID_INDEX) { |
180 | return; |
181 | } |
182 | copy_idx++; |
183 | |
184 | // iterate all remaining segments and move the row IDs one field to the left |
185 | reference<LeafSegment> segment(LeafSegment::Get(art, ptr)); |
186 | reference<LeafSegment> prev_segment(LeafSegment::Get(art, ptr)); |
187 | while (copy_idx < count) { |
188 | |
189 | auto copy_start = copy_idx % Node::LEAF_SEGMENT_SIZE; |
190 | D_ASSERT(copy_start != 0); |
191 | auto copy_end = MinValue(a: copy_start + count - copy_idx, b: Node::LEAF_SEGMENT_SIZE); |
192 | |
193 | // copy row IDs |
194 | for (idx_t i = copy_start; i < copy_end; i++) { |
195 | segment.get().row_ids[i - 1] = segment.get().row_ids[i]; |
196 | copy_idx++; |
197 | } |
198 | |
199 | // adjust loop variables |
200 | if (segment.get().next.IsSet()) { |
201 | prev_segment = segment; |
202 | segment = LeafSegment::Get(art, ptr: segment.get().next); |
203 | // this segment has at least one element, and we need to copy it into the previous segment |
204 | prev_segment.get().row_ids[Node::LEAF_SEGMENT_SIZE - 1] = segment.get().row_ids[0]; |
205 | copy_idx++; |
206 | } |
207 | } |
208 | |
209 | // this evaluates to true, if we need to delete the last segment |
210 | if (count % Node::LEAF_SEGMENT_SIZE == 1) { |
211 | ptr = row_ids.ptr; |
212 | while (ptr.IsSet()) { |
213 | |
214 | // get the segment succeeding the current segment |
215 | auto ¤t_segment = LeafSegment::Get(art, ptr); |
216 | D_ASSERT(current_segment.next.IsSet()); |
217 | auto &next_segment = LeafSegment::Get(art, ptr: current_segment.next); |
218 | |
219 | // next_segment is the tail of the segment list |
220 | if (!next_segment.next.IsSet()) { |
221 | Node::Free(art, node&: current_segment.next); |
222 | } |
223 | |
224 | // adjust loop variables |
225 | ptr = current_segment.next; |
226 | } |
227 | } |
228 | count--; |
229 | } |
230 | |
231 | row_t Leaf::GetRowId(const ART &art, const idx_t position) const { |
232 | |
233 | D_ASSERT(position < count); |
234 | if (IsInlined()) { |
235 | return row_ids.inlined; |
236 | } |
237 | |
238 | // get the correct segment |
239 | reference<LeafSegment> segment(LeafSegment::Get(art, ptr: row_ids.ptr)); |
240 | for (idx_t i = 0; i < position / Node::LEAF_SEGMENT_SIZE; i++) { |
241 | D_ASSERT(segment.get().next.IsSet()); |
242 | segment = LeafSegment::Get(art, ptr: segment.get().next); |
243 | } |
244 | |
245 | return segment.get().row_ids[position % Node::LEAF_SEGMENT_SIZE]; |
246 | } |
247 | |
248 | uint32_t Leaf::FindRowId(const ART &art, Node &ptr, const row_t row_id) const { |
249 | |
250 | D_ASSERT(!IsInlined()); |
251 | |
252 | auto remaining = count; |
253 | while (ptr.IsSet()) { |
254 | |
255 | auto &segment = LeafSegment::Get(art, ptr); |
256 | auto search_count = MinValue(a: Node::LEAF_SEGMENT_SIZE, b: remaining); |
257 | |
258 | // search in this segment |
259 | for (idx_t i = 0; i < search_count; i++) { |
260 | if (segment.row_ids[i] == row_id) { |
261 | return count - remaining + i; |
262 | } |
263 | } |
264 | |
265 | // adjust loop variables |
266 | remaining -= search_count; |
267 | ptr = segment.next; |
268 | } |
269 | return (uint32_t)DConstants::INVALID_INDEX; |
270 | } |
271 | |
272 | string Leaf::VerifyAndToString(const ART &art, const bool only_verify) const { |
273 | |
274 | if (IsInlined()) { |
275 | return only_verify ? "" : "Leaf [count: 1, row ID: " + to_string(val: row_ids.inlined) + "]" ; |
276 | } |
277 | |
278 | auto ptr = row_ids.ptr; |
279 | auto remaining = count; |
280 | string str = "" ; |
281 | uint32_t this_count = 0; |
282 | while (ptr.IsSet()) { |
283 | auto &segment = LeafSegment::Get(art, ptr); |
284 | auto to_string_count = Node::LEAF_SEGMENT_SIZE < remaining ? Node::LEAF_SEGMENT_SIZE : remaining; |
285 | |
286 | for (idx_t i = 0; i < to_string_count; i++) { |
287 | str += ", " + to_string(val: segment.row_ids[i]); |
288 | this_count++; |
289 | } |
290 | remaining -= to_string_count; |
291 | ptr = segment.next; |
292 | } |
293 | |
294 | D_ASSERT(remaining == 0); |
295 | (void)this_count; |
296 | D_ASSERT(this_count == count); |
297 | return only_verify ? "" : "Leaf [count: " + to_string(val: count) + ", row IDs: " + str + "] \n" ; |
298 | } |
299 | |
300 | BlockPointer Leaf::Serialize(const ART &art, MetaBlockWriter &writer) const { |
301 | |
302 | // get pointer and write fields |
303 | auto block_pointer = writer.GetBlockPointer(); |
304 | writer.Write(element: NType::LEAF); |
305 | writer.Write<uint32_t>(element: count); |
306 | prefix.Serialize(art, writer); |
307 | |
308 | if (IsInlined()) { |
309 | writer.Write(element: row_ids.inlined); |
310 | return block_pointer; |
311 | } |
312 | |
313 | D_ASSERT(row_ids.ptr.IsSet()); |
314 | auto ptr = row_ids.ptr; |
315 | auto remaining = count; |
316 | |
317 | // iterate all leaf segments and write their row IDs |
318 | while (ptr.IsSet()) { |
319 | auto &segment = LeafSegment::Get(art, ptr); |
320 | auto write_count = MinValue(a: Node::LEAF_SEGMENT_SIZE, b: remaining); |
321 | |
322 | // write the row IDs |
323 | for (idx_t i = 0; i < write_count; i++) { |
324 | writer.Write(element: segment.row_ids[i]); |
325 | } |
326 | |
327 | // adjust loop variables |
328 | remaining -= write_count; |
329 | ptr = segment.next; |
330 | } |
331 | D_ASSERT(remaining == 0); |
332 | |
333 | return block_pointer; |
334 | } |
335 | |
336 | void Leaf::Deserialize(ART &art, MetaBlockReader &reader) { |
337 | |
338 | auto count_p = reader.Read<uint32_t>(); |
339 | prefix.Deserialize(art, reader); |
340 | |
341 | // inlined |
342 | if (count_p == 1) { |
343 | row_ids.inlined = reader.Read<row_t>(); |
344 | count = count_p; |
345 | return; |
346 | } |
347 | |
348 | // copy into segments |
349 | count = 0; |
350 | reference<LeafSegment> segment(LeafSegment::New(art, node&: row_ids.ptr)); |
351 | for (idx_t i = 0; i < count_p; i++) { |
352 | segment = segment.get().Append(art, count, row_id: reader.Read<row_t>()); |
353 | } |
354 | D_ASSERT(count_p == count); |
355 | } |
356 | |
357 | void Leaf::Vacuum(ART &art) { |
358 | |
359 | if (IsInlined()) { |
360 | return; |
361 | } |
362 | |
363 | // first pointer has special treatment because we don't obtain it from a leaf segment |
364 | auto &allocator = Node::GetAllocator(art, type: NType::LEAF_SEGMENT); |
365 | if (allocator.NeedsVacuum(ptr: row_ids.ptr)) { |
366 | row_ids.ptr.SetPtr(allocator.VacuumPointer(ptr: row_ids.ptr)); |
367 | row_ids.ptr.type = (uint8_t)NType::LEAF_SEGMENT; |
368 | } |
369 | |
370 | auto ptr = row_ids.ptr; |
371 | while (ptr.IsSet()) { |
372 | auto &segment = LeafSegment::Get(art, ptr); |
373 | ptr = segment.next; |
374 | if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) { |
375 | segment.next.SetPtr(allocator.VacuumPointer(ptr)); |
376 | segment.next.type = (uint8_t)NType::LEAF_SEGMENT; |
377 | ptr = segment.next; |
378 | } |
379 | } |
380 | } |
381 | |
382 | void Leaf::MoveInlinedToSegment(ART &art) { |
383 | |
384 | D_ASSERT(IsInlined()); |
385 | |
386 | auto row_id = row_ids.inlined; |
387 | auto &segment = LeafSegment::New(art, node&: row_ids.ptr); |
388 | segment.row_ids[0] = row_id; |
389 | } |
390 | |
391 | } // namespace duckdb |
392 | |