1#include "duckdb/execution/index/art/prefix.hpp"
2
3#include "duckdb/execution/index/art/art.hpp"
4#include "duckdb/execution/index/art/art_key.hpp"
5#include "duckdb/execution/index/art/node.hpp"
6#include "duckdb/execution/index/art/prefix_segment.hpp"
7#include "duckdb/storage/meta_block_reader.hpp"
8#include "duckdb/storage/meta_block_writer.hpp"
9
10namespace duckdb {
11
12void Prefix::Free(ART &art) {
13
14 if (IsInlined()) {
15 return Initialize();
16 }
17
18 // delete all prefix segments
19 auto ptr = data.ptr;
20 while (ptr.IsSet()) {
21 auto next_ptr = PrefixSegment::Get(art, ptr).next;
22 Node::Free(art, node&: ptr);
23 ptr = next_ptr;
24 }
25
26 Initialize();
27}
28
29void Prefix::Initialize(ART &art, const ARTKey &key, const uint32_t depth, const uint32_t count_p) {
30
31 // prefix can be inlined
32 if (count_p <= Node::PREFIX_INLINE_BYTES) {
33 memcpy(dest: data.inlined, src: key.data + depth, n: count_p);
34 count = count_p;
35 return;
36 }
37
38 // prefix cannot be inlined, copy to segment(s)
39 count = 0;
40 reference<PrefixSegment> segment(PrefixSegment::New(art, node&: data.ptr));
41 for (idx_t i = 0; i < count_p; i++) {
42 segment = segment.get().Append(art, count, byte: key.data[depth + i]);
43 }
44 D_ASSERT(count == count_p);
45}
46
47void Prefix::Initialize(ART &art, const Prefix &other, const uint32_t count_p) {
48
49 D_ASSERT(count_p <= other.count);
50
51 // copy inlined data
52 if (other.IsInlined()) {
53 memcpy(dest: data.inlined, src: other.data.inlined, n: count_p);
54 count = count_p;
55 return;
56 }
57
58 // initialize the count and get the first segment
59 count = 0;
60 reference<PrefixSegment> segment(PrefixSegment::New(art, node&: data.ptr));
61
62 // iterate the segments of the other prefix and copy their data
63 auto other_ptr = other.data.ptr;
64 auto remaining = count_p;
65
66 while (remaining != 0) {
67 D_ASSERT(other_ptr.IsSet());
68 auto &other_segment = PrefixSegment::Get(art, ptr: other_ptr);
69 auto copy_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE, b: remaining);
70
71 // copy the data
72 for (idx_t i = 0; i < copy_count; i++) {
73 segment = segment.get().Append(art, count, byte: other_segment.bytes[i]);
74 }
75
76 // adjust the loop variables
77 other_ptr = other_segment.next;
78 remaining -= copy_count;
79 }
80 D_ASSERT(count == count_p);
81}
82
83void Prefix::InitializeMerge(ART &art, const idx_t buffer_count) {
84
85 if (IsInlined()) {
86 return;
87 }
88
89 reference<PrefixSegment> segment(PrefixSegment::Get(art, ptr: data.ptr));
90 data.ptr.buffer_id += buffer_count;
91
92 auto ptr = segment.get().next;
93 while (ptr.IsSet()) {
94 segment.get().next.buffer_id += buffer_count;
95 segment = PrefixSegment::Get(art, ptr);
96 ptr = segment.get().next;
97 }
98}
99
100void Prefix::Append(ART &art, const Prefix &other) {
101
102 // result fits into inlined data, i.e., both prefixes are also inlined
103 if (count + other.count <= Node::PREFIX_INLINE_BYTES) {
104 memcpy(dest: data.inlined + count, src: other.data.inlined, n: other.count);
105 count += other.count;
106 return;
107 }
108
109 // this prefix is inlined, but will no longer be after appending the other prefix,
110 // move the inlined bytes to the first prefix segment
111 if (IsInlined()) {
112 MoveInlinedToSegment(art);
113 }
114
115 // get the tail of the segments of this prefix
116 reference<PrefixSegment> segment(PrefixSegment::Get(art, ptr: data.ptr).GetTail(art));
117
118 // the other prefix is inlined
119 if (other.IsInlined()) {
120 for (idx_t i = 0; i < other.count; i++) {
121 segment = segment.get().Append(art, count, byte: other.data.inlined[i]);
122 }
123 return;
124 }
125
126 // iterate all segments of the other prefix and copy their data
127 auto other_ptr = other.data.ptr;
128 auto remaining = other.count;
129
130 while (other_ptr.IsSet()) {
131 auto &other_segment = PrefixSegment::Get(art, ptr: other_ptr);
132 auto copy_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE, b: remaining);
133
134 // copy the data
135 for (idx_t i = 0; i < copy_count; i++) {
136 segment = segment.get().Append(art, count, byte: other_segment.bytes[i]);
137 }
138
139 // adjust the loop variables
140 other_ptr = other_segment.next;
141 remaining -= copy_count;
142 }
143 D_ASSERT(remaining == 0);
144}
145
146void Prefix::Concatenate(ART &art, const uint8_t byte, const Prefix &other) {
147
148 auto new_size = count + 1 + other.count;
149
150 // overwrite into this prefix (both are inlined)
151 if (new_size <= Node::PREFIX_INLINE_BYTES) {
152 // move this prefix backwards
153 memmove(dest: data.inlined + other.count + 1, src: data.inlined, n: count);
154 // copy byte
155 data.inlined[other.count] = byte;
156 // copy the other prefix into this prefix
157 memcpy(dest: data.inlined, src: other.data.inlined, n: other.count);
158 count = new_size;
159 return;
160 }
161
162 auto this_inlined = IsInlined();
163 auto this_count = count;
164 auto this_data = data;
165 Initialize();
166
167 // append the other prefix and possibly move the data to a segment
168 Append(art, other);
169 if (IsInlined()) {
170 MoveInlinedToSegment(art);
171 }
172
173 // get the tail
174 reference<PrefixSegment> segment(PrefixSegment::Get(art, ptr: data.ptr).GetTail(art));
175 // append the byte
176 segment = segment.get().Append(art, count, byte);
177
178 if (this_inlined) {
179 // append this prefix
180 for (idx_t i = 0; i < this_count; i++) {
181 segment = segment.get().Append(art, count, byte: this_data.inlined[i]);
182 }
183 return;
184 }
185
186 // iterate all segments of this prefix, copy their data, and free them
187 auto this_ptr = this_data.ptr;
188 auto remaining = this_count;
189
190 while (this_ptr.IsSet()) {
191 auto &this_segment = PrefixSegment::Get(art, ptr: this_ptr);
192 auto copy_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE, b: remaining);
193
194 // copy the data
195 for (idx_t i = 0; i < copy_count; i++) {
196 segment = segment.get().Append(art, count, byte: this_segment.bytes[i]);
197 }
198
199 // adjust the loop variables
200 Node::Free(art, node&: this_ptr);
201 this_ptr = this_segment.next;
202 remaining -= copy_count;
203 }
204 D_ASSERT(remaining == 0);
205}
206
207uint8_t Prefix::Reduce(ART &art, const idx_t reduce_count) {
208
209 auto new_count = count - reduce_count - 1;
210 auto new_first_byte = GetByte(art, position: reduce_count);
211
212 // prefix is now empty
213 if (new_count == 0) {
214 Free(art);
215 return new_first_byte;
216 }
217
218 // was inlined, just move bytes
219 if (IsInlined()) {
220 memmove(dest: data.inlined, src: data.inlined + reduce_count + 1, n: new_count);
221 count = new_count;
222 return new_first_byte;
223 }
224
225 count = 0;
226 auto start = reduce_count + 1;
227 auto offset = start % Node::PREFIX_SEGMENT_SIZE;
228 auto remaining = new_count;
229
230 // get the source segment, i.e., the segment that contains the byte at start
231 reference<PrefixSegment> src_segment(PrefixSegment::Get(art, ptr: data.ptr));
232 for (idx_t i = 0; i < start / Node::PREFIX_SEGMENT_SIZE; i++) {
233 D_ASSERT(src_segment.get().next.IsSet());
234 src_segment = PrefixSegment::Get(art, ptr: src_segment.get().next);
235 }
236
237 // iterate all segments starting at the source segment and shift their data
238 reference<PrefixSegment> dst_segment(PrefixSegment::Get(art, ptr: data.ptr));
239 while (true) {
240 auto copy_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE - offset, b: remaining);
241
242 // copy the data
243 for (idx_t i = offset; i < offset + copy_count; i++) {
244 dst_segment = dst_segment.get().Append(art, count, byte: src_segment.get().bytes[i]);
245 }
246
247 // adjust the loop variables
248 offset = 0;
249 remaining -= copy_count;
250 if (remaining == 0) {
251 break;
252 }
253 D_ASSERT(src_segment.get().next.IsSet());
254 src_segment = PrefixSegment::Get(art, ptr: src_segment.get().next);
255 }
256
257 // possibly inline the data
258 if (IsInlined()) {
259 MoveSegmentToInlined(art);
260 }
261
262 return new_first_byte;
263}
264
265uint8_t Prefix::GetByte(const ART &art, const idx_t position) const {
266
267 D_ASSERT(position < count);
268 if (IsInlined()) {
269 return data.inlined[position];
270 }
271
272 // get the correct segment
273 reference<PrefixSegment> segment(PrefixSegment::Get(art, ptr: data.ptr));
274 for (idx_t i = 0; i < position / Node::PREFIX_SEGMENT_SIZE; i++) {
275 D_ASSERT(segment.get().next.IsSet());
276 segment = PrefixSegment::Get(art, ptr: segment.get().next);
277 }
278
279 return segment.get().bytes[position % Node::PREFIX_SEGMENT_SIZE];
280}
281
282uint32_t Prefix::KeyMismatchPosition(const ART &art, const ARTKey &key, const uint32_t depth) const {
283
284 if (IsInlined()) {
285 for (idx_t mismatch_position = 0; mismatch_position < count; mismatch_position++) {
286 D_ASSERT(depth + mismatch_position < key.len);
287 if (key[depth + mismatch_position] != data.inlined[mismatch_position]) {
288 return mismatch_position;
289 }
290 }
291 return count;
292 }
293
294 uint32_t mismatch_position = 0;
295 auto ptr = data.ptr;
296
297 while (mismatch_position != count) {
298 D_ASSERT(depth + mismatch_position < key.len);
299 D_ASSERT(ptr.IsSet());
300
301 auto &segment = PrefixSegment::Get(art, ptr);
302 auto compare_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE, b: count - mismatch_position);
303
304 // compare bytes
305 for (uint32_t i = 0; i < compare_count; i++) {
306 if (key[depth + mismatch_position] != segment.bytes[i]) {
307 return mismatch_position;
308 }
309 mismatch_position++;
310 }
311
312 // adjust loop variables
313 ptr = segment.next;
314 }
315 return count;
316}
317
318uint32_t Prefix::MismatchPosition(const ART &art, const Prefix &other) const {
319
320 D_ASSERT(count <= other.count);
321
322 // case 1: both prefixes are inlined
323 if (IsInlined() && other.IsInlined()) {
324 for (uint32_t i = 0; i < count; i++) {
325 if (data.inlined[i] != other.data.inlined[i]) {
326 return i;
327 }
328 }
329 return count;
330 }
331
332 // case 2: only this prefix is inlined
333 if (IsInlined()) {
334 // we only need the first segment of the other prefix
335 auto &segment = PrefixSegment::Get(art, ptr: other.data.ptr);
336 for (uint32_t i = 0; i < count; i++) {
337 if (data.inlined[i] != segment.bytes[i]) {
338 return i;
339 }
340 }
341 return count;
342 }
343
344 // case 3: both prefixes are not inlined
345 auto ptr = data.ptr;
346 auto other_ptr = other.data.ptr;
347
348 // iterate segments and compare bytes
349 uint32_t mismatch_position = 0;
350 while (ptr.IsSet()) {
351 D_ASSERT(other_ptr.IsSet());
352 auto &segment = PrefixSegment::Get(art, ptr);
353 auto &other_segment = PrefixSegment::Get(art, ptr: other_ptr);
354
355 // compare bytes
356 auto compare_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE, b: count - mismatch_position);
357 for (uint32_t i = 0; i < compare_count; i++) {
358 if (segment.bytes[i] != other_segment.bytes[i]) {
359 return mismatch_position;
360 }
361 mismatch_position++;
362 }
363
364 // adjust loop variables
365 ptr = segment.next;
366 other_ptr = other_segment.next;
367 }
368 return count;
369}
370
371void Prefix::Serialize(const ART &art, MetaBlockWriter &writer) const {
372
373 writer.Write(element: count);
374
375 // write inlined data
376 if (IsInlined()) {
377 writer.WriteData(buffer: data.inlined, write_size: count);
378 return;
379 }
380
381 D_ASSERT(data.ptr.IsSet());
382 auto ptr = data.ptr;
383 auto remaining = count;
384
385 // iterate all prefix segments and write their bytes
386 while (ptr.IsSet()) {
387 auto &segment = PrefixSegment::Get(art, ptr);
388 auto copy_count = MinValue(a: Node::PREFIX_SEGMENT_SIZE, b: remaining);
389
390 // write the bytes
391 writer.WriteData(buffer: segment.bytes, write_size: copy_count);
392
393 // adjust loop variables
394 remaining -= copy_count;
395 ptr = segment.next;
396 }
397 D_ASSERT(remaining == 0);
398}
399
400void Prefix::Deserialize(ART &art, MetaBlockReader &reader) {
401
402 auto count_p = reader.Read<uint32_t>();
403
404 // copy into inlined data
405 if (count_p <= Node::PREFIX_INLINE_BYTES) {
406 reader.ReadData(buffer: data.inlined, read_size: count_p);
407 count = count_p;
408 return;
409 }
410
411 // copy into segments
412 count = 0;
413 reference<PrefixSegment> segment(PrefixSegment::New(art, node&: data.ptr));
414 for (idx_t i = 0; i < count_p; i++) {
415 segment = segment.get().Append(art, count, byte: reader.Read<uint8_t>());
416 }
417 D_ASSERT(count_p == count);
418}
419
420void Prefix::Vacuum(ART &art) {
421
422 if (IsInlined()) {
423 return;
424 }
425
426 // first pointer has special treatment because we don't obtain it from a prefix segment
427 auto &allocator = Node::GetAllocator(art, type: NType::PREFIX_SEGMENT);
428 if (allocator.NeedsVacuum(ptr: data.ptr)) {
429 data.ptr.SetPtr(allocator.VacuumPointer(ptr: data.ptr));
430 data.ptr.type = (uint8_t)NType::PREFIX_SEGMENT;
431 }
432
433 auto ptr = data.ptr;
434 while (ptr.IsSet()) {
435 auto &segment = PrefixSegment::Get(art, ptr);
436 ptr = segment.next;
437 if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
438 segment.next.SetPtr(allocator.VacuumPointer(ptr));
439 segment.next.type = (uint8_t)NType::PREFIX_SEGMENT;
440 ptr = segment.next;
441 }
442 }
443}
444
445PrefixSegment &Prefix::MoveInlinedToSegment(ART &art) {
446
447 D_ASSERT(IsInlined());
448
449 Node ptr;
450 auto &segment = PrefixSegment::New(art, node&: ptr);
451
452 // move data
453 D_ASSERT(Node::PREFIX_SEGMENT_SIZE >= Node::PREFIX_INLINE_BYTES);
454 memcpy(dest: segment.bytes, src: data.inlined, n: count);
455 data.ptr = ptr;
456 return segment;
457}
458
459void Prefix::MoveSegmentToInlined(ART &art) {
460
461 D_ASSERT(IsInlined());
462 D_ASSERT(data.ptr.IsSet());
463
464 auto ptr = data.ptr;
465 auto &segment = PrefixSegment::Get(art, ptr: data.ptr);
466
467 memcpy(dest: data.inlined, src: segment.bytes, n: count);
468 Node::Free(art, node&: ptr);
469}
470
471} // namespace duckdb
472