1#include "duckdb/execution/index/art/art.hpp"
2
3#include "duckdb/common/radix.hpp"
4#include "duckdb/common/vector_operations/vector_operations.hpp"
5#include "duckdb/execution/expression_executor.hpp"
6#include "duckdb/storage/arena_allocator.hpp"
7#include "duckdb/execution/index/art/art_key.hpp"
8#include "duckdb/execution/index/art/prefix_segment.hpp"
9#include "duckdb/execution/index/art/leaf_segment.hpp"
10#include "duckdb/execution/index/art/prefix.hpp"
11#include "duckdb/execution/index/art/leaf.hpp"
12#include "duckdb/execution/index/art/node4.hpp"
13#include "duckdb/execution/index/art/node16.hpp"
14#include "duckdb/execution/index/art/node48.hpp"
15#include "duckdb/execution/index/art/node256.hpp"
16#include "duckdb/execution/index/art/iterator.hpp"
17#include "duckdb/common/types/conflict_manager.hpp"
18#include "duckdb/storage/table/scan_state.hpp"
19
20#include <algorithm>
21
22namespace duckdb {
23
24struct ARTIndexScanState : public IndexScanState {
25
26 //! Scan predicates (single predicate scan or range scan)
27 Value values[2];
28 //! Expressions of the scan predicates
29 ExpressionType expressions[2];
30 bool checked = false;
31 //! All scanned row IDs
32 vector<row_t> result_ids;
33 Iterator iterator;
34};
35
36ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
37 const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
38 AttachedDatabase &db, const idx_t block_id, const idx_t block_offset)
39
40 : Index(db, IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type) {
41
42 if (!Radix::IsLittleEndian()) {
43 throw NotImplementedException("ART indexes are not supported on big endian architectures");
44 }
45
46 // initialize all allocators
47 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(PrefixSegment), args&: buffer_manager.GetBufferAllocator()));
48 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(LeafSegment), args&: buffer_manager.GetBufferAllocator()));
49 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(Leaf), args&: buffer_manager.GetBufferAllocator()));
50 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(Node4), args&: buffer_manager.GetBufferAllocator()));
51 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(Node16), args&: buffer_manager.GetBufferAllocator()));
52 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(Node48), args&: buffer_manager.GetBufferAllocator()));
53 allocators.emplace_back(args: make_uniq<FixedSizeAllocator>(args: sizeof(Node256), args&: buffer_manager.GetBufferAllocator()));
54
55 // set the root node of the tree
56 tree = make_uniq<Node>();
57 if (block_id != DConstants::INVALID_INDEX) {
58 tree->buffer_id = block_id;
59 tree->offset = block_offset;
60 tree->Deserialize(art&: *this);
61 }
62 serialized_data_pointer = BlockPointer(block_id, block_offset);
63
64 // validate the types of the key columns
65 for (idx_t i = 0; i < types.size(); i++) {
66 switch (types[i]) {
67 case PhysicalType::BOOL:
68 case PhysicalType::INT8:
69 case PhysicalType::INT16:
70 case PhysicalType::INT32:
71 case PhysicalType::INT64:
72 case PhysicalType::INT128:
73 case PhysicalType::UINT8:
74 case PhysicalType::UINT16:
75 case PhysicalType::UINT32:
76 case PhysicalType::UINT64:
77 case PhysicalType::FLOAT:
78 case PhysicalType::DOUBLE:
79 case PhysicalType::VARCHAR:
80 break;
81 default:
82 throw InvalidTypeException(logical_types[i], "Invalid type for index key.");
83 }
84 }
85}

ART::~ART() {
    tree->Reset();
}

//===--------------------------------------------------------------------===//
// Initialize Predicate Scans
//===--------------------------------------------------------------------===//

unique_ptr<IndexScanState> ART::InitializeScanSinglePredicate(const Transaction &transaction, const Value &value,
                                                              const ExpressionType expression_type) {
    // initialize point lookup
    auto result = make_uniq<ARTIndexScanState>();
    result->values[0] = value;
    result->expressions[0] = expression_type;
    return std::move(result);
}

unique_ptr<IndexScanState> ART::InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value,
                                                            const ExpressionType low_expression_type,
                                                            const Value &high_value,
                                                            const ExpressionType high_expression_type) {
    // initialize range lookup
    auto result = make_uniq<ARTIndexScanState>();
    result->values[0] = low_value;
    result->expressions[0] = low_expression_type;
    result->values[1] = high_value;
    result->expressions[1] = high_expression_type;
    return std::move(result);
}

//===--------------------------------------------------------------------===//
// Keys
//===--------------------------------------------------------------------===//

template <class T>
static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector<ARTKey> &keys) {
    UnifiedVectorFormat idata;
    input.ToUnifiedFormat(count, idata);

    D_ASSERT(keys.size() >= count);
    auto input_data = UnifiedVectorFormat::GetData<T>(idata);
    for (idx_t i = 0; i < count; i++) {
        auto idx = idata.sel->get_index(i);
        if (idata.validity.RowIsValid(idx)) {
            ARTKey::CreateARTKey<T>(allocator, input.GetType(), keys[i], input_data[idx]);
        } else {
            // we may need to overwrite a previous key value in the keys vector
            keys[i] = ARTKey();
        }
    }
}

template <class T>
static void ConcatenateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector<ARTKey> &keys) {
    UnifiedVectorFormat idata;
    input.ToUnifiedFormat(count, idata);

    auto input_data = UnifiedVectorFormat::GetData<T>(idata);
    for (idx_t i = 0; i < count; i++) {
        auto idx = idata.sel->get_index(i);

        // key is not NULL (no previous column entry was NULL)
        if (!keys[i].Empty()) {
            if (!idata.validity.RowIsValid(idx)) {
                // this column entry is NULL, so set the whole key to NULL
                keys[i] = ARTKey();
            } else {
                auto other_key = ARTKey::CreateARTKey<T>(allocator, input.GetType(), input_data[idx]);
                keys[i].ConcatenateARTKey(allocator, other_key);
            }
        }
    }
}

void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<ARTKey> &keys) {
    // generate keys for the first input column
    switch (input.data[0].GetType().InternalType()) {
    case PhysicalType::BOOL:
        TemplatedGenerateKeys<bool>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::INT8:
        TemplatedGenerateKeys<int8_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::INT16:
        TemplatedGenerateKeys<int16_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::INT32:
        TemplatedGenerateKeys<int32_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::INT64:
        TemplatedGenerateKeys<int64_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::INT128:
        TemplatedGenerateKeys<hugeint_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::UINT8:
        TemplatedGenerateKeys<uint8_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::UINT16:
        TemplatedGenerateKeys<uint16_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::UINT32:
        TemplatedGenerateKeys<uint32_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::UINT64:
        TemplatedGenerateKeys<uint64_t>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::FLOAT:
        TemplatedGenerateKeys<float>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::DOUBLE:
        TemplatedGenerateKeys<double>(allocator, input.data[0], input.size(), keys);
        break;
    case PhysicalType::VARCHAR:
        TemplatedGenerateKeys<string_t>(allocator, input.data[0], input.size(), keys);
        break;
    default:
        throw InternalException("Invalid type for index");
    }

    for (idx_t i = 1; i < input.ColumnCount(); i++) {
        // for each of the remaining columns, concatenate
        switch (input.data[i].GetType().InternalType()) {
        case PhysicalType::BOOL:
            ConcatenateKeys<bool>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::INT8:
            ConcatenateKeys<int8_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::INT16:
            ConcatenateKeys<int16_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::INT32:
            ConcatenateKeys<int32_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::INT64:
            ConcatenateKeys<int64_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::INT128:
            ConcatenateKeys<hugeint_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::UINT8:
            ConcatenateKeys<uint8_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::UINT16:
            ConcatenateKeys<uint16_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::UINT32:
            ConcatenateKeys<uint32_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::UINT64:
            ConcatenateKeys<uint64_t>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::FLOAT:
            ConcatenateKeys<float>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::DOUBLE:
            ConcatenateKeys<double>(allocator, input.data[i], input.size(), keys);
            break;
        case PhysicalType::VARCHAR:
            ConcatenateKeys<string_t>(allocator, input.data[i], input.size(), keys);
            break;
        default:
            throw InternalException("Invalid type for index");
        }
    }
}

//===--------------------------------------------------------------------===//
// Construct from sorted data (only during CREATE (UNIQUE) INDEX statements)
//===--------------------------------------------------------------------===//

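//! Represents a contiguous range of sorted keys, from start to end (both inclusive), that share all key
//! bytes up to depth. key_byte is the byte at the parent's depth, i.e., the byte under which the
//! resulting child node is inserted into its parent node.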
struct KeySection {
    KeySection(idx_t start_p, idx_t end_p, idx_t depth_p, data_t key_byte_p)
        : start(start_p), end(end_p), depth(depth_p), key_byte(key_byte_p) {};
    KeySection(idx_t start_p, idx_t end_p, vector<ARTKey> &keys, KeySection &key_section)
        : start(start_p), end(end_p), depth(key_section.depth + 1), key_byte(keys[end_p].data[key_section.depth]) {};
    idx_t start;
    idx_t end;
    idx_t depth;
    data_t key_byte;
};

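// Splits the key section into its child sections by comparing the key byte at key_section.depth,
// i.e., all adjacent keys sharing the same byte at that depth end up in the same child section.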
void GetChildSections(vector<KeySection> &child_sections, vector<ARTKey> &keys, KeySection &key_section) {

    idx_t child_start_idx = key_section.start;
    for (idx_t i = key_section.start + 1; i <= key_section.end; i++) {
        if (keys[i - 1].data[key_section.depth] != keys[i].data[key_section.depth]) {
            child_sections.emplace_back(child_start_idx, i - 1, keys, key_section);
            child_start_idx = i;
        }
    }
    child_sections.emplace_back(child_start_idx, key_section.end, keys, key_section);
}

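// Recursively constructs an ART subtree for the sorted keys in [key_section.start, key_section.end].
// Returns false, if has_constraint is true and a key maps to more than one row ID, i.e., on a
// constraint violation.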
bool Construct(ART &art, vector<ARTKey> &keys, row_t *row_ids, Node &node, KeySection &key_section,
               bool &has_constraint) {

    D_ASSERT(key_section.start < keys.size());
    D_ASSERT(key_section.end < keys.size());
    D_ASSERT(key_section.start <= key_section.end);

    auto &start_key = keys[key_section.start];
    auto &end_key = keys[key_section.end];

    // increment the depth until we reach a leaf or find a mismatching byte
    auto prefix_start = key_section.depth;
    while (start_key.len != key_section.depth && start_key.ByteMatches(end_key, key_section.depth)) {
        key_section.depth++;
    }

    // we reached a leaf, i.e. all the bytes of start_key and end_key match
    if (start_key.len == key_section.depth) {
        // end_idx is inclusive
        auto num_row_ids = key_section.end - key_section.start + 1;

        // check for possible constraint violation
        auto single_row_id = num_row_ids == 1;
        if (has_constraint && !single_row_id) {
            return false;
        }

        if (single_row_id) {
            Leaf::New(art, node, start_key, prefix_start, row_ids[key_section.start]);
        } else {
            Leaf::New(art, node, start_key, prefix_start, row_ids + key_section.start, num_row_ids);
        }
        return true;
    }

    // create a new node and recurse

    // we will find at least two child entries of this node, otherwise we'd have reached a leaf
    vector<KeySection> child_sections;
    GetChildSections(child_sections, keys, key_section);

    auto node_type = Node::GetARTNodeTypeByCount(child_sections.size());
    Node::New(art, node, node_type);

    auto prefix_length = key_section.depth - prefix_start;
    node.GetPrefix(art).Initialize(art, start_key, prefix_start, prefix_length);

    // recurse on each child section
    for (auto &child_section : child_sections) {
        Node new_child;
        auto no_violation = Construct(art, keys, row_ids, new_child, child_section, has_constraint);
        Node::InsertChild(art, node, child_section.key_byte, new_child);
        if (!no_violation) {
            return false;
        }
    }
    return true;
}

bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_identifiers) {

    // prepare the row_identifiers
    row_identifiers.Flatten(count);
    auto row_ids = FlatVector::GetData<row_t>(row_identifiers);

    auto key_section = KeySection(0, count - 1, 0, 0);
    auto has_constraint = IsUnique();
    if (!Construct(*this, keys, row_ids, *this->tree, key_section, has_constraint)) {
        return false;
    }

#ifdef DEBUG
    D_ASSERT(!VerifyAndToStringInternal(true).empty());
    for (idx_t i = 0; i < count; i++) {
        D_ASSERT(!keys[i].Empty());
        auto leaf_node = Lookup(*tree, keys[i], 0);
        D_ASSERT(leaf_node.IsSet());
        auto &leaf = Leaf::Get(*this, leaf_node);

        if (leaf.IsInlined()) {
            D_ASSERT(row_ids[i] == leaf.row_ids.inlined);
            continue;
        }

        D_ASSERT(leaf.row_ids.ptr.IsSet());
        Node leaf_segment = leaf.row_ids.ptr;
        auto position = leaf.FindRowId(*this, leaf_segment, row_ids[i]);
        D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
    }
#endif

    return true;
}

//===--------------------------------------------------------------------===//
// Insert / Verification / Constraint Checking
//===--------------------------------------------------------------------===//
PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {

    D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE);
    D_ASSERT(logical_types[0] == input.data[0].GetType());

    // generate the keys for the given input
    ArenaAllocator arena_allocator(BufferAllocator::Get(db));
    vector<ARTKey> keys(input.size());
    GenerateKeys(arena_allocator, input, keys);

    // get the corresponding row IDs
    row_ids.Flatten(input.size());
    auto row_identifiers = FlatVector::GetData<row_t>(row_ids);

    // now insert the elements into the index
    idx_t failed_index = DConstants::INVALID_INDEX;
    for (idx_t i = 0; i < input.size(); i++) {
        if (keys[i].Empty()) {
            continue;
        }

        row_t row_id = row_identifiers[i];
        if (!Insert(*tree, keys[i], 0, row_id)) {
            // failed to insert because of constraint violation
            failed_index = i;
            break;
        }
    }

    // failed to insert because of constraint violation: remove previously inserted entries
    if (failed_index != DConstants::INVALID_INDEX) {
        for (idx_t i = 0; i < failed_index; i++) {
            if (keys[i].Empty()) {
                continue;
            }
            row_t row_id = row_identifiers[i];
            Erase(*tree, keys[i], 0, row_id);
        }
    }

    if (failed_index != DConstants::INVALID_INDEX) {
        return PreservedError(ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicate key \"%s\"",
                                                  AppendRowError(input, failed_index)));
    }

#ifdef DEBUG
    for (idx_t i = 0; i < input.size(); i++) {
        if (keys[i].Empty()) {
            continue;
        }

        auto leaf_node = Lookup(*tree, keys[i], 0);
        D_ASSERT(leaf_node.IsSet());
        auto &leaf = Leaf::Get(*this, leaf_node);

        if (leaf.IsInlined()) {
            D_ASSERT(row_identifiers[i] == leaf.row_ids.inlined);
            continue;
        }

        D_ASSERT(leaf.row_ids.ptr.IsSet());
        Node leaf_segment = leaf.row_ids.ptr;
        auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
        D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
    }
#endif

    return PreservedError();
}

PreservedError ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) {
    DataChunk expression_result;
    expression_result.Initialize(Allocator::DefaultAllocator(), logical_types);

    // first resolve the expressions for the index
    ExecuteExpressions(appended_data, expression_result);

    // now insert into the index
    return Insert(lock, expression_result, row_identifiers);
}

void ART::VerifyAppend(DataChunk &chunk) {
    ConflictManager conflict_manager(VerifyExistenceType::APPEND, chunk.size());
    CheckConstraintsForChunk(chunk, conflict_manager);
}

void ART::VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) {
    D_ASSERT(conflict_manager.LookupType() == VerifyExistenceType::APPEND);
    CheckConstraintsForChunk(chunk, conflict_manager);
}

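// Appends the row ID to an existing leaf. Returns false, if the leaf belongs to a unique index
// and already contains a row ID, i.e., on a constraint violation.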
bool ART::InsertToLeaf(Node &leaf_node, const row_t &row_id) {

    auto &leaf = Leaf::Get(*this, leaf_node);

#ifdef DEBUG
    for (idx_t k = 0; k < leaf.count; k++) {
        D_ASSERT(leaf.GetRowId(*this, k) != row_id);
    }
#endif
    if (IsUnique() && leaf.count != 0) {
        return false;
    }
    leaf.Insert(*this, row_id);
    return true;
}

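// Inserts the (key, row ID) pair into the subtree rooted at node.
// Returns false, if the insertion violates a unique constraint.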
bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {

    if (!node.IsSet()) {
        // the node is currently empty, create a leaf here containing the key
        Leaf::New(*this, node, key, depth, row_id);
        return true;
    }

    if (node.DecodeARTNodeType() == NType::LEAF) {

        // add the row ID to the leaf, if the key matches the leaf's key
        auto &leaf = Leaf::Get(*this, node);
        auto mismatch_position = leaf.prefix.KeyMismatchPosition(*this, key, depth);
        if (mismatch_position == leaf.prefix.count && depth + leaf.prefix.count == key.len) {
            return InsertToLeaf(node, row_id);
        }

        // replace the leaf with a Node4 and store both leaves in it
        auto old_node = node;
        auto &new_n4 = Node4::New(*this, node);
        new_n4.prefix.Initialize(*this, key, depth, mismatch_position);

        auto key_byte = old_node.GetPrefix(*this).Reduce(*this, mismatch_position);
        Node4::InsertChild(*this, node, key_byte, old_node);

        Node leaf_node;
        Leaf::New(*this, leaf_node, key, depth + mismatch_position + 1, row_id);
        Node4::InsertChild(*this, node, key[depth + mismatch_position], leaf_node);

        return true;
    }

    // handle the prefix of an inner node
    auto &old_node_prefix = node.GetPrefix(*this);
    if (old_node_prefix.count) {

        auto mismatch_position = old_node_prefix.KeyMismatchPosition(*this, key, depth);
        if (mismatch_position != old_node_prefix.count) {

            // the prefix differs, create a new node
            auto old_node = node;
            auto &new_n4 = Node4::New(*this, node);
            new_n4.prefix.Initialize(*this, key, depth, mismatch_position);

            auto key_byte = old_node_prefix.Reduce(*this, mismatch_position);
            Node4::InsertChild(*this, node, key_byte, old_node);

            Node leaf_node;
            Leaf::New(*this, leaf_node, key, depth + mismatch_position + 1, row_id);
            Node4::InsertChild(*this, node, key[depth + mismatch_position], leaf_node);

            return true;
        }
        depth += node.GetPrefix(*this).count;
    }

    // recurse into the matching child
    D_ASSERT(depth < key.len);
    auto child = node.GetChild(*this, key[depth]);
    if (child) {
        bool success = Insert(*child, key, depth + 1, row_id);
        node.ReplaceChild(*this, key[depth], *child);
        return success;
    }

    // no child at this key byte, insert a new leaf
    Node leaf_node;
    Leaf::New(*this, leaf_node, key, depth + 1, row_id);
    Node::InsertChild(*this, node, key[depth], leaf_node);
    return true;
}

//===--------------------------------------------------------------------===//
// Delete
//===--------------------------------------------------------------------===//

void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {

    DataChunk expression;
    expression.Initialize(Allocator::DefaultAllocator(), logical_types);

    // first resolve the expressions
    ExecuteExpressions(input, expression);

    // then generate the keys for the given input
    ArenaAllocator arena_allocator(BufferAllocator::Get(db));
    vector<ARTKey> keys(expression.size());
    GenerateKeys(arena_allocator, expression, keys);

    // now erase the elements from the index
    row_ids.Flatten(input.size());
    auto row_identifiers = FlatVector::GetData<row_t>(row_ids);

    for (idx_t i = 0; i < input.size(); i++) {
        if (keys[i].Empty()) {
            continue;
        }
        Erase(*tree, keys[i], 0, row_identifiers[i]);
    }

#ifdef DEBUG
    // verify that we removed all row IDs
    for (idx_t i = 0; i < input.size(); i++) {
        if (keys[i].Empty()) {
            continue;
        }

        auto node = Lookup(*tree, keys[i], 0);
        if (node.IsSet()) {
            auto &leaf = Leaf::Get(*this, node);

            if (leaf.IsInlined()) {
                D_ASSERT(row_identifiers[i] != leaf.row_ids.inlined);
                continue;
            }

            D_ASSERT(leaf.row_ids.ptr.IsSet());
            Node leaf_segment = leaf.row_ids.ptr;
            auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
            D_ASSERT(position == (uint32_t)DConstants::INVALID_INDEX);
        }
    }
#endif
}

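// Removes the (key, row ID) pair from the subtree rooted at node, freeing empty leaves and
// deleting their entries from their parent nodes.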
void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {

    if (!node.IsSet()) {
        return;
    }

    // delete a row ID from a leaf
    if (node.DecodeARTNodeType() == NType::LEAF) {
        auto &leaf = Leaf::Get(*this, node);
        leaf.Remove(*this, row_id);

        if (leaf.count == 0) {
            Node::Free(*this, node);
            node.Reset();
        }
        return;
    }

    // handle prefix
    auto &node_prefix = node.GetPrefix(*this);
    if (node_prefix.count) {
        if (node_prefix.KeyMismatchPosition(*this, key, depth) != node_prefix.count) {
            return;
        }
        depth += node_prefix.count;
    }

    auto child = node.GetChild(*this, key[depth]);
    if (child) {
        D_ASSERT(child->IsSet());

        if (child->DecodeARTNodeType() == NType::LEAF) {
            // leaf found, remove entry
            auto &leaf = Leaf::Get(*this, *child);
            leaf.Remove(*this, row_id);

            if (leaf.count == 0) {
                // leaf is empty, delete leaf, decrement node counter and maybe shrink node
                Node::DeleteChild(*this, node, key[depth]);
            }
            return;
        }

        // recurse
        Erase(*child, key, depth + 1, row_id);
        node.ReplaceChild(*this, key[depth], *child);
    }
}

//===--------------------------------------------------------------------===//
// Point Query (Equal)
//===--------------------------------------------------------------------===//

static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &value) {
    D_ASSERT(type == value.type().InternalType());
    switch (type) {
    case PhysicalType::BOOL:
        return ARTKey::CreateARTKey<bool>(allocator, value.type(), value);
    case PhysicalType::INT8:
        return ARTKey::CreateARTKey<int8_t>(allocator, value.type(), value);
    case PhysicalType::INT16:
        return ARTKey::CreateARTKey<int16_t>(allocator, value.type(), value);
    case PhysicalType::INT32:
        return ARTKey::CreateARTKey<int32_t>(allocator, value.type(), value);
    case PhysicalType::INT64:
        return ARTKey::CreateARTKey<int64_t>(allocator, value.type(), value);
    case PhysicalType::UINT8:
        return ARTKey::CreateARTKey<uint8_t>(allocator, value.type(), value);
    case PhysicalType::UINT16:
        return ARTKey::CreateARTKey<uint16_t>(allocator, value.type(), value);
    case PhysicalType::UINT32:
        return ARTKey::CreateARTKey<uint32_t>(allocator, value.type(), value);
    case PhysicalType::UINT64:
        return ARTKey::CreateARTKey<uint64_t>(allocator, value.type(), value);
    case PhysicalType::INT128:
        return ARTKey::CreateARTKey<hugeint_t>(allocator, value.type(), value);
    case PhysicalType::FLOAT:
        return ARTKey::CreateARTKey<float>(allocator, value.type(), value);
    case PhysicalType::DOUBLE:
        return ARTKey::CreateARTKey<double>(allocator, value.type(), value);
    case PhysicalType::VARCHAR:
        return ARTKey::CreateARTKey<string_t>(allocator, value.type(), value);
    default:
        throw InternalException("Invalid type for the ART key");
    }
}

bool ART::SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids) {

    auto leaf_node = Lookup(*tree, key, 0);
    if (!leaf_node.IsSet()) {
        return true;
    }

    auto &leaf = Leaf::Get(*this, leaf_node);
    if (leaf.count > max_count) {
        return false;
    }
    for (idx_t i = 0; i < leaf.count; i++) {
        row_t row_id = leaf.GetRowId(*this, i);
        result_ids.push_back(row_id);
    }
    return true;
}

void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {

    // we need to look for a leaf
    auto leaf_node = Lookup(*tree, key, 0);
    if (!leaf_node.IsSet()) {
        result_size = 0;
        return;
    }

    auto &leaf = Leaf::Get(*this, leaf_node);
    result_size = leaf.count;
}

//===--------------------------------------------------------------------===//
// Lookup
//===--------------------------------------------------------------------===//

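// Returns the leaf node matching the key, or an unset node, if the ART does not contain the key.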
Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {

    while (node.IsSet()) {
        if (node.DecodeARTNodeType() == NType::LEAF) {
            auto &leaf = Leaf::Get(*this, node);

            // check if the leaf contains the key
            for (idx_t i = 0; i < leaf.prefix.count; i++) {
                if (leaf.prefix.GetByte(*this, i) != key[i + depth]) {
                    return Node();
                }
            }
            return node;
        }
        auto &node_prefix = node.GetPrefix(*this);
        if (node_prefix.count) {
            for (idx_t pos = 0; pos < node_prefix.count; pos++) {
                if (key[depth + pos] != node_prefix.GetByte(*this, pos)) {
                    // prefix mismatch, the subtree of this node does not contain the key
                    return Node();
                }
            }
            depth += node_prefix.count;
        }

        // the prefix matches the key, but if there is no child at this byte, the subtree does not contain the key
        auto child = node.GetChild(*this, key[depth]);
        if (!child) {
            return Node();
        }

        // descend into the child
        node = *child;
        D_ASSERT(node.IsSet());
        depth++;
    }

    return Node();
}

//===--------------------------------------------------------------------===//
// Greater Than
// Returns: true (if a leaf >= the key was found)
//          false (otherwise)
//===--------------------------------------------------------------------===//

bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool inclusive, idx_t max_count,
                        vector<row_t> &result_ids) {

    auto &it = state.iterator;

    // greater-than scan: first, set the iterator to the node at which we start our scan,
    // i.e., the lowest node that satisfies our predicate
    if (!it.art) {
        it.art = this;
        if (!it.LowerBound(*tree, key, inclusive)) {
            return true;
        }
    }

    // after that we continue the scan; we don't need to check the bounds, as any value following this value is
    // automatically greater and hence satisfies our predicate
    ARTKey empty_key = ARTKey();
    return it.Scan(empty_key, max_count, result_ids, false);
}

//===--------------------------------------------------------------------===//
// Less Than
//===--------------------------------------------------------------------===//

bool ART::SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool inclusive, idx_t max_count,
                     vector<row_t> &result_ids) {

    if (!tree->IsSet()) {
        return true;
    }

    auto &it = state.iterator;

    if (!it.art) {
        it.art = this;
        // first, find the minimum value in the ART: we start scanning from this value
        it.FindMinimum(*tree);
        // early-out, if the minimum value is already higher than the upper bound
        if (it.cur_key > upper_bound) {
            return true;
        }
    }

    // now continue the scan until we reach the upper bound
    return it.Scan(upper_bound, max_count, result_ids, inclusive);
}

//===--------------------------------------------------------------------===//
// Closed Range Query
//===--------------------------------------------------------------------===//

bool ART::SearchCloseRange(ARTIndexScanState &state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_inclusive,
                           bool right_inclusive, idx_t max_count, vector<row_t> &result_ids) {
    auto &it = state.iterator;

    // first, find the first node that satisfies the left predicate
    if (!it.art) {
        it.art = this;
        if (!it.LowerBound(*tree, lower_bound, left_inclusive)) {
            return true;
        }
    }

    // now continue the scan until we reach the upper bound
    return it.Scan(upper_bound, max_count, result_ids, right_inclusive);
}

bool ART::Scan(const Transaction &transaction, const DataTable &table, IndexScanState &table_state,
               const idx_t max_count, vector<row_t> &result_ids) {
    auto &state = table_state.Cast<ARTIndexScanState>();
    vector<row_t> row_ids;
    bool success;

    // FIXME: the key directly owning the data for a single key might be more efficient
    D_ASSERT(state.values[0].type().InternalType() == types[0]);
    ArenaAllocator arena_allocator(Allocator::Get(db));
    auto key = CreateKey(arena_allocator, types[0], state.values[0]);

    if (state.values[1].IsNull()) {

        // single predicate
        lock_guard<mutex> l(lock);
        switch (state.expressions[0]) {
        case ExpressionType::COMPARE_EQUAL:
            success = SearchEqual(key, max_count, row_ids);
            break;
        case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
            success = SearchGreater(state, key, true, max_count, row_ids);
            break;
        case ExpressionType::COMPARE_GREATERTHAN:
            success = SearchGreater(state, key, false, max_count, row_ids);
            break;
        case ExpressionType::COMPARE_LESSTHANOREQUALTO:
            success = SearchLess(state, key, true, max_count, row_ids);
            break;
        case ExpressionType::COMPARE_LESSTHAN:
            success = SearchLess(state, key, false, max_count, row_ids);
            break;
        default:
            throw InternalException("Operation not implemented");
        }

    } else {

        // two predicates
        lock_guard<mutex> l(lock);

        D_ASSERT(state.values[1].type().InternalType() == types[0]);
        auto upper_bound = CreateKey(arena_allocator, types[0], state.values[1]);

        bool left_inclusive = state.expressions[0] == ExpressionType::COMPARE_GREATERTHANOREQUALTO;
        bool right_inclusive = state.expressions[1] == ExpressionType::COMPARE_LESSTHANOREQUALTO;
        success = SearchCloseRange(state, key, upper_bound, left_inclusive, right_inclusive, max_count, row_ids);
    }

    if (!success) {
        return false;
    }
    if (row_ids.empty()) {
        return true;
    }

    // sort the row IDs
    std::sort(row_ids.begin(), row_ids.end());
    // duplicate-eliminate the row IDs and append them to the result IDs
    result_ids.reserve(row_ids.size());

    result_ids.push_back(row_ids[0]);
    for (idx_t i = 1; i < row_ids.size(); i++) {
        if (row_ids[i] != row_ids[i - 1]) {
            result_ids.push_back(row_ids[i]);
        }
    }
    return true;
}

//===--------------------------------------------------------------------===//
// More Verification / Constraint Checking
//===--------------------------------------------------------------------===//

string ART::GenerateErrorKeyName(DataChunk &input, idx_t row) {

    // FIXME: why exactly can we not pass the expression_chunk as an argument to this
    // FIXME: function instead of re-executing?
    // re-executing the expressions is not very fast, but we're going to throw, so we don't care
    DataChunk expression_chunk;
    expression_chunk.Initialize(Allocator::DefaultAllocator(), logical_types);
    ExecuteExpressions(input, expression_chunk);

    string key_name;
    for (idx_t k = 0; k < expression_chunk.ColumnCount(); k++) {
        if (k > 0) {
            key_name += ", ";
        }
        key_name += unbound_expressions[k]->GetName() + ": " + expression_chunk.data[k].GetValue(row).ToString();
    }
    return key_name;
}

string ART::GenerateConstraintErrorMessage(VerifyExistenceType verify_type, const string &key_name) {
    switch (verify_type) {
    case VerifyExistenceType::APPEND: {
        // APPEND to a PK/UNIQUE table, but the node/key already exists in the PK/UNIQUE table
        string type = IsPrimary() ? "primary key" : "unique";
        return StringUtil::Format(
            "Duplicate key \"%s\" violates %s constraint. "
            "If this is an unexpected constraint violation please double "
            "check with the known index limitations section in our documentation (docs - sql - indexes).",
            key_name, type);
    }
    case VerifyExistenceType::APPEND_FK: {
        // APPEND_FK to an FK table, but the node/key does not exist in the PK/UNIQUE table
        return StringUtil::Format(
            "Violates foreign key constraint because key \"%s\" does not exist in the referenced table", key_name);
    }
    case VerifyExistenceType::DELETE_FK: {
        // DELETE_FK of a key that still exists in an FK table, i.e., not a valid delete
        return StringUtil::Format("Violates foreign key constraint because key \"%s\" is still referenced by a foreign "
                                  "key in a different table",
                                  key_name);
    }
    default:
        throw NotImplementedException("Type not implemented for VerifyExistenceType");
    }
}

void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) {

    // don't alter the index during constraint checking
    lock_guard<mutex> l(lock);

    // first resolve the expressions for the index
    DataChunk expression_chunk;
    expression_chunk.Initialize(Allocator::DefaultAllocator(), logical_types);
    ExecuteExpressions(input, expression_chunk);

    // generate the keys for the given input
    ArenaAllocator arena_allocator(BufferAllocator::Get(db));
    vector<ARTKey> keys(expression_chunk.size());
    GenerateKeys(arena_allocator, expression_chunk, keys);

    idx_t found_conflict = DConstants::INVALID_INDEX;
    for (idx_t i = 0; found_conflict == DConstants::INVALID_INDEX && i < input.size(); i++) {

        if (keys[i].Empty()) {
            if (conflict_manager.AddNull(i)) {
                found_conflict = i;
            }
            continue;
        }

        auto leaf_node = Lookup(*tree, keys[i], 0);
        if (!leaf_node.IsSet()) {
            if (conflict_manager.AddMiss(i)) {
                found_conflict = i;
            }
            continue;
        }

        // when we find a node, we need to update the 'matches' and 'row_ids'
        // NOTE: leaves can have more than one row ID, but for UNIQUE/PRIMARY KEY constraints they only have one
        Leaf &leaf = Leaf::Get(*this, leaf_node);
        D_ASSERT(leaf.count == 1);
        auto row_id = leaf.GetRowId(*this, 0);
        if (conflict_manager.AddHit(i, row_id)) {
            found_conflict = i;
        }
    }

    conflict_manager.FinishLookup();

    if (found_conflict == DConstants::INVALID_INDEX) {
        return;
    }

    auto key_name = GenerateErrorKeyName(input, found_conflict);
    auto exception_msg = GenerateConstraintErrorMessage(conflict_manager.LookupType(), key_name);
    throw ConstraintException(exception_msg);
}

//===--------------------------------------------------------------------===//
// Serialization
//===--------------------------------------------------------------------===//

BlockPointer ART::Serialize(MetaBlockWriter &writer) {

    lock_guard<mutex> l(lock);
    if (tree->IsSet()) {
        serialized_data_pointer = tree->Serialize(*this, writer);
    } else {
        serialized_data_pointer = {(block_id_t)DConstants::INVALID_INDEX, (uint32_t)DConstants::INVALID_INDEX};
    }

    return serialized_data_pointer;
}

//===--------------------------------------------------------------------===//
// Vacuum
//===--------------------------------------------------------------------===//

void ART::InitializeVacuum(ARTFlags &flags) {

    flags.vacuum_flags.reserve(allocators.size());
    for (auto &allocator : allocators) {
        flags.vacuum_flags.push_back(allocator->InitializeVacuum());
    }
}

void ART::FinalizeVacuum(const ARTFlags &flags) {

    for (idx_t i = 0; i < allocators.size(); i++) {
        if (flags.vacuum_flags[i]) {
            allocators[i]->FinalizeVacuum();
        }
    }
}

void ART::Vacuum(IndexLock &state) {

    if (!tree->IsSet()) {
        for (auto &allocator : allocators) {
            allocator->Reset();
        }
        return;
    }

    // holds true, if an allocator needs a vacuum, and false otherwise
    ARTFlags flags;
    InitializeVacuum(flags);

    // skip vacuum if no allocators require it
    auto perform_vacuum = false;
    for (const auto &vacuum_flag : flags.vacuum_flags) {
        if (vacuum_flag) {
            perform_vacuum = true;
            break;
        }
    }
    if (!perform_vacuum) {
        return;
    }

    // traverse the allocated memory of the tree to perform a vacuum
    Node::Vacuum(*this, *tree, flags);

    // finalize the vacuum operation
    FinalizeVacuum(flags);

    for (auto &allocator : allocators) {
        allocator->Verify();
    }
}

//===--------------------------------------------------------------------===//
// Merging
//===--------------------------------------------------------------------===//

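// Records the number of buffers per fixed-size allocator, so that the buffer IDs of the other ART
// can be incremented accordingly before merging the node storage.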
void ART::InitializeMerge(ARTFlags &flags) {

    flags.merge_buffer_counts.reserve(allocators.size());
    for (auto &allocator : allocators) {
        flags.merge_buffer_counts.emplace_back(allocator->buffers.size());
    }
}

bool ART::MergeIndexes(IndexLock &state, Index &other_index) {

    auto &other_art = other_index.Cast<ART>();
    if (!other_art.tree->IsSet()) {
        return true;
    }

    if (tree->IsSet()) {
        // fully deserialize other_index, and traverse it to increment its buffer IDs
        ARTFlags flags;
        InitializeMerge(flags);
        other_art.tree->InitializeMerge(other_art, flags);
    }

    // merge the node storage
    for (idx_t i = 0; i < allocators.size(); i++) {
        allocators[i]->Merge(*other_art.allocators[i]);
    }

    // merge the ARTs
    if (!tree->Merge(*this, *other_art.tree)) {
        return false;
    }

    for (auto &allocator : allocators) {
        allocator->Verify();
    }
    return true;
}

//===--------------------------------------------------------------------===//
// Utility
//===--------------------------------------------------------------------===//

string ART::VerifyAndToString(IndexLock &state, const bool only_verify) {
    return VerifyAndToStringInternal(only_verify);
}

string ART::VerifyAndToStringInternal(const bool only_verify) {
    if (tree->IsSet()) {
        return "ART: " + tree->VerifyAndToString(*this, only_verify);
    }
    return "[empty]";
}

} // namespace duckdb