1 | #include "duckdb/execution/index/art/art.hpp" |
2 | #include "duckdb/execution/expression_executor.hpp" |
3 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
4 | #include <algorithm> |
5 | #include <ctgmath> |
6 | |
7 | using namespace duckdb; |
8 | using namespace std; |
9 | |
10 | ART::ART(vector<column_t> column_ids, vector<unique_ptr<Expression>> unbound_expressions, |
11 | bool is_unique) |
12 | : Index(IndexType::ART, column_ids, move(unbound_expressions)), is_unique(is_unique) { |
13 | tree = nullptr; |
14 | expression_result.Initialize(types); |
15 | int n = 1; |
16 | //! little endian if true |
17 | if (*(char *)&n == 1) { |
18 | is_little_endian = true; |
19 | } else { |
20 | is_little_endian = false; |
21 | } |
22 | switch (types[0]) { |
23 | case TypeId::BOOL: |
24 | case TypeId::INT8: |
25 | case TypeId::INT16: |
26 | case TypeId::INT32: |
27 | case TypeId::INT64: |
28 | case TypeId::FLOAT: |
29 | case TypeId::DOUBLE: |
30 | case TypeId::VARCHAR: |
31 | break; |
32 | default: |
33 | throw InvalidTypeException(types[0], "Invalid type for index" ); |
34 | } |
35 | } |
36 | |
37 | ART::~ART() { |
38 | } |
39 | |
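//! Returns true if the key stored in the leaf matches the search key from "depth" onwards;
//! the bytes before "depth" are assumed to have been matched while descending the tree.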
bool ART::LeafMatches(Node *node, Key &key, unsigned depth) {
	auto leaf = static_cast<Leaf *>(node);
	Key &leaf_key = *leaf->value;
	for (idx_t i = depth; i < leaf_key.len; i++) {
		if (leaf_key[i] != key[i]) {
			return false;
		}
	}

	return true;
}

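//! Create a scan state for an index scan with a single predicate (e.g. =, >, >=, <, <=)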
unique_ptr<IndexScanState> ART::InitializeScanSinglePredicate(Transaction &transaction, vector<column_t> column_ids,
                                                              Value value, ExpressionType expression_type) {
	auto result = make_unique<ARTIndexScanState>(column_ids);
	result->values[0] = value;
	result->expressions[0] = expression_type;
	return move(result);
}

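//! Create a scan state for a range scan bounded by both a lower and an upper predicate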
unique_ptr<IndexScanState> ART::InitializeScanTwoPredicates(Transaction &transaction, vector<column_t> column_ids,
                                                            Value low_value, ExpressionType low_expression_type,
                                                            Value high_value, ExpressionType high_expression_type) {
	auto result = make_unique<ARTIndexScanState>(column_ids);
	result->values[0] = low_value;
	result->expressions[0] = low_expression_type;
	result->values[1] = high_value;
	result->expressions[1] = high_expression_type;
	return move(result);
}

//===--------------------------------------------------------------------===//
// Insert
//===--------------------------------------------------------------------===//
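//! Generate a key (via Key::CreateKey) for every entry of the input vector; NULL values produce a
//! null key so that they are skipped by the index.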
template <class T>
static void generate_keys(Vector &input, idx_t count, vector<unique_ptr<Key>> &keys, bool is_little_endian) {
	VectorData idata;
	input.Orrify(count, idata);

	auto input_data = (T *)idata.data;
	for (idx_t i = 0; i < count; i++) {
		auto idx = idata.sel->get_index(i);
		if ((*idata.nullmask)[idx]) {
			keys.push_back(nullptr);
		} else {
			keys.push_back(Key::CreateKey<T>(input_data[idx], is_little_endian));
		}
	}
}

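//! For multi-column indexes: append the key bytes of this column to the compound key that was
//! built from the previous columns; a NULL in any column invalidates the entire compound key.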
template <class T>
static void concatenate_keys(Vector &input, idx_t count, vector<unique_ptr<Key>> &keys, bool is_little_endian) {
	VectorData idata;
	input.Orrify(count, idata);

	auto input_data = (T *)idata.data;
	for (idx_t i = 0; i < count; i++) {
		auto idx = idata.sel->get_index(i);
		if ((*idata.nullmask)[idx] || !keys[i]) {
			// either this column is NULL, or the previous column is NULL!
			keys[i] = nullptr;
		} else {
			// concatenate the keys
			auto old_key = move(keys[i]);
			auto new_key = Key::CreateKey<T>(input_data[idx], is_little_endian);
			auto keyLen = old_key->len + new_key->len;
			auto compound_data = unique_ptr<data_t[]>(new data_t[keyLen]);
			memcpy(compound_data.get(), old_key->data.get(), old_key->len);
			memcpy(compound_data.get() + old_key->len, new_key->data.get(), new_key->len);
			keys[i] = make_unique<Key>(move(compound_data), keyLen);
		}
	}
}

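//! Build one key per input row: encode the first column, then concatenate the encodings of the
//! remaining columns. For example, for an index over (INT32, INT32) the key of a row is the byte
//! encoding of column 0 followed by the byte encoding of column 1 (the exact byte layout is
//! produced by Key::CreateKey).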
void ART::GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys) {
	keys.reserve(STANDARD_VECTOR_SIZE);
	// generate keys for the first input column
	switch (input.data[0].type) {
	case TypeId::BOOL:
		generate_keys<bool>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::INT8:
		generate_keys<int8_t>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::INT16:
		generate_keys<int16_t>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::INT32:
		generate_keys<int32_t>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::INT64:
		generate_keys<int64_t>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::FLOAT:
		generate_keys<float>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::DOUBLE:
		generate_keys<double>(input.data[0], input.size(), keys, is_little_endian);
		break;
	case TypeId::VARCHAR:
		generate_keys<string_t>(input.data[0], input.size(), keys, is_little_endian);
		break;
	default:
		throw InvalidTypeException(input.data[0].type, "Invalid type for index");
	}
	for (idx_t i = 1; i < input.column_count(); i++) {
		// for each of the remaining columns, concatenate
		switch (input.data[i].type) {
		case TypeId::BOOL:
			concatenate_keys<bool>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::INT8:
			concatenate_keys<int8_t>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::INT16:
			concatenate_keys<int16_t>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::INT32:
			concatenate_keys<int32_t>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::INT64:
			concatenate_keys<int64_t>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::FLOAT:
			concatenate_keys<float>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::DOUBLE:
			concatenate_keys<double>(input.data[i], input.size(), keys, is_little_endian);
			break;
		case TypeId::VARCHAR:
			concatenate_keys<string_t>(input.data[i], input.size(), keys, is_little_endian);
			break;
		default:
			throw InvalidTypeException(input.data[i].type, "Invalid type for index");
		}
	}
}

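//! Insert the keys of an entire chunk into the index. If a unique constraint is violated, the
//! entries that were already inserted for this chunk are removed again and false is returned.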
bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
	assert(row_ids.type == ROW_TYPE);
	assert(types[0] == input.data[0].type);

	// generate the keys for the given input
	vector<unique_ptr<Key>> keys;
	GenerateKeys(input, keys);

	// now insert the elements into the index
	row_ids.Normalify(input.size());
	auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
	idx_t failed_index = INVALID_INDEX;
	for (idx_t i = 0; i < input.size(); i++) {
		if (!keys[i]) {
			continue;
		}

		row_t row_id = row_identifiers[i];
		if (!Insert(tree, move(keys[i]), 0, row_id)) {
			// failed to insert because of constraint violation
			failed_index = i;
			break;
		}
	}
	if (failed_index != INVALID_INDEX) {
		// failed to insert because of constraint violation: remove previously inserted entries
		// generate keys again
		keys.clear();
		GenerateKeys(input, keys);
		unique_ptr<Key> key;

		// now erase the entries
		for (idx_t i = 0; i < failed_index; i++) {
			if (!keys[i]) {
				continue;
			}
			row_t row_id = row_identifiers[i];
			Erase(tree, *keys[i], 0, row_id);
		}
		return false;
	}
	return true;
}

bool ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) {
	// first resolve the expressions for the index
	ExecuteExpressions(appended_data, expression_result);

	// now insert into the index
	return Insert(lock, expression_result, row_identifiers);
}

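//! For unique indexes only: probe the tree with every key that is about to be appended and throw
//! a ConstraintException if one of them already exists.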
void ART::VerifyAppend(DataChunk &chunk) {
	if (!is_unique) {
		return;
	}
	// unique index, check
	lock_guard<mutex> l(lock);
	// first resolve the expressions for the index
	ExecuteExpressions(chunk, expression_result);

	// generate the keys for the given input
	vector<unique_ptr<Key>> keys;
	GenerateKeys(expression_result, keys);

	for (idx_t i = 0; i < chunk.size(); i++) {
		if (!keys[i]) {
			continue;
		}
		if (Lookup(tree, *keys[i], 0) != nullptr) {
			// node already exists in tree
			throw ConstraintException("duplicate key value violates primary key or unique constraint");
		}
	}
}

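//! Add a row id to an existing leaf. For unique indexes a leaf may hold only a single row id, so
//! inserting into a non-empty leaf is a constraint violation and returns false.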
bool ART::InsertToLeaf(Leaf &leaf, row_t row_id) {
	if (is_unique && leaf.num_elements != 0) {
		return false;
	}
	leaf.Insert(row_id);
	return true;
}

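//! Recursive insert. Three cases are handled:
//! (1) the node is empty: create a new leaf,
//! (2) the node is a leaf: append the row id if the keys match, otherwise split the leaf into a
//!     Node4 that stores the common prefix and points to both leaves,
//! (3) the node is an inner node: split its prefix on a mismatch, otherwise descend into the
//!     child selected by the next key byte (creating a new leaf if no such child exists).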
bool ART::Insert(unique_ptr<Node> &node, unique_ptr<Key> value, unsigned depth, row_t row_id) {
	Key &key = *value;
	if (!node) {
		// node is currently empty, create a leaf here with the key
		node = make_unique<Leaf>(*this, move(value), row_id);
		return true;
	}

	if (node->type == NodeType::NLeaf) {
		// Replace leaf with Node4 and store both leaves in it
		auto leaf = static_cast<Leaf *>(node.get());

		Key &existingKey = *leaf->value;
		uint32_t newPrefixLength = 0;
		// Leaf node is already there, update row_id vector
		if (depth + newPrefixLength == existingKey.len && existingKey.len == key.len) {
			return InsertToLeaf(*leaf, row_id);
		}
		while (existingKey[depth + newPrefixLength] == key[depth + newPrefixLength]) {
			newPrefixLength++;
			// Leaf node is already there, update row_id vector
			if (depth + newPrefixLength == existingKey.len && existingKey.len == key.len) {
				return InsertToLeaf(*leaf, row_id);
			}
		}

		unique_ptr<Node> newNode = make_unique<Node4>(*this, newPrefixLength);
		newNode->prefix_length = newPrefixLength;
		memcpy(newNode->prefix.get(), &key[depth], newPrefixLength);
		Node4::insert(*this, newNode, existingKey[depth + newPrefixLength], node);
		unique_ptr<Node> leaf_node = make_unique<Leaf>(*this, move(value), row_id);
		Node4::insert(*this, newNode, key[depth + newPrefixLength], leaf_node);
		node = move(newNode);
		return true;
	}

	// Handle prefix of inner node
	if (node->prefix_length) {
		uint32_t mismatchPos = Node::PrefixMismatch(*this, node.get(), key, depth);
		if (mismatchPos != node->prefix_length) {
			// Prefix differs, create new node
			unique_ptr<Node> newNode = make_unique<Node4>(*this, mismatchPos);
			newNode->prefix_length = mismatchPos;
			memcpy(newNode->prefix.get(), node->prefix.get(), mismatchPos);
			// Break up prefix
			auto node_ptr = node.get();
			Node4::insert(*this, newNode, node->prefix[mismatchPos], node);
			node_ptr->prefix_length -= (mismatchPos + 1);
			memmove(node_ptr->prefix.get(), node_ptr->prefix.get() + mismatchPos + 1, node_ptr->prefix_length);
			unique_ptr<Node> leaf_node = make_unique<Leaf>(*this, move(value), row_id);
			Node4::insert(*this, newNode, key[depth + mismatchPos], leaf_node);
			node = move(newNode);
			return true;
		}
		depth += node->prefix_length;
	}

	// Recurse
	idx_t pos = node->GetChildPos(key[depth]);
	if (pos != INVALID_INDEX) {
		auto child = node->GetChild(pos);
		return Insert(*child, move(value), depth + 1, row_id);
	}
	unique_ptr<Node> newNode = make_unique<Leaf>(*this, move(value), row_id);
	Node::InsertLeaf(*this, node, key[depth], newNode);
	return true;
}

//===--------------------------------------------------------------------===//
// Delete
//===--------------------------------------------------------------------===//
void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
	// first resolve the expressions
	ExecuteExpressions(input, expression_result);

	// then generate the keys for the given input
	vector<unique_ptr<Key>> keys;
	GenerateKeys(expression_result, keys);

	// now erase the elements from the database
	row_ids.Normalify(input.size());
	auto row_identifiers = FlatVector::GetData<row_t>(row_ids);

	for (idx_t i = 0; i < input.size(); i++) {
		if (!keys[i]) {
			continue;
		}
		Erase(tree, *keys[i], 0, row_identifiers[i]);
	}
}

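//! Recursively remove a (key, row id) pair from the tree. When a leaf runs out of row ids it is
//! removed from the tree and the parent node may shrink.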
void ART::Erase(unique_ptr<Node> &node, Key &key, unsigned depth, row_t row_id) {
	if (!node) {
		return;
	}
	// Delete a leaf from a tree
	if (node->type == NodeType::NLeaf) {
		// Make sure we have the right leaf
		if (ART::LeafMatches(node.get(), key, depth)) {
			auto leaf = static_cast<Leaf *>(node.get());
			leaf->Remove(row_id);
			if (leaf->num_elements == 0) {
				node.reset();
			}
		}
		return;
	}

	// Handle prefix
	if (node->prefix_length) {
		if (Node::PrefixMismatch(*this, node.get(), key, depth) != node->prefix_length) {
			return;
		}
		depth += node->prefix_length;
	}
	idx_t pos = node->GetChildPos(key[depth]);
	if (pos != INVALID_INDEX) {
		auto child = node->GetChild(pos);
		assert(child);

		unique_ptr<Node> &child_ref = *child;
		if (child_ref->type == NodeType::NLeaf && LeafMatches(child_ref.get(), key, depth)) {
			// Leaf found, remove entry
			auto leaf = static_cast<Leaf *>(child_ref.get());
			leaf->Remove(row_id);
			if (leaf->num_elements == 0) {
				// Leaf is empty, delete leaf, decrement node counter and maybe shrink node
				Node::Erase(*this, node, pos);
			}
		} else {
			// Recurse
			Erase(*child, key, depth + 1, row_id);
		}
	}
}

//===--------------------------------------------------------------------===//
// Point Query
//===--------------------------------------------------------------------===//
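//! Convert the constant value of a predicate into the same key encoding that is used when
//! inserting rows, so it can be compared against the keys stored in the tree.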
static unique_ptr<Key> CreateKey(ART &art, TypeId type, Value &value) {
	assert(type == value.type);
	switch (type) {
	case TypeId::BOOL:
		return Key::CreateKey<bool>(value.value_.boolean, art.is_little_endian);
	case TypeId::INT8:
		return Key::CreateKey<int8_t>(value.value_.tinyint, art.is_little_endian);
	case TypeId::INT16:
		return Key::CreateKey<int16_t>(value.value_.smallint, art.is_little_endian);
	case TypeId::INT32:
		return Key::CreateKey<int32_t>(value.value_.integer, art.is_little_endian);
	case TypeId::INT64:
		return Key::CreateKey<int64_t>(value.value_.bigint, art.is_little_endian);
	case TypeId::FLOAT:
		return Key::CreateKey<float>(value.value_.float_, art.is_little_endian);
	case TypeId::DOUBLE:
		return Key::CreateKey<double>(value.value_.double_, art.is_little_endian);
	case TypeId::VARCHAR:
		return Key::CreateKey<string_t>(string_t(value.str_value.c_str(), value.str_value.size()),
		                                art.is_little_endian);
	default:
		throw InvalidTypeException(type, "Invalid type for index");
	}
}

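//! Point query: look up the leaf whose key matches exactly and emit every row id stored in it.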
void ART::SearchEqual(vector<row_t> &result_ids, ARTIndexScanState *state) {
	unique_ptr<Key> key = CreateKey(*this, types[0], state->values[0]);
	auto leaf = static_cast<Leaf *>(Lookup(tree, *key, 0));
	if (!leaf) {
		return;
	}
	for (idx_t i = 0; i < leaf->num_elements; i++) {
		row_t row_id = leaf->GetRowId(i);
		result_ids.push_back(row_id);
	}
}

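//! Iterative lookup: at every inner node first verify the compressed prefix, then follow the
//! child selected by the next key byte; the leaf that is reached is verified byte-by-byte.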
Node *ART::Lookup(unique_ptr<Node> &node, Key &key, unsigned depth) {
	auto node_val = node.get();

	while (node_val) {
		if (node_val->type == NodeType::NLeaf) {
			auto leaf = static_cast<Leaf *>(node_val);
			Key &leafKey = *leaf->value;
			//! Check leaf
			for (idx_t i = depth; i < leafKey.len; i++) {
				if (leafKey[i] != key[i]) {
					return nullptr;
				}
			}
			return node_val;
		}
		if (node_val->prefix_length) {
			for (idx_t pos = 0; pos < node_val->prefix_length; pos++) {
				if (key[depth + pos] != node_val->prefix[pos]) {
					return nullptr;
				}
			}
			depth += node_val->prefix_length;
		}
		idx_t pos = node_val->GetChildPos(key[depth]);
		if (pos == INVALID_INDEX) {
			return nullptr;
		}
		node_val = node_val->GetChild(pos)->get();
		assert(node_val);

		depth++;
	}

	return nullptr;
}

//===--------------------------------------------------------------------===//
// Iterator scans
//===--------------------------------------------------------------------===//
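//! Emit the row ids of every leaf visited by the iterator. If HAS_BOUND is set, the scan stops as
//! soon as the leaf key is greater than the bound (inclusive) or greater than or equal to it
//! (non-inclusive).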
template <bool HAS_BOUND, bool INCLUSIVE>
void ART::IteratorScan(ARTIndexScanState *state, Iterator *it, vector<row_t> &result_ids, Key *bound) {
	bool has_next;
	do {
		if (HAS_BOUND) {
			assert(bound);
			if (INCLUSIVE) {
				if (*it->node->value > *bound) {
					break;
				}
			} else {
				if (*it->node->value >= *bound) {
					break;
				}
			}
		}
		for (idx_t i = 0; i < it->node->num_elements; i++) {
			row_t row_id = it->node->GetRowId(i);
			result_ids.push_back(row_id);
		}
		has_next = ART::IteratorNext(*it);
	} while (has_next);
}

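//! Advance the iterator to the next leaf using the explicit stack of (node, position) pairs that
//! records the path from the root; returns false once the tree is exhausted.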
bool ART::IteratorNext(Iterator &it) {
	// Skip the leaf that is currently on top of the stack
	if (it.depth && it.stack[it.depth - 1].node->type == NodeType::NLeaf) {
		it.depth--;
	}

	// Look for the next leaf
	while (it.depth > 0) {
		auto &top = it.stack[it.depth - 1];
		Node *node = top.node;

		if (node->type == NodeType::NLeaf) {
			// found a leaf: report it as the next entry
			it.node = (Leaf *)node;
			return true;
		}

		// Find next node
		top.pos = node->GetNextPos(top.pos);
		if (top.pos != INVALID_INDEX) {
			// next node found: go there
			it.stack[it.depth].node = node->GetChild(top.pos)->get();
			it.stack[it.depth].pos = INVALID_INDEX;
			it.depth++;
		} else {
			// no node found: move up the tree
			it.depth--;
		}
	}
	return false;
}

//===--------------------------------------------------------------------===//
// Greater Than
// Positions the iterator on the first leaf whose key is >= key (or > key when
// not inclusive). Returns true if such a leaf exists, false otherwise.
//===--------------------------------------------------------------------===//
bool ART::Bound(unique_ptr<Node> &n, Key &key, Iterator &it, bool inclusive) {
	it.depth = 0;
	bool equal = false;
	if (!n) {
		return false;
	}
	Node *node = n.get();

	idx_t depth = 0;
	while (true) {
		auto &top = it.stack[it.depth];
		top.node = node;
		it.depth++;
		if (!equal) {
			while (node->type != NodeType::NLeaf) {
				node = node->GetChild(node->GetMin())->get();
				auto &c_top = it.stack[it.depth];
				c_top.node = node;
				it.depth++;
			}
		}
		if (node->type == NodeType::NLeaf) {
			// found a leaf node: check if it is greater than or equal to the search key
			auto leaf = static_cast<Leaf *>(node);
			it.node = leaf;
			// the leaf's key may still be equal to the search key
			if (*leaf->value == key) {
				// for a non-inclusive bound an equal leaf does not qualify: advance to the next leaf
				if (!inclusive && !IteratorNext(it)) {
					return false;
				} else {
					return true;
				}
			}

			if (*leaf->value > key) {
				return true;
			}
			// the leaf is lower than the key:
			// keep advancing until we find a leaf that is greater than or equal to the key
			while (IteratorNext(it)) {
				if (*it.node->value == key) {
					// for a non-inclusive bound an equal leaf does not qualify: advance to the next leaf
					if (!inclusive && !IteratorNext(it)) {
						return false;
					} else {
						return true;
					}
				} else if (*it.node->value > key) {
					// found a leaf greater than the key: the bound is satisfied
					return true;
				}
			}
			return false;
		}
		uint32_t mismatchPos = Node::PrefixMismatch(*this, node, key, depth);
		if (mismatchPos != node->prefix_length) {
			if (node->prefix[mismatchPos] < key[depth + mismatchPos]) {
				// the subtree under this node is entirely less than the key: skip it
				it.depth--;
				return IteratorNext(it);
			} else {
				// the subtree is entirely greater than the key: start from its smallest leaf
				top.pos = INVALID_INDEX;
				return IteratorNext(it);
			}
		}
		// prefix matches, search inside the child for the key
		depth += node->prefix_length;

		top.pos = node->GetChildGreaterEqual(key[depth], equal);
		if (top.pos == INVALID_INDEX) {
			// Find min leaf
			top.pos = node->GetMin();
		}
		node = node->GetChild(top.pos)->get();
		//! This means all children of this node qualify as geq

		depth++;
	}
}

void ART::SearchGreater(vector<row_t> &result_ids, ARTIndexScanState *state, bool inclusive) {
	Iterator *it = &state->iterator;
	auto key = CreateKey(*this, types[0], state->values[0]);

	// greater than scan: first set the iterator to the node at which we will start our scan by finding the lowest node
	// that satisfies our requirement
	if (!it->start) {
		bool found = ART::Bound(tree, *key, *it, inclusive);
		if (!found) {
			return;
		}
		it->start = true;
	}
	// after that we continue the scan; we don't need to check the bounds as any value following this value is
	// automatically bigger and hence satisfies our predicate
	IteratorScan<false, false>(state, it, result_ids, nullptr);
}

//===--------------------------------------------------------------------===//
// Less Than
//===--------------------------------------------------------------------===//
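//! Descend to the left-most (smallest) leaf of the subtree, recording the path on the iterator's
//! stack so that IteratorNext can continue the scan from there.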
static Leaf &FindMinimum(Iterator &it, Node &node) {
	Node *next = nullptr;
	idx_t pos = 0;
	switch (node.type) {
	case NodeType::NLeaf:
		it.node = (Leaf *)&node;
		return (Leaf &)node;
	case NodeType::N4:
		next = ((Node4 &)node).child[0].get();
		break;
	case NodeType::N16:
		next = ((Node16 &)node).child[0].get();
		break;
	case NodeType::N48: {
		auto &n48 = (Node48 &)node;
		while (n48.childIndex[pos] == Node::EMPTY_MARKER) {
			pos++;
		}
		next = n48.child[n48.childIndex[pos]].get();
		break;
	}
	case NodeType::N256: {
		auto &n256 = (Node256 &)node;
		while (!n256.child[pos]) {
			pos++;
		}
		next = n256.child[pos].get();
		break;
	}
	}
	it.stack[it.depth].node = &node;
	it.stack[it.depth].pos = pos;
	it.depth++;
	return FindMinimum(it, *next);
}

void ART::SearchLess(vector<row_t> &result_ids, ARTIndexScanState *state, bool inclusive) {
	if (!tree) {
		return;
	}

	Iterator *it = &state->iterator;
	auto upper_bound = CreateKey(*this, types[0], state->values[0]);

	if (!it->start) {
		// first find the minimum value in the ART: we start scanning from this value
		auto &minimum = FindMinimum(state->iterator, *tree);
		// early-out if the minimum value is already larger than the upper bound
		if (*minimum.value > *upper_bound) {
			return;
		}
		it->start = true;
	}
	// now continue the scan until we reach the upper bound
	if (inclusive) {
		IteratorScan<true, true>(state, it, result_ids, upper_bound.get());
	} else {
		IteratorScan<true, false>(state, it, result_ids, upper_bound.get());
	}
}

//===--------------------------------------------------------------------===//
// Closed Range Query
//===--------------------------------------------------------------------===//
void ART::SearchCloseRange(vector<row_t> &result_ids, ARTIndexScanState *state, bool left_inclusive,
                           bool right_inclusive) {
	auto lower_bound = CreateKey(*this, types[0], state->values[0]);
	auto upper_bound = CreateKey(*this, types[0], state->values[1]);
	Iterator *it = &state->iterator;
	// first find the first node that satisfies the left predicate
	if (!it->start) {
		bool found = ART::Bound(tree, *lower_bound, *it, left_inclusive);
		if (!found) {
			return;
		}
		it->start = true;
	}
	// now continue the scan until we reach the upper bound
	if (right_inclusive) {
		IteratorScan<true, true>(state, it, result_ids, upper_bound.get());
	} else {
		IteratorScan<true, false>(state, it, result_ids, upper_bound.get());
	}
}

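//! Execute the index scan: on the first call the matching row ids are collected, sorted and
//! de-duplicated into the scan state; subsequent calls fetch the corresponding base table rows in
//! chunks of up to STANDARD_VECTOR_SIZE row ids.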
void ART::Scan(Transaction &transaction, DataTable &table, TableIndexScanState &table_state, DataChunk &result) {
	auto state = (ARTIndexScanState *)table_state.index_state.get();

	// scan the index
	if (!state->checked) {
		vector<row_t> result_ids;
		assert(state->values[0].type == types[0]);

		if (state->values[1].is_null) {
			lock_guard<mutex> l(lock);
			// single predicate
			switch (state->expressions[0]) {
			case ExpressionType::COMPARE_EQUAL:
				SearchEqual(result_ids, state);
				break;
			case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
				SearchGreater(result_ids, state, true);
				break;
			case ExpressionType::COMPARE_GREATERTHAN:
				SearchGreater(result_ids, state, false);
				break;
			case ExpressionType::COMPARE_LESSTHANOREQUALTO:
				SearchLess(result_ids, state, true);
				break;
			case ExpressionType::COMPARE_LESSTHAN:
				SearchLess(result_ids, state, false);
				break;
			default:
				throw NotImplementedException("Operation not implemented");
			}
		} else {
			lock_guard<mutex> l(lock);
			// two predicates
			assert(state->values[1].type == types[0]);
			bool left_inclusive = state->expressions[0] == ExpressionType::COMPARE_GREATERTHANOREQUALTO;
			bool right_inclusive = state->expressions[1] == ExpressionType::COMPARE_LESSTHANOREQUALTO;
			SearchCloseRange(result_ids, state, left_inclusive, right_inclusive);
		}
		state->checked = true;

		if (result_ids.size() == 0) {
			return;
		}

		// sort the row ids
		sort(result_ids.begin(), result_ids.end());
		// duplicate eliminate the row ids and append them to the row ids of the state
		state->result_ids.reserve(result_ids.size());

		state->result_ids.push_back(result_ids[0]);
		for (idx_t i = 1; i < result_ids.size(); i++) {
			if (result_ids[i] != result_ids[i - 1]) {
				state->result_ids.push_back(result_ids[i]);
			}
		}
	}

	if (state->result_index >= state->result_ids.size()) {
		// exhausted all row ids
		return;
	}

	// create a vector pointing to the current set of row ids
	Vector row_identifiers(ROW_TYPE, (data_ptr_t)&state->result_ids[state->result_index]);
	idx_t scan_count = std::min((idx_t)STANDARD_VECTOR_SIZE, (idx_t)state->result_ids.size() - state->result_index);

	// fetch the actual values from the base table
	table.Fetch(transaction, result, state->column_ids, row_identifiers, scan_count, table_state);

	// move to the next set of row ids
	state->result_index += scan_count;
}