1#include "duckdb/execution/index/art/iterator.hpp"
2
3#include "duckdb/common/limits.hpp"
4#include "duckdb/execution/index/art/art.hpp"
5#include "duckdb/execution/index/art/node.hpp"
6#include "duckdb/execution/index/art/prefix.hpp"
7
8namespace duckdb {
9
10void IteratorCurrentKey::Push(const uint8_t byte) {
11 if (cur_key_pos == key.size()) {
12 key.push_back(x: byte);
13 }
14 D_ASSERT(cur_key_pos < key.size());
15 key[cur_key_pos++] = byte;
16}
17
18void IteratorCurrentKey::Pop(const idx_t n) {
19 cur_key_pos -= n;
20 D_ASSERT(cur_key_pos <= key.size());
21}
22
23uint8_t &IteratorCurrentKey::operator[](idx_t idx) {
24 if (idx >= key.size()) {
25 key.push_back(x: 0);
26 }
27 D_ASSERT(idx < key.size());
28 return key[idx];
29}
30
31bool IteratorCurrentKey::operator>(const ARTKey &k) const {
32 for (idx_t i = 0; i < MinValue<idx_t>(a: cur_key_pos, b: k.len); i++) {
33 if (key[i] > k.data[i]) {
34 return true;
35 } else if (key[i] < k.data[i]) {
36 return false;
37 }
38 }
39 return cur_key_pos > k.len;
40}
41
42bool IteratorCurrentKey::operator>=(const ARTKey &k) const {
43 for (idx_t i = 0; i < MinValue<idx_t>(a: cur_key_pos, b: k.len); i++) {
44 if (key[i] > k.data[i]) {
45 return true;
46 } else if (key[i] < k.data[i]) {
47 return false;
48 }
49 }
50 return cur_key_pos >= k.len;
51}
52
53bool IteratorCurrentKey::operator==(const ARTKey &k) const {
54 if (cur_key_pos != k.len) {
55 return false;
56 }
57 for (idx_t i = 0; i < cur_key_pos; i++) {
58 if (key[i] != k.data[i]) {
59 return false;
60 }
61 }
62 return true;
63}
64
65void Iterator::FindMinimum(Node &node) {
66
67 // reconstruct the prefix
68 // FIXME: get all bytes at once to increase performance
69 auto &node_prefix = node.GetPrefix(art&: *art);
70 for (idx_t i = 0; i < node_prefix.count; i++) {
71 cur_key.Push(byte: node_prefix.GetByte(art: *art, position: i));
72 }
73
74 // found the minimum
75 if (node.DecodeARTNodeType() == NType::LEAF) {
76 last_leaf = Node::GetAllocator(art: *art, type: NType::LEAF).Get<Leaf>(ptr: node);
77 return;
78 }
79
80 // go to the leftmost entry in the current node
81 uint8_t byte = 0;
82 auto next = node.GetNextChild(art&: *art, byte);
83 D_ASSERT(next);
84 cur_key.Push(byte);
85
86 // recurse
87 nodes.emplace(args&: node, args&: byte);
88 FindMinimum(node&: *next);
89}
90
91void Iterator::PushKey(const Node &node, const uint8_t byte) {
92 if (node.DecodeARTNodeType() != NType::LEAF) {
93 cur_key.Push(byte);
94 }
95}
96
97bool Iterator::Scan(const ARTKey &key, const idx_t &max_count, vector<row_t> &result_ids, const bool &is_inclusive) {
98
99 bool has_next;
100 do {
101 if (!key.Empty()) {
102 // no more row IDs within the key bounds
103 if (is_inclusive) {
104 if (cur_key > key) {
105 return true;
106 }
107 } else {
108 if (cur_key >= key) {
109 return true;
110 }
111 }
112 }
113
114 // adding more elements would exceed the max count
115 if (result_ids.size() + last_leaf->count > max_count) {
116 return false;
117 }
118
119 // FIXME: copy all at once to improve performance
120 for (idx_t i = 0; i < last_leaf->count; i++) {
121 row_t row_id = last_leaf->GetRowId(art: *art, position: i);
122 result_ids.push_back(x: row_id);
123 }
124
125 // get the next leaf
126 has_next = Next();
127
128 } while (has_next);
129
130 return true;
131}
132
133void Iterator::PopNode() {
134 auto cur_node = nodes.top();
135 idx_t elements_to_pop = cur_node.node.GetPrefix(art&: *art).count + (nodes.size() != 1);
136 cur_key.Pop(n: elements_to_pop);
137 nodes.pop();
138}
139
140bool Iterator::Next() {
141 if (!nodes.empty()) {
142 auto cur_node = nodes.top().node;
143 if (cur_node.DecodeARTNodeType() == NType::LEAF) {
144 // pop leaf
145 // we must pop the prefix size + the key to the node, unless we are popping the root
146 PopNode();
147 }
148 }
149
150 // look for the next leaf
151 while (!nodes.empty()) {
152
153 // cur_node
154 auto &top = nodes.top();
155 Node node = top.node;
156
157 // found a leaf: move to next node
158 if (node.DecodeARTNodeType() == NType::LEAF) {
159 last_leaf = Node::GetAllocator(art: *art, type: NType::LEAF).Get<Leaf>(ptr: node);
160 return true;
161 }
162
163 // find next node
164 if (top.byte == NumericLimits<uint8_t>::Maximum()) {
165 // no node found: move up the tree, pop prefix and key of current node
166 PopNode();
167 continue;
168 }
169
170 top.byte == 0 ? top.byte : top.byte++;
171 auto next_node = node.GetNextChild(art&: *art, byte&: top.byte);
172
173 if (next_node) {
174 // add the next node's key byte
175 PushKey(node, byte: top.byte);
176
177 // add prefix of new node
178 // FIXME: get all bytes at once to increase performance
179 auto &next_node_prefix = next_node->GetPrefix(art&: *art);
180 for (idx_t i = 0; i < next_node_prefix.count; i++) {
181 cur_key.Push(byte: next_node_prefix.GetByte(art: *art, position: i));
182 }
183
184 // next node found: push it
185 nodes.emplace(args&: *next_node, args: 0);
186 } else {
187
188 // no node found: move up the tree, pop prefix and key of current node
189 PopNode();
190 }
191 }
192 return false;
193}
194
195bool Iterator::LowerBound(Node node, const ARTKey &key, const bool &is_inclusive) {
196
197 if (!node.IsSet()) {
198 return false;
199 }
200
201 idx_t depth = 0;
202 bool equal = true;
203 while (true) {
204
205 nodes.emplace(args&: node, args: 0);
206 auto &top = nodes.top();
207
208 // reconstruct the prefix
209 // FIXME: get all bytes at once to increase performance
210 reference<Prefix> node_prefix(top.node.GetPrefix(art&: *art));
211 for (idx_t i = 0; i < node_prefix.get().count; i++) {
212 cur_key.Push(byte: node_prefix.get().GetByte(art: *art, position: i));
213 }
214
215 // greater case: find leftmost leaf node directly
216 if (!equal) {
217 while (node.DecodeARTNodeType() != NType::LEAF) {
218
219 uint8_t byte = 0;
220 auto next_node = *node.GetNextChild(art&: *art, byte);
221 D_ASSERT(next_node.IsSet());
222
223 PushKey(node, byte);
224 nodes.emplace(args&: node, args&: byte);
225 node = next_node;
226
227 // reconstruct the prefix
228 node_prefix = node.GetPrefix(art&: *art);
229 for (idx_t i = 0; i < node_prefix.get().count; i++) {
230 cur_key.Push(byte: node_prefix.get().GetByte(art: *art, position: i));
231 }
232
233 auto &c_top = nodes.top();
234 c_top.node = node;
235 }
236 }
237
238 if (node.DecodeARTNodeType() == NType::LEAF) {
239 // found a leaf node: check if it is bigger or equal than the current key
240 last_leaf = Node::GetAllocator(art: *art, type: NType::LEAF).Get<Leaf>(ptr: node);
241
242 // if the search is not inclusive the leaf node could still be equal to the current value
243 // check if leaf is equal to the current key
244 if (cur_key == key) {
245 // if it's not inclusive check if there is a next leaf
246 if (!is_inclusive && !Next()) {
247 return false;
248 } else {
249 return true;
250 }
251 }
252
253 if (cur_key > key) {
254 return true;
255 }
256 // Case1: When the ART has only one leaf node, the Next() will return false
257 // Case2: This means the previous node prefix(if any) + a_key(one element of of key array of previous node)
258 // == key[q..=w].
259 // But key[w+1..=z] maybe greater than leaf node prefix.
260 // One fact is key[w] is alawys equal to a_key and the next element
261 // of key array of previous node is always > a_key So we just call Next() once.
262
263 return Next();
264 }
265
266 // equal case:
267 node_prefix = node.GetPrefix(art&: *art);
268 auto mismatch_pos = node_prefix.get().KeyMismatchPosition(art: *art, key, depth);
269 if (mismatch_pos != node_prefix.get().count) {
270 if (node_prefix.get().GetByte(art: *art, position: mismatch_pos) < key[depth + mismatch_pos]) {
271 // less
272 PopNode();
273 return Next();
274 }
275 // greater
276 top.byte = 0;
277 return Next();
278 }
279
280 // prefix matches, search inside the child for the key
281 depth += node_prefix.get().count;
282 top.byte = key[depth];
283 auto child = node.GetNextChild(art&: *art, byte&: top.byte);
284 equal = key[depth] == top.byte;
285
286 // the maximum key byte of the current node is less than the key
287 // fall back to the previous node
288 if (!child) {
289 PopNode();
290 return Next();
291 }
292
293 PushKey(node, byte: top.byte);
294 node = *child;
295
296 // all children of this node qualify as greater or equal
297 depth++;
298 }
299}
300
301} // namespace duckdb
302