1 | #include "duckdb/execution/index/art/iterator.hpp" |
2 | |
3 | #include "duckdb/common/limits.hpp" |
4 | #include "duckdb/execution/index/art/art.hpp" |
5 | #include "duckdb/execution/index/art/node.hpp" |
6 | #include "duckdb/execution/index/art/prefix.hpp" |
7 | |
8 | namespace duckdb { |
9 | |
10 | void IteratorCurrentKey::Push(const uint8_t byte) { |
11 | if (cur_key_pos == key.size()) { |
12 | key.push_back(x: byte); |
13 | } |
14 | D_ASSERT(cur_key_pos < key.size()); |
15 | key[cur_key_pos++] = byte; |
16 | } |
17 | |
18 | void IteratorCurrentKey::Pop(const idx_t n) { |
19 | cur_key_pos -= n; |
20 | D_ASSERT(cur_key_pos <= key.size()); |
21 | } |
22 | |
23 | uint8_t &IteratorCurrentKey::operator[](idx_t idx) { |
24 | if (idx >= key.size()) { |
25 | key.push_back(x: 0); |
26 | } |
27 | D_ASSERT(idx < key.size()); |
28 | return key[idx]; |
29 | } |
30 | |
31 | bool IteratorCurrentKey::operator>(const ARTKey &k) const { |
32 | for (idx_t i = 0; i < MinValue<idx_t>(a: cur_key_pos, b: k.len); i++) { |
33 | if (key[i] > k.data[i]) { |
34 | return true; |
35 | } else if (key[i] < k.data[i]) { |
36 | return false; |
37 | } |
38 | } |
39 | return cur_key_pos > k.len; |
40 | } |
41 | |
42 | bool IteratorCurrentKey::operator>=(const ARTKey &k) const { |
43 | for (idx_t i = 0; i < MinValue<idx_t>(a: cur_key_pos, b: k.len); i++) { |
44 | if (key[i] > k.data[i]) { |
45 | return true; |
46 | } else if (key[i] < k.data[i]) { |
47 | return false; |
48 | } |
49 | } |
50 | return cur_key_pos >= k.len; |
51 | } |
52 | |
53 | bool IteratorCurrentKey::operator==(const ARTKey &k) const { |
54 | if (cur_key_pos != k.len) { |
55 | return false; |
56 | } |
57 | for (idx_t i = 0; i < cur_key_pos; i++) { |
58 | if (key[i] != k.data[i]) { |
59 | return false; |
60 | } |
61 | } |
62 | return true; |
63 | } |
64 | |
65 | void Iterator::FindMinimum(Node &node) { |
66 | |
67 | // reconstruct the prefix |
68 | // FIXME: get all bytes at once to increase performance |
69 | auto &node_prefix = node.GetPrefix(art&: *art); |
70 | for (idx_t i = 0; i < node_prefix.count; i++) { |
71 | cur_key.Push(byte: node_prefix.GetByte(art: *art, position: i)); |
72 | } |
73 | |
74 | // found the minimum |
75 | if (node.DecodeARTNodeType() == NType::LEAF) { |
76 | last_leaf = Node::GetAllocator(art: *art, type: NType::LEAF).Get<Leaf>(ptr: node); |
77 | return; |
78 | } |
79 | |
80 | // go to the leftmost entry in the current node |
81 | uint8_t byte = 0; |
82 | auto next = node.GetNextChild(art&: *art, byte); |
83 | D_ASSERT(next); |
84 | cur_key.Push(byte); |
85 | |
86 | // recurse |
87 | nodes.emplace(args&: node, args&: byte); |
88 | FindMinimum(node&: *next); |
89 | } |
90 | |
91 | void Iterator::PushKey(const Node &node, const uint8_t byte) { |
92 | if (node.DecodeARTNodeType() != NType::LEAF) { |
93 | cur_key.Push(byte); |
94 | } |
95 | } |
96 | |
97 | bool Iterator::Scan(const ARTKey &key, const idx_t &max_count, vector<row_t> &result_ids, const bool &is_inclusive) { |
98 | |
99 | bool has_next; |
100 | do { |
101 | if (!key.Empty()) { |
102 | // no more row IDs within the key bounds |
103 | if (is_inclusive) { |
104 | if (cur_key > key) { |
105 | return true; |
106 | } |
107 | } else { |
108 | if (cur_key >= key) { |
109 | return true; |
110 | } |
111 | } |
112 | } |
113 | |
114 | // adding more elements would exceed the max count |
115 | if (result_ids.size() + last_leaf->count > max_count) { |
116 | return false; |
117 | } |
118 | |
119 | // FIXME: copy all at once to improve performance |
120 | for (idx_t i = 0; i < last_leaf->count; i++) { |
121 | row_t row_id = last_leaf->GetRowId(art: *art, position: i); |
122 | result_ids.push_back(x: row_id); |
123 | } |
124 | |
125 | // get the next leaf |
126 | has_next = Next(); |
127 | |
128 | } while (has_next); |
129 | |
130 | return true; |
131 | } |
132 | |
133 | void Iterator::PopNode() { |
134 | auto cur_node = nodes.top(); |
135 | idx_t elements_to_pop = cur_node.node.GetPrefix(art&: *art).count + (nodes.size() != 1); |
136 | cur_key.Pop(n: elements_to_pop); |
137 | nodes.pop(); |
138 | } |
139 | |
140 | bool Iterator::Next() { |
141 | if (!nodes.empty()) { |
142 | auto cur_node = nodes.top().node; |
143 | if (cur_node.DecodeARTNodeType() == NType::LEAF) { |
144 | // pop leaf |
145 | // we must pop the prefix size + the key to the node, unless we are popping the root |
146 | PopNode(); |
147 | } |
148 | } |
149 | |
150 | // look for the next leaf |
151 | while (!nodes.empty()) { |
152 | |
153 | // cur_node |
154 | auto &top = nodes.top(); |
155 | Node node = top.node; |
156 | |
157 | // found a leaf: move to next node |
158 | if (node.DecodeARTNodeType() == NType::LEAF) { |
159 | last_leaf = Node::GetAllocator(art: *art, type: NType::LEAF).Get<Leaf>(ptr: node); |
160 | return true; |
161 | } |
162 | |
163 | // find next node |
164 | if (top.byte == NumericLimits<uint8_t>::Maximum()) { |
165 | // no node found: move up the tree, pop prefix and key of current node |
166 | PopNode(); |
167 | continue; |
168 | } |
169 | |
170 | top.byte == 0 ? top.byte : top.byte++; |
171 | auto next_node = node.GetNextChild(art&: *art, byte&: top.byte); |
172 | |
173 | if (next_node) { |
174 | // add the next node's key byte |
175 | PushKey(node, byte: top.byte); |
176 | |
177 | // add prefix of new node |
178 | // FIXME: get all bytes at once to increase performance |
179 | auto &next_node_prefix = next_node->GetPrefix(art&: *art); |
180 | for (idx_t i = 0; i < next_node_prefix.count; i++) { |
181 | cur_key.Push(byte: next_node_prefix.GetByte(art: *art, position: i)); |
182 | } |
183 | |
184 | // next node found: push it |
185 | nodes.emplace(args&: *next_node, args: 0); |
186 | } else { |
187 | |
188 | // no node found: move up the tree, pop prefix and key of current node |
189 | PopNode(); |
190 | } |
191 | } |
192 | return false; |
193 | } |
194 | |
195 | bool Iterator::LowerBound(Node node, const ARTKey &key, const bool &is_inclusive) { |
196 | |
197 | if (!node.IsSet()) { |
198 | return false; |
199 | } |
200 | |
201 | idx_t depth = 0; |
202 | bool equal = true; |
203 | while (true) { |
204 | |
205 | nodes.emplace(args&: node, args: 0); |
206 | auto &top = nodes.top(); |
207 | |
208 | // reconstruct the prefix |
209 | // FIXME: get all bytes at once to increase performance |
210 | reference<Prefix> node_prefix(top.node.GetPrefix(art&: *art)); |
211 | for (idx_t i = 0; i < node_prefix.get().count; i++) { |
212 | cur_key.Push(byte: node_prefix.get().GetByte(art: *art, position: i)); |
213 | } |
214 | |
215 | // greater case: find leftmost leaf node directly |
216 | if (!equal) { |
217 | while (node.DecodeARTNodeType() != NType::LEAF) { |
218 | |
219 | uint8_t byte = 0; |
220 | auto next_node = *node.GetNextChild(art&: *art, byte); |
221 | D_ASSERT(next_node.IsSet()); |
222 | |
223 | PushKey(node, byte); |
224 | nodes.emplace(args&: node, args&: byte); |
225 | node = next_node; |
226 | |
227 | // reconstruct the prefix |
228 | node_prefix = node.GetPrefix(art&: *art); |
229 | for (idx_t i = 0; i < node_prefix.get().count; i++) { |
230 | cur_key.Push(byte: node_prefix.get().GetByte(art: *art, position: i)); |
231 | } |
232 | |
233 | auto &c_top = nodes.top(); |
234 | c_top.node = node; |
235 | } |
236 | } |
237 | |
238 | if (node.DecodeARTNodeType() == NType::LEAF) { |
239 | // found a leaf node: check if it is bigger or equal than the current key |
240 | last_leaf = Node::GetAllocator(art: *art, type: NType::LEAF).Get<Leaf>(ptr: node); |
241 | |
242 | // if the search is not inclusive the leaf node could still be equal to the current value |
243 | // check if leaf is equal to the current key |
244 | if (cur_key == key) { |
245 | // if it's not inclusive check if there is a next leaf |
246 | if (!is_inclusive && !Next()) { |
247 | return false; |
248 | } else { |
249 | return true; |
250 | } |
251 | } |
252 | |
253 | if (cur_key > key) { |
254 | return true; |
255 | } |
256 | // Case1: When the ART has only one leaf node, the Next() will return false |
257 | // Case2: This means the previous node prefix(if any) + a_key(one element of of key array of previous node) |
258 | // == key[q..=w]. |
259 | // But key[w+1..=z] maybe greater than leaf node prefix. |
260 | // One fact is key[w] is alawys equal to a_key and the next element |
261 | // of key array of previous node is always > a_key So we just call Next() once. |
262 | |
263 | return Next(); |
264 | } |
265 | |
266 | // equal case: |
267 | node_prefix = node.GetPrefix(art&: *art); |
268 | auto mismatch_pos = node_prefix.get().KeyMismatchPosition(art: *art, key, depth); |
269 | if (mismatch_pos != node_prefix.get().count) { |
270 | if (node_prefix.get().GetByte(art: *art, position: mismatch_pos) < key[depth + mismatch_pos]) { |
271 | // less |
272 | PopNode(); |
273 | return Next(); |
274 | } |
275 | // greater |
276 | top.byte = 0; |
277 | return Next(); |
278 | } |
279 | |
280 | // prefix matches, search inside the child for the key |
281 | depth += node_prefix.get().count; |
282 | top.byte = key[depth]; |
283 | auto child = node.GetNextChild(art&: *art, byte&: top.byte); |
284 | equal = key[depth] == top.byte; |
285 | |
286 | // the maximum key byte of the current node is less than the key |
287 | // fall back to the previous node |
288 | if (!child) { |
289 | PopNode(); |
290 | return Next(); |
291 | } |
292 | |
293 | PushKey(node, byte: top.byte); |
294 | node = *child; |
295 | |
296 | // all children of this node qualify as greater or equal |
297 | depth++; |
298 | } |
299 | } |
300 | |
301 | } // namespace duckdb |
302 | |