1 | /* |
2 | Copyright (c) 2018 Contributors as noted in the AUTHORS file |
3 | |
4 | This file is part of libzmq, the ZeroMQ core engine in C++. |
5 | |
6 | libzmq is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU Lesser General Public License (LGPL) as published |
8 | by the Free Software Foundation; either version 3 of the License, or |
9 | (at your option) any later version. |
10 | |
11 | As a special exception, the Contributors give you permission to link |
12 | this library with independent modules to produce an executable, |
13 | regardless of the license terms of these independent modules, and to |
14 | copy and distribute the resulting executable under terms of your choice, |
15 | provided that you also meet, for each linked independent module, the |
16 | terms and conditions of the license of that module. An independent |
17 | module is a module which is not derived from or based on this library. |
18 | If you modify this library, you must extend this exception to your |
19 | version of the library. |
20 | |
21 | libzmq is distributed in the hope that it will be useful, but WITHOUT |
22 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
23 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
24 | License for more details. |
25 | |
26 | You should have received a copy of the GNU Lesser General Public License |
27 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
28 | */ |
29 | |
30 | #include "precompiled.hpp" |
31 | #include "macros.hpp" |
32 | #include "err.hpp" |
33 | #include "radix_tree.hpp" |
34 | |
35 | #include <stdlib.h> |
36 | #include <string.h> |
37 | #include <vector> |
38 | |
39 | node_t::node_t (unsigned char *data_) : _data (data_) |
40 | { |
41 | } |
42 | |
43 | uint32_t node_t::refcount () |
44 | { |
45 | uint32_t u32; |
46 | memcpy (&u32, _data, sizeof (u32)); |
47 | return u32; |
48 | } |
49 | |
50 | void node_t::set_refcount (uint32_t value_) |
51 | { |
52 | memcpy (_data, &value_, sizeof (value_)); |
53 | } |
54 | |
55 | uint32_t node_t::prefix_length () |
56 | { |
57 | uint32_t u32; |
58 | memcpy (&u32, _data + sizeof (uint32_t), sizeof (u32)); |
59 | return u32; |
60 | } |
61 | |
62 | void node_t::set_prefix_length (uint32_t value_) |
63 | { |
64 | memcpy (_data + sizeof (value_), &value_, sizeof (value_)); |
65 | } |
66 | |
67 | uint32_t node_t::edgecount () |
68 | { |
69 | uint32_t u32; |
70 | memcpy (&u32, _data + 2 * sizeof (uint32_t), sizeof (u32)); |
71 | return u32; |
72 | } |
73 | |
74 | void node_t::set_edgecount (uint32_t value_) |
75 | { |
76 | memcpy (_data + 2 * sizeof (value_), &value_, sizeof (value_)); |
77 | } |
78 | |
79 | unsigned char *node_t::prefix () |
80 | { |
81 | return _data + 3 * sizeof (uint32_t); |
82 | } |
83 | |
84 | void node_t::set_prefix (const unsigned char *bytes_) |
85 | { |
86 | memcpy (prefix (), bytes_, prefix_length ()); |
87 | } |
88 | |
89 | unsigned char *node_t::first_bytes () |
90 | { |
91 | return prefix () + prefix_length (); |
92 | } |
93 | |
94 | void node_t::set_first_bytes (const unsigned char *bytes_) |
95 | { |
96 | memcpy (first_bytes (), bytes_, edgecount ()); |
97 | } |
98 | |
99 | unsigned char node_t::first_byte_at (size_t index_) |
100 | { |
101 | zmq_assert (index_ < edgecount ()); |
102 | return first_bytes ()[index_]; |
103 | } |
104 | |
105 | void node_t::set_first_byte_at (size_t index_, unsigned char byte_) |
106 | { |
107 | zmq_assert (index_ < edgecount ()); |
108 | first_bytes ()[index_] = byte_; |
109 | } |
110 | |
111 | unsigned char *node_t::node_pointers () |
112 | { |
113 | return prefix () + prefix_length () + edgecount (); |
114 | } |
115 | |
116 | void node_t::set_node_pointers (const unsigned char *pointers_) |
117 | { |
118 | memcpy (node_pointers (), pointers_, edgecount () * sizeof (void *)); |
119 | } |
120 | |
121 | node_t node_t::node_at (size_t index_) |
122 | { |
123 | zmq_assert (index_ < edgecount ()); |
124 | |
125 | unsigned char *data; |
126 | memcpy (&data, node_pointers () + index_ * sizeof (void *), sizeof (data)); |
127 | return node_t (data); |
128 | } |
129 | |
130 | void node_t::set_node_at (size_t index_, node_t node_) |
131 | { |
132 | zmq_assert (index_ < edgecount ()); |
133 | memcpy (node_pointers () + index_ * sizeof (void *), &node_._data, |
134 | sizeof (node_._data)); |
135 | } |
136 | |
137 | void node_t::set_edge_at (size_t index_, |
138 | unsigned char first_byte_, |
139 | node_t node_) |
140 | { |
141 | set_first_byte_at (index_, first_byte_); |
142 | set_node_at (index_, node_); |
143 | } |
144 | |
145 | bool node_t::operator== (node_t other_) const |
146 | { |
147 | return _data == other_._data; |
148 | } |
149 | |
150 | bool node_t::operator!= (node_t other_) const |
151 | { |
152 | return !(*this == other_); |
153 | } |
154 | |
155 | void node_t::resize (size_t prefix_length_, size_t edgecount_) |
156 | { |
157 | size_t node_size = 3 * sizeof (uint32_t) + prefix_length_ |
158 | + edgecount_ * (1 + sizeof (void *)); |
159 | unsigned char *new_data = |
160 | static_cast<unsigned char *> (realloc (_data, node_size)); |
161 | zmq_assert (new_data); |
162 | _data = new_data; |
163 | set_prefix_length (static_cast<uint32_t> (prefix_length_)); |
164 | set_edgecount (static_cast<uint32_t> (edgecount_)); |
165 | } |
166 | |
167 | node_t make_node (size_t refcount_, size_t prefix_length_, size_t edgecount_) |
168 | { |
169 | size_t node_size = 3 * sizeof (uint32_t) + prefix_length_ |
170 | + edgecount_ * (1 + sizeof (void *)); |
171 | |
172 | unsigned char *data = static_cast<unsigned char *> (malloc (node_size)); |
173 | zmq_assert (data); |
174 | |
175 | node_t node (data); |
176 | node.set_refcount (static_cast<uint32_t> (refcount_)); |
177 | node.set_prefix_length (static_cast<uint32_t> (prefix_length_)); |
178 | node.set_edgecount (static_cast<uint32_t> (edgecount_)); |
179 | return node; |
180 | } |
181 | |
182 | // ---------------------------------------------------------------------- |
183 | |
184 | zmq::radix_tree_t::radix_tree_t () : _root (make_node (0, 0, 0)), _size (0) |
185 | { |
186 | } |
187 | |
188 | static void free_nodes (node_t node_) |
189 | { |
190 | for (size_t i = 0; i < node_.edgecount (); ++i) |
191 | free_nodes (node_.node_at (i)); |
192 | free (node_._data); |
193 | } |
194 | |
195 | zmq::radix_tree_t::~radix_tree_t () |
196 | { |
197 | free_nodes (_root); |
198 | } |
199 | |
200 | match_result_t::match_result_t (size_t key_bytes_matched_, |
201 | size_t prefix_bytes_matched_, |
202 | size_t edge_index_, |
203 | size_t parent_edge_index_, |
204 | node_t current_, |
205 | node_t parent_, |
206 | node_t grandparent_) : |
207 | _key_bytes_matched (key_bytes_matched_), |
208 | _prefix_bytes_matched (prefix_bytes_matched_), |
209 | _edge_index (edge_index_), |
210 | _parent_edge_index (parent_edge_index_), |
211 | _current_node (current_), |
212 | _parent_node (parent_), |
213 | _grandparent_node (grandparent_) |
214 | { |
215 | } |
216 | |
217 | match_result_t zmq::radix_tree_t::match (const unsigned char *key_, |
218 | size_t key_size_, |
219 | bool is_lookup_ = false) const |
220 | { |
221 | zmq_assert (key_); |
222 | |
223 | // Node we're currently at in the traversal and its predecessors. |
224 | node_t current_node = _root; |
225 | node_t parent_node = current_node; |
226 | node_t grandparent_node = current_node; |
227 | // Index of the next byte to match in the key. |
228 | size_t key_byte_index = 0; |
229 | // Index of the next byte to match in the current node's prefix. |
230 | size_t prefix_byte_index = 0; |
231 | // Index of the edge from parent to current node. |
232 | size_t edge_index = 0; |
233 | // Index of the edge from grandparent to parent. |
234 | size_t parent_edge_index = 0; |
235 | |
236 | while (current_node.prefix_length () > 0 || current_node.edgecount () > 0) { |
237 | for (prefix_byte_index = 0; |
238 | prefix_byte_index < current_node.prefix_length () |
239 | && key_byte_index < key_size_; |
240 | ++prefix_byte_index, ++key_byte_index) { |
241 | if (current_node.prefix ()[prefix_byte_index] |
242 | != key_[key_byte_index]) |
243 | break; |
244 | } |
245 | |
246 | // Even if a prefix of the key matches and we're doing a |
247 | // lookup, this means we've found a matching subscription. |
248 | if (is_lookup_ && prefix_byte_index == current_node.prefix_length () |
249 | && current_node.refcount () > 0) { |
250 | key_byte_index = key_size_; |
251 | break; |
252 | } |
253 | |
254 | // There was a mismatch or we've matched the whole key, so |
255 | // there's nothing more to do. |
256 | if (prefix_byte_index != current_node.prefix_length () |
257 | || key_byte_index == key_size_) |
258 | break; |
259 | |
260 | // We need to match the rest of the key. Check if there's an |
261 | // outgoing edge from this node. |
262 | node_t next_node = current_node; |
263 | for (size_t i = 0; i < current_node.edgecount (); ++i) { |
264 | if (current_node.first_byte_at (i) == key_[key_byte_index]) { |
265 | parent_edge_index = edge_index; |
266 | edge_index = i; |
267 | next_node = current_node.node_at (i); |
268 | break; |
269 | } |
270 | } |
271 | |
272 | if (next_node == current_node) |
273 | break; // No outgoing edge. |
274 | grandparent_node = parent_node; |
275 | parent_node = current_node; |
276 | current_node = next_node; |
277 | } |
278 | |
279 | return match_result_t (key_byte_index, prefix_byte_index, edge_index, |
280 | parent_edge_index, current_node, parent_node, |
281 | grandparent_node); |
282 | } |
283 | |
284 | bool zmq::radix_tree_t::add (const unsigned char *key_, size_t key_size_) |
285 | { |
286 | match_result_t match_result = match (key_, key_size_); |
287 | size_t key_bytes_matched = match_result._key_bytes_matched; |
288 | size_t prefix_bytes_matched = match_result._prefix_bytes_matched; |
289 | size_t edge_index = match_result._edge_index; |
290 | node_t current_node = match_result._current_node; |
291 | node_t parent_node = match_result._parent_node; |
292 | |
293 | if (key_bytes_matched != key_size_) { |
294 | // Not all characters match, we might have to split the node. |
295 | if (prefix_bytes_matched == current_node.prefix_length ()) { |
296 | // The mismatch is at one of the outgoing edges, so we |
297 | // create an edge from the current node to a new leaf node |
298 | // that has the rest of the key as the prefix. |
299 | node_t key_node = make_node (1, key_size_ - key_bytes_matched, 0); |
300 | key_node.set_prefix (key_ + key_bytes_matched); |
301 | |
302 | // Reallocate for one more edge. |
303 | current_node.resize (current_node.prefix_length (), |
304 | current_node.edgecount () + 1); |
305 | |
306 | // Make room for the new edge. We need to shift the chunk |
307 | // of node pointers one byte to the right. Since resize() |
308 | // increments the edgecount by 1, node_pointers() tells us the |
309 | // destination address. The chunk of node pointers starts |
310 | // at one byte to the left of this destination. |
311 | // |
312 | // Since the regions can overlap, we use memmove. |
313 | memmove (current_node.node_pointers (), |
314 | current_node.node_pointers () - 1, |
315 | (current_node.edgecount () - 1) * sizeof (void *)); |
316 | |
317 | // Add an edge to the new node. |
318 | current_node.set_edge_at (current_node.edgecount () - 1, |
319 | key_[key_bytes_matched], key_node); |
320 | |
321 | // We need to update all pointers to the current node |
322 | // after the call to resize(). |
323 | if (current_node.prefix_length () == 0) |
324 | _root._data = current_node._data; |
325 | else |
326 | parent_node.set_node_at (edge_index, current_node); |
327 | ++_size; |
328 | return true; |
329 | } |
330 | |
331 | // There was a mismatch, so we need to split this node. |
332 | // |
333 | // Create two nodes that will be reachable from the parent. |
334 | // One node will have the rest of the characters from the key, |
335 | // and the other node will have the rest of the characters |
336 | // from the current node's prefix. |
337 | node_t key_node = make_node (1, key_size_ - key_bytes_matched, 0); |
338 | node_t split_node = |
339 | make_node (current_node.refcount (), |
340 | current_node.prefix_length () - prefix_bytes_matched, |
341 | current_node.edgecount ()); |
342 | |
343 | // Copy the prefix chunks to the new nodes. |
344 | key_node.set_prefix (key_ + key_bytes_matched); |
345 | split_node.set_prefix (current_node.prefix () + prefix_bytes_matched); |
346 | |
347 | // Copy the current node's edges to the new node. |
348 | split_node.set_first_bytes (current_node.first_bytes ()); |
349 | split_node.set_node_pointers (current_node.node_pointers ()); |
350 | |
351 | // Resize the current node to accommodate a prefix comprising |
352 | // the matched characters and 2 outgoing edges to the above |
353 | // nodes. Set the refcount to 0 since this node doesn't hold a |
354 | // key. |
355 | current_node.resize (prefix_bytes_matched, 2); |
356 | current_node.set_refcount (0); |
357 | |
358 | // Add links to the new nodes. We don't need to copy the |
359 | // prefix since resize() retains it in the current node. |
360 | current_node.set_edge_at (0, key_node.prefix ()[0], key_node); |
361 | current_node.set_edge_at (1, split_node.prefix ()[0], split_node); |
362 | |
363 | ++_size; |
364 | parent_node.set_node_at (edge_index, current_node); |
365 | return true; |
366 | } |
367 | |
368 | // All characters in the key match, but we still might need to split. |
369 | if (prefix_bytes_matched != current_node.prefix_length ()) { |
370 | // All characters in the key match, but not all characters |
371 | // from the current node's prefix match. |
372 | |
373 | // Create a node that contains the rest of the characters from |
374 | // the current node's prefix and the outgoing edges from the |
375 | // current node. |
376 | node_t split_node = |
377 | make_node (current_node.refcount (), |
378 | current_node.prefix_length () - prefix_bytes_matched, |
379 | current_node.edgecount ()); |
380 | split_node.set_prefix (current_node.prefix () + prefix_bytes_matched); |
381 | split_node.set_first_bytes (current_node.first_bytes ()); |
382 | split_node.set_node_pointers (current_node.node_pointers ()); |
383 | |
384 | // Resize the current node to hold only the matched characters |
385 | // from its prefix and one edge to the new node. |
386 | current_node.resize (prefix_bytes_matched, 1); |
387 | |
388 | // Add an edge to the split node and set the refcount to 1 |
389 | // since this key wasn't inserted earlier. We don't need to |
390 | // set the prefix because the first `prefix_bytes_matched` bytes |
391 | // in the prefix are preserved by resize(). |
392 | current_node.set_edge_at (0, split_node.prefix ()[0], split_node); |
393 | current_node.set_refcount (1); |
394 | |
395 | ++_size; |
396 | parent_node.set_node_at (edge_index, current_node); |
397 | return true; |
398 | } |
399 | |
400 | zmq_assert (key_bytes_matched == key_size_); |
401 | zmq_assert (prefix_bytes_matched == current_node.prefix_length ()); |
402 | |
403 | ++_size; |
404 | current_node.set_refcount (current_node.refcount () + 1); |
405 | return current_node.refcount () == 1; |
406 | } |
407 | |
408 | bool zmq::radix_tree_t::rm (const unsigned char *key_, size_t key_size_) |
409 | { |
410 | match_result_t match_result = match (key_, key_size_); |
411 | size_t key_bytes_matched = match_result._key_bytes_matched; |
412 | size_t prefix_bytes_matched = match_result._prefix_bytes_matched; |
413 | size_t edge_index = match_result._edge_index; |
414 | size_t parent_edge_index = match_result._parent_edge_index; |
415 | node_t current_node = match_result._current_node; |
416 | node_t parent_node = match_result._parent_node; |
417 | node_t grandparent_node = match_result._grandparent_node; |
418 | |
419 | if (key_bytes_matched != key_size_ |
420 | || prefix_bytes_matched != current_node.prefix_length () |
421 | || current_node.refcount () == 0) |
422 | return false; |
423 | |
424 | current_node.set_refcount (current_node.refcount () - 1); |
425 | --_size; |
426 | if (current_node.refcount () > 0) |
427 | return false; |
428 | |
429 | // Don't delete the root node. |
430 | if (current_node == _root) |
431 | return true; |
432 | |
433 | size_t outgoing_edges = current_node.edgecount (); |
434 | if (outgoing_edges > 1) |
435 | // This node can't be merged with any other node, so there's |
436 | // nothing more to do. |
437 | return true; |
438 | |
439 | if (outgoing_edges == 1) { |
440 | // Merge this node with the single child node. |
441 | node_t child = current_node.node_at (0); |
442 | |
443 | // Make room for the child node's prefix and edges. We need to |
444 | // keep the old prefix length since resize() will overwrite |
445 | // it. |
446 | uint32_t old_prefix_length = current_node.prefix_length (); |
447 | current_node.resize (old_prefix_length + child.prefix_length (), |
448 | child.edgecount ()); |
449 | |
450 | // Append the child node's prefix to the current node. |
451 | memcpy (current_node.prefix () + old_prefix_length, child.prefix (), |
452 | child.prefix_length ()); |
453 | |
454 | // Copy the rest of child node's data to the current node. |
455 | current_node.set_first_bytes (child.first_bytes ()); |
456 | current_node.set_node_pointers (child.node_pointers ()); |
457 | current_node.set_refcount (child.refcount ()); |
458 | |
459 | free (child._data); |
460 | parent_node.set_node_at (edge_index, current_node); |
461 | return true; |
462 | } |
463 | |
464 | if (parent_node.edgecount () == 2 && parent_node.refcount () == 0 |
465 | && parent_node != _root) { |
466 | // Removing this node leaves the parent with one child. |
467 | // If the parent doesn't hold a key or if it isn't the root, |
468 | // we can merge it with its single child node. |
469 | zmq_assert (edge_index < 2); |
470 | node_t other_child = parent_node.node_at (!edge_index); |
471 | |
472 | // Make room for the child node's prefix and edges. We need to |
473 | // keep the old prefix length since resize() will overwrite |
474 | // it. |
475 | uint32_t old_prefix_length = parent_node.prefix_length (); |
476 | parent_node.resize (old_prefix_length + other_child.prefix_length (), |
477 | other_child.edgecount ()); |
478 | |
479 | // Append the child node's prefix to the current node. |
480 | memcpy (parent_node.prefix () + old_prefix_length, |
481 | other_child.prefix (), other_child.prefix_length ()); |
482 | |
483 | // Copy the rest of child node's data to the current node. |
484 | parent_node.set_first_bytes (other_child.first_bytes ()); |
485 | parent_node.set_node_pointers (other_child.node_pointers ()); |
486 | parent_node.set_refcount (other_child.refcount ()); |
487 | |
488 | free (current_node._data); |
489 | free (other_child._data); |
490 | grandparent_node.set_node_at (parent_edge_index, parent_node); |
491 | return true; |
492 | } |
493 | |
494 | // This is a leaf node that doesn't leave its parent with one |
495 | // outgoing edge. Remove the outgoing edge to this node from the |
496 | // parent. |
497 | zmq_assert (outgoing_edges == 0); |
498 | |
499 | // Replace the edge to the current node with the last edge. An |
500 | // edge consists of a byte and a pointer to the next node. First |
501 | // replace the byte. |
502 | size_t last_index = parent_node.edgecount () - 1; |
503 | unsigned char last_byte = parent_node.first_byte_at (last_index); |
504 | node_t last_node = parent_node.node_at (last_index); |
505 | parent_node.set_edge_at (edge_index, last_byte, last_node); |
506 | |
507 | // Move the chunk of pointers one byte to the left, effectively |
508 | // deleting the last byte in the region of first bytes by |
509 | // overwriting it. |
510 | memmove (parent_node.node_pointers () - 1, parent_node.node_pointers (), |
511 | parent_node.edgecount () * sizeof (void *)); |
512 | |
513 | // Shrink the parent node to the new size, which "deletes" the |
514 | // last pointer in the chunk of node pointers. |
515 | parent_node.resize (parent_node.prefix_length (), |
516 | parent_node.edgecount () - 1); |
517 | |
518 | // Nothing points to this node now, so we can reclaim it. |
519 | free (current_node._data); |
520 | |
521 | if (parent_node.prefix_length () == 0) |
522 | _root._data = parent_node._data; |
523 | else |
524 | grandparent_node.set_node_at (parent_edge_index, parent_node); |
525 | return true; |
526 | } |
527 | |
528 | bool zmq::radix_tree_t::check (const unsigned char *key_, size_t key_size_) |
529 | { |
530 | if (_root.refcount () > 0) |
531 | return true; |
532 | |
533 | match_result_t match_result = match (key_, key_size_, true); |
534 | return match_result._key_bytes_matched == key_size_ |
535 | && match_result._prefix_bytes_matched |
536 | == match_result._current_node.prefix_length () |
537 | && match_result._current_node.refcount () > 0; |
538 | } |
539 | |
540 | static void |
541 | visit_keys (node_t node_, |
542 | std::vector<unsigned char> &buffer_, |
543 | void (*func_) (unsigned char *data_, size_t size_, void *arg_), |
544 | void *arg_) |
545 | { |
546 | for (size_t i = 0; i < node_.prefix_length (); ++i) |
547 | buffer_.push_back (node_.prefix ()[i]); |
548 | |
549 | if (node_.refcount () > 0) { |
550 | zmq_assert (!buffer_.empty ()); |
551 | func_ (&buffer_[0], buffer_.size (), arg_); |
552 | } |
553 | |
554 | for (size_t i = 0; i < node_.edgecount (); ++i) |
555 | visit_keys (node_.node_at (i), buffer_, func_, arg_); |
556 | for (size_t i = 0; i < node_.prefix_length (); ++i) |
557 | buffer_.pop_back (); |
558 | } |
559 | |
560 | void zmq::radix_tree_t::apply ( |
561 | void (*func_) (unsigned char *data_, size_t size_, void *arg_), void *arg_) |
562 | { |
563 | if (_root.refcount () > 0) |
564 | func_ (NULL, 0, arg_); // Root node is always empty. |
565 | |
566 | std::vector<unsigned char> buffer; |
567 | for (size_t i = 0; i < _root.edgecount (); ++i) |
568 | visit_keys (_root.node_at (i), buffer, func_, arg_); |
569 | } |
570 | |
571 | size_t zmq::radix_tree_t::size () const |
572 | { |
573 | return _size; |
574 | } |
575 | |