1/*
2 Copyright (c) 2018 Contributors as noted in the AUTHORS file
3
4 This file is part of libzmq, the ZeroMQ core engine in C++.
5
6 libzmq is free software; you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 As a special exception, the Contributors give you permission to link
12 this library with independent modules to produce an executable,
13 regardless of the license terms of these independent modules, and to
14 copy and distribute the resulting executable under terms of your choice,
15 provided that you also meet, for each linked independent module, the
16 terms and conditions of the license of that module. An independent
17 module is a module which is not derived from or based on this library.
18 If you modify this library, you must extend this exception to your
19 version of the library.
20
21 libzmq is distributed in the hope that it will be useful, but WITHOUT
22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
24 License for more details.
25
26 You should have received a copy of the GNU Lesser General Public License
27 along with this program. If not, see <http://www.gnu.org/licenses/>.
28*/
29
30#include "precompiled.hpp"
31#include "macros.hpp"
32#include "err.hpp"
33#include "radix_tree.hpp"
34
35#include <stdlib.h>
36#include <string.h>
37#include <vector>
38
39node_t::node_t (unsigned char *data_) : _data (data_)
40{
41}
42
43uint32_t node_t::refcount ()
44{
45 uint32_t u32;
46 memcpy (&u32, _data, sizeof (u32));
47 return u32;
48}
49
50void node_t::set_refcount (uint32_t value_)
51{
52 memcpy (_data, &value_, sizeof (value_));
53}
54
55uint32_t node_t::prefix_length ()
56{
57 uint32_t u32;
58 memcpy (&u32, _data + sizeof (uint32_t), sizeof (u32));
59 return u32;
60}
61
62void node_t::set_prefix_length (uint32_t value_)
63{
64 memcpy (_data + sizeof (value_), &value_, sizeof (value_));
65}
66
67uint32_t node_t::edgecount ()
68{
69 uint32_t u32;
70 memcpy (&u32, _data + 2 * sizeof (uint32_t), sizeof (u32));
71 return u32;
72}
73
74void node_t::set_edgecount (uint32_t value_)
75{
76 memcpy (_data + 2 * sizeof (value_), &value_, sizeof (value_));
77}
78
79unsigned char *node_t::prefix ()
80{
81 return _data + 3 * sizeof (uint32_t);
82}
83
84void node_t::set_prefix (const unsigned char *bytes_)
85{
86 memcpy (prefix (), bytes_, prefix_length ());
87}
88
89unsigned char *node_t::first_bytes ()
90{
91 return prefix () + prefix_length ();
92}
93
94void node_t::set_first_bytes (const unsigned char *bytes_)
95{
96 memcpy (first_bytes (), bytes_, edgecount ());
97}
98
99unsigned char node_t::first_byte_at (size_t index_)
100{
101 zmq_assert (index_ < edgecount ());
102 return first_bytes ()[index_];
103}
104
105void node_t::set_first_byte_at (size_t index_, unsigned char byte_)
106{
107 zmq_assert (index_ < edgecount ());
108 first_bytes ()[index_] = byte_;
109}
110
111unsigned char *node_t::node_pointers ()
112{
113 return prefix () + prefix_length () + edgecount ();
114}
115
116void node_t::set_node_pointers (const unsigned char *pointers_)
117{
118 memcpy (node_pointers (), pointers_, edgecount () * sizeof (void *));
119}
120
121node_t node_t::node_at (size_t index_)
122{
123 zmq_assert (index_ < edgecount ());
124
125 unsigned char *data;
126 memcpy (&data, node_pointers () + index_ * sizeof (void *), sizeof (data));
127 return node_t (data);
128}
129
130void node_t::set_node_at (size_t index_, node_t node_)
131{
132 zmq_assert (index_ < edgecount ());
133 memcpy (node_pointers () + index_ * sizeof (void *), &node_._data,
134 sizeof (node_._data));
135}
136
137void node_t::set_edge_at (size_t index_,
138 unsigned char first_byte_,
139 node_t node_)
140{
141 set_first_byte_at (index_, first_byte_);
142 set_node_at (index_, node_);
143}
144
145bool node_t::operator== (node_t other_) const
146{
147 return _data == other_._data;
148}
149
150bool node_t::operator!= (node_t other_) const
151{
152 return !(*this == other_);
153}
154
155void node_t::resize (size_t prefix_length_, size_t edgecount_)
156{
157 size_t node_size = 3 * sizeof (uint32_t) + prefix_length_
158 + edgecount_ * (1 + sizeof (void *));
159 unsigned char *new_data =
160 static_cast<unsigned char *> (realloc (_data, node_size));
161 zmq_assert (new_data);
162 _data = new_data;
163 set_prefix_length (static_cast<uint32_t> (prefix_length_));
164 set_edgecount (static_cast<uint32_t> (edgecount_));
165}
166
167node_t make_node (size_t refcount_, size_t prefix_length_, size_t edgecount_)
168{
169 size_t node_size = 3 * sizeof (uint32_t) + prefix_length_
170 + edgecount_ * (1 + sizeof (void *));
171
172 unsigned char *data = static_cast<unsigned char *> (malloc (node_size));
173 zmq_assert (data);
174
175 node_t node (data);
176 node.set_refcount (static_cast<uint32_t> (refcount_));
177 node.set_prefix_length (static_cast<uint32_t> (prefix_length_));
178 node.set_edgecount (static_cast<uint32_t> (edgecount_));
179 return node;
180}
181
182// ----------------------------------------------------------------------
183
184zmq::radix_tree_t::radix_tree_t () : _root (make_node (0, 0, 0)), _size (0)
185{
186}
187
188static void free_nodes (node_t node_)
189{
190 for (size_t i = 0; i < node_.edgecount (); ++i)
191 free_nodes (node_.node_at (i));
192 free (node_._data);
193}
194
195zmq::radix_tree_t::~radix_tree_t ()
196{
197 free_nodes (_root);
198}
199
200match_result_t::match_result_t (size_t key_bytes_matched_,
201 size_t prefix_bytes_matched_,
202 size_t edge_index_,
203 size_t parent_edge_index_,
204 node_t current_,
205 node_t parent_,
206 node_t grandparent_) :
207 _key_bytes_matched (key_bytes_matched_),
208 _prefix_bytes_matched (prefix_bytes_matched_),
209 _edge_index (edge_index_),
210 _parent_edge_index (parent_edge_index_),
211 _current_node (current_),
212 _parent_node (parent_),
213 _grandparent_node (grandparent_)
214{
215}
216
217match_result_t zmq::radix_tree_t::match (const unsigned char *key_,
218 size_t key_size_,
219 bool is_lookup_ = false) const
220{
221 zmq_assert (key_);
222
223 // Node we're currently at in the traversal and its predecessors.
224 node_t current_node = _root;
225 node_t parent_node = current_node;
226 node_t grandparent_node = current_node;
227 // Index of the next byte to match in the key.
228 size_t key_byte_index = 0;
229 // Index of the next byte to match in the current node's prefix.
230 size_t prefix_byte_index = 0;
231 // Index of the edge from parent to current node.
232 size_t edge_index = 0;
233 // Index of the edge from grandparent to parent.
234 size_t parent_edge_index = 0;
235
236 while (current_node.prefix_length () > 0 || current_node.edgecount () > 0) {
237 for (prefix_byte_index = 0;
238 prefix_byte_index < current_node.prefix_length ()
239 && key_byte_index < key_size_;
240 ++prefix_byte_index, ++key_byte_index) {
241 if (current_node.prefix ()[prefix_byte_index]
242 != key_[key_byte_index])
243 break;
244 }
245
246 // Even if a prefix of the key matches and we're doing a
247 // lookup, this means we've found a matching subscription.
248 if (is_lookup_ && prefix_byte_index == current_node.prefix_length ()
249 && current_node.refcount () > 0) {
250 key_byte_index = key_size_;
251 break;
252 }
253
254 // There was a mismatch or we've matched the whole key, so
255 // there's nothing more to do.
256 if (prefix_byte_index != current_node.prefix_length ()
257 || key_byte_index == key_size_)
258 break;
259
260 // We need to match the rest of the key. Check if there's an
261 // outgoing edge from this node.
262 node_t next_node = current_node;
263 for (size_t i = 0; i < current_node.edgecount (); ++i) {
264 if (current_node.first_byte_at (i) == key_[key_byte_index]) {
265 parent_edge_index = edge_index;
266 edge_index = i;
267 next_node = current_node.node_at (i);
268 break;
269 }
270 }
271
272 if (next_node == current_node)
273 break; // No outgoing edge.
274 grandparent_node = parent_node;
275 parent_node = current_node;
276 current_node = next_node;
277 }
278
279 return match_result_t (key_byte_index, prefix_byte_index, edge_index,
280 parent_edge_index, current_node, parent_node,
281 grandparent_node);
282}
283
284bool zmq::radix_tree_t::add (const unsigned char *key_, size_t key_size_)
285{
286 match_result_t match_result = match (key_, key_size_);
287 size_t key_bytes_matched = match_result._key_bytes_matched;
288 size_t prefix_bytes_matched = match_result._prefix_bytes_matched;
289 size_t edge_index = match_result._edge_index;
290 node_t current_node = match_result._current_node;
291 node_t parent_node = match_result._parent_node;
292
293 if (key_bytes_matched != key_size_) {
294 // Not all characters match, we might have to split the node.
295 if (prefix_bytes_matched == current_node.prefix_length ()) {
296 // The mismatch is at one of the outgoing edges, so we
297 // create an edge from the current node to a new leaf node
298 // that has the rest of the key as the prefix.
299 node_t key_node = make_node (1, key_size_ - key_bytes_matched, 0);
300 key_node.set_prefix (key_ + key_bytes_matched);
301
302 // Reallocate for one more edge.
303 current_node.resize (current_node.prefix_length (),
304 current_node.edgecount () + 1);
305
306 // Make room for the new edge. We need to shift the chunk
307 // of node pointers one byte to the right. Since resize()
308 // increments the edgecount by 1, node_pointers() tells us the
309 // destination address. The chunk of node pointers starts
310 // at one byte to the left of this destination.
311 //
312 // Since the regions can overlap, we use memmove.
313 memmove (current_node.node_pointers (),
314 current_node.node_pointers () - 1,
315 (current_node.edgecount () - 1) * sizeof (void *));
316
317 // Add an edge to the new node.
318 current_node.set_edge_at (current_node.edgecount () - 1,
319 key_[key_bytes_matched], key_node);
320
321 // We need to update all pointers to the current node
322 // after the call to resize().
323 if (current_node.prefix_length () == 0)
324 _root._data = current_node._data;
325 else
326 parent_node.set_node_at (edge_index, current_node);
327 ++_size;
328 return true;
329 }
330
331 // There was a mismatch, so we need to split this node.
332 //
333 // Create two nodes that will be reachable from the parent.
334 // One node will have the rest of the characters from the key,
335 // and the other node will have the rest of the characters
336 // from the current node's prefix.
337 node_t key_node = make_node (1, key_size_ - key_bytes_matched, 0);
338 node_t split_node =
339 make_node (current_node.refcount (),
340 current_node.prefix_length () - prefix_bytes_matched,
341 current_node.edgecount ());
342
343 // Copy the prefix chunks to the new nodes.
344 key_node.set_prefix (key_ + key_bytes_matched);
345 split_node.set_prefix (current_node.prefix () + prefix_bytes_matched);
346
347 // Copy the current node's edges to the new node.
348 split_node.set_first_bytes (current_node.first_bytes ());
349 split_node.set_node_pointers (current_node.node_pointers ());
350
351 // Resize the current node to accommodate a prefix comprising
352 // the matched characters and 2 outgoing edges to the above
353 // nodes. Set the refcount to 0 since this node doesn't hold a
354 // key.
355 current_node.resize (prefix_bytes_matched, 2);
356 current_node.set_refcount (0);
357
358 // Add links to the new nodes. We don't need to copy the
359 // prefix since resize() retains it in the current node.
360 current_node.set_edge_at (0, key_node.prefix ()[0], key_node);
361 current_node.set_edge_at (1, split_node.prefix ()[0], split_node);
362
363 ++_size;
364 parent_node.set_node_at (edge_index, current_node);
365 return true;
366 }
367
368 // All characters in the key match, but we still might need to split.
369 if (prefix_bytes_matched != current_node.prefix_length ()) {
370 // All characters in the key match, but not all characters
371 // from the current node's prefix match.
372
373 // Create a node that contains the rest of the characters from
374 // the current node's prefix and the outgoing edges from the
375 // current node.
376 node_t split_node =
377 make_node (current_node.refcount (),
378 current_node.prefix_length () - prefix_bytes_matched,
379 current_node.edgecount ());
380 split_node.set_prefix (current_node.prefix () + prefix_bytes_matched);
381 split_node.set_first_bytes (current_node.first_bytes ());
382 split_node.set_node_pointers (current_node.node_pointers ());
383
384 // Resize the current node to hold only the matched characters
385 // from its prefix and one edge to the new node.
386 current_node.resize (prefix_bytes_matched, 1);
387
388 // Add an edge to the split node and set the refcount to 1
389 // since this key wasn't inserted earlier. We don't need to
390 // set the prefix because the first `prefix_bytes_matched` bytes
391 // in the prefix are preserved by resize().
392 current_node.set_edge_at (0, split_node.prefix ()[0], split_node);
393 current_node.set_refcount (1);
394
395 ++_size;
396 parent_node.set_node_at (edge_index, current_node);
397 return true;
398 }
399
400 zmq_assert (key_bytes_matched == key_size_);
401 zmq_assert (prefix_bytes_matched == current_node.prefix_length ());
402
403 ++_size;
404 current_node.set_refcount (current_node.refcount () + 1);
405 return current_node.refcount () == 1;
406}
407
408bool zmq::radix_tree_t::rm (const unsigned char *key_, size_t key_size_)
409{
410 match_result_t match_result = match (key_, key_size_);
411 size_t key_bytes_matched = match_result._key_bytes_matched;
412 size_t prefix_bytes_matched = match_result._prefix_bytes_matched;
413 size_t edge_index = match_result._edge_index;
414 size_t parent_edge_index = match_result._parent_edge_index;
415 node_t current_node = match_result._current_node;
416 node_t parent_node = match_result._parent_node;
417 node_t grandparent_node = match_result._grandparent_node;
418
419 if (key_bytes_matched != key_size_
420 || prefix_bytes_matched != current_node.prefix_length ()
421 || current_node.refcount () == 0)
422 return false;
423
424 current_node.set_refcount (current_node.refcount () - 1);
425 --_size;
426 if (current_node.refcount () > 0)
427 return false;
428
429 // Don't delete the root node.
430 if (current_node == _root)
431 return true;
432
433 size_t outgoing_edges = current_node.edgecount ();
434 if (outgoing_edges > 1)
435 // This node can't be merged with any other node, so there's
436 // nothing more to do.
437 return true;
438
439 if (outgoing_edges == 1) {
440 // Merge this node with the single child node.
441 node_t child = current_node.node_at (0);
442
443 // Make room for the child node's prefix and edges. We need to
444 // keep the old prefix length since resize() will overwrite
445 // it.
446 uint32_t old_prefix_length = current_node.prefix_length ();
447 current_node.resize (old_prefix_length + child.prefix_length (),
448 child.edgecount ());
449
450 // Append the child node's prefix to the current node.
451 memcpy (current_node.prefix () + old_prefix_length, child.prefix (),
452 child.prefix_length ());
453
454 // Copy the rest of child node's data to the current node.
455 current_node.set_first_bytes (child.first_bytes ());
456 current_node.set_node_pointers (child.node_pointers ());
457 current_node.set_refcount (child.refcount ());
458
459 free (child._data);
460 parent_node.set_node_at (edge_index, current_node);
461 return true;
462 }
463
464 if (parent_node.edgecount () == 2 && parent_node.refcount () == 0
465 && parent_node != _root) {
466 // Removing this node leaves the parent with one child.
467 // If the parent doesn't hold a key or if it isn't the root,
468 // we can merge it with its single child node.
469 zmq_assert (edge_index < 2);
470 node_t other_child = parent_node.node_at (!edge_index);
471
472 // Make room for the child node's prefix and edges. We need to
473 // keep the old prefix length since resize() will overwrite
474 // it.
475 uint32_t old_prefix_length = parent_node.prefix_length ();
476 parent_node.resize (old_prefix_length + other_child.prefix_length (),
477 other_child.edgecount ());
478
479 // Append the child node's prefix to the current node.
480 memcpy (parent_node.prefix () + old_prefix_length,
481 other_child.prefix (), other_child.prefix_length ());
482
483 // Copy the rest of child node's data to the current node.
484 parent_node.set_first_bytes (other_child.first_bytes ());
485 parent_node.set_node_pointers (other_child.node_pointers ());
486 parent_node.set_refcount (other_child.refcount ());
487
488 free (current_node._data);
489 free (other_child._data);
490 grandparent_node.set_node_at (parent_edge_index, parent_node);
491 return true;
492 }
493
494 // This is a leaf node that doesn't leave its parent with one
495 // outgoing edge. Remove the outgoing edge to this node from the
496 // parent.
497 zmq_assert (outgoing_edges == 0);
498
499 // Replace the edge to the current node with the last edge. An
500 // edge consists of a byte and a pointer to the next node. First
501 // replace the byte.
502 size_t last_index = parent_node.edgecount () - 1;
503 unsigned char last_byte = parent_node.first_byte_at (last_index);
504 node_t last_node = parent_node.node_at (last_index);
505 parent_node.set_edge_at (edge_index, last_byte, last_node);
506
507 // Move the chunk of pointers one byte to the left, effectively
508 // deleting the last byte in the region of first bytes by
509 // overwriting it.
510 memmove (parent_node.node_pointers () - 1, parent_node.node_pointers (),
511 parent_node.edgecount () * sizeof (void *));
512
513 // Shrink the parent node to the new size, which "deletes" the
514 // last pointer in the chunk of node pointers.
515 parent_node.resize (parent_node.prefix_length (),
516 parent_node.edgecount () - 1);
517
518 // Nothing points to this node now, so we can reclaim it.
519 free (current_node._data);
520
521 if (parent_node.prefix_length () == 0)
522 _root._data = parent_node._data;
523 else
524 grandparent_node.set_node_at (parent_edge_index, parent_node);
525 return true;
526}
527
528bool zmq::radix_tree_t::check (const unsigned char *key_, size_t key_size_)
529{
530 if (_root.refcount () > 0)
531 return true;
532
533 match_result_t match_result = match (key_, key_size_, true);
534 return match_result._key_bytes_matched == key_size_
535 && match_result._prefix_bytes_matched
536 == match_result._current_node.prefix_length ()
537 && match_result._current_node.refcount () > 0;
538}
539
540static void
541visit_keys (node_t node_,
542 std::vector<unsigned char> &buffer_,
543 void (*func_) (unsigned char *data_, size_t size_, void *arg_),
544 void *arg_)
545{
546 for (size_t i = 0; i < node_.prefix_length (); ++i)
547 buffer_.push_back (node_.prefix ()[i]);
548
549 if (node_.refcount () > 0) {
550 zmq_assert (!buffer_.empty ());
551 func_ (&buffer_[0], buffer_.size (), arg_);
552 }
553
554 for (size_t i = 0; i < node_.edgecount (); ++i)
555 visit_keys (node_.node_at (i), buffer_, func_, arg_);
556 for (size_t i = 0; i < node_.prefix_length (); ++i)
557 buffer_.pop_back ();
558}
559
560void zmq::radix_tree_t::apply (
561 void (*func_) (unsigned char *data_, size_t size_, void *arg_), void *arg_)
562{
563 if (_root.refcount () > 0)
564 func_ (NULL, 0, arg_); // Root node is always empty.
565
566 std::vector<unsigned char> buffer;
567 for (size_t i = 0; i < _root.edgecount (); ++i)
568 visit_keys (_root.node_at (i), buffer, func_, arg_);
569}
570
571size_t zmq::radix_tree_t::size () const
572{
573 return _size;
574}
575