1/*
2 * Copyright © 2022 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Garret Rieger
25 */
26
27#include "../hb-set.hh"
28#include "../hb-priority-queue.hh"
29#include "../hb-serialize.hh"
30
31#ifndef GRAPH_GRAPH_HH
32#define GRAPH_GRAPH_HH
33
34namespace graph {
35
36/**
37 * Represents a serialized table in the form of a graph.
38 * Provides methods for modifying and reordering the graph.
39 */
40struct graph_t
41{
42 struct vertex_t
43 {
44 hb_serialize_context_t::object_t obj;
45 int64_t distance = 0 ;
46 unsigned space = 0 ;
47 unsigned start = 0;
48 unsigned end = 0;
49 unsigned priority = 0;
50 private:
51 unsigned incoming_edges_ = 0;
52 unsigned single_parent = (unsigned) -1;
53 hb_hashmap_t<unsigned, unsigned> parents;
54 public:
55
56 auto parents_iter () const HB_AUTO_RETURN
57 (
58 hb_concat (
59 hb_iter (&single_parent, single_parent != (unsigned) -1),
60 parents.keys_ref ()
61 )
62 )
63
64 bool in_error () const
65 {
66 return parents.in_error ();
67 }
68
69 bool link_positions_valid (unsigned num_objects, bool removed_nil)
70 {
71 hb_set_t assigned_bytes;
72 for (const auto& l : obj.real_links)
73 {
74 if (l.objidx >= num_objects
75 || (removed_nil && !l.objidx))
76 {
77 DEBUG_MSG (SUBSET_REPACK, nullptr,
78 "Invalid graph. Invalid object index.");
79 return false;
80 }
81
82 unsigned start = l.position;
83 unsigned end = start + l.width - 1;
84
85 if (unlikely (l.width < 2 || l.width > 4))
86 {
87 DEBUG_MSG (SUBSET_REPACK, nullptr,
88 "Invalid graph. Invalid link width.");
89 return false;
90 }
91
92 if (unlikely (end >= table_size ()))
93 {
94 DEBUG_MSG (SUBSET_REPACK, nullptr,
95 "Invalid graph. Link position is out of bounds.");
96 return false;
97 }
98
99 if (unlikely (assigned_bytes.intersects (start, end)))
100 {
101 DEBUG_MSG (SUBSET_REPACK, nullptr,
102 "Invalid graph. Found offsets whose positions overlap.");
103 return false;
104 }
105
106 assigned_bytes.add_range (start, end);
107 }
108
109 return !assigned_bytes.in_error ();
110 }
111
112 void normalize ()
113 {
114 obj.real_links.qsort ();
115 for (auto& l : obj.real_links)
116 {
117 for (unsigned i = 0; i < l.width; i++)
118 {
119 obj.head[l.position + i] = 0;
120 }
121 }
122 }
123
124 bool equals (const vertex_t& other,
125 const graph_t& graph,
126 const graph_t& other_graph,
127 unsigned depth) const
128 {
129 if (!(as_bytes () == other.as_bytes ()))
130 {
131 DEBUG_MSG (SUBSET_REPACK, nullptr,
132 "vertex [%lu] bytes != [%lu] bytes, depth = %u",
133 (unsigned long) table_size (),
134 (unsigned long) other.table_size (),
135 depth);
136
137 auto a = as_bytes ();
138 auto b = other.as_bytes ();
139 while (a || b)
140 {
141 DEBUG_MSG (SUBSET_REPACK, nullptr,
142 " 0x%x %s 0x%x", (unsigned) *a, (*a == *b) ? "==" : "!=", (unsigned) *b);
143 a++;
144 b++;
145 }
146 return false;
147 }
148
149 return links_equal (obj.real_links, other.obj.real_links, graph, other_graph, depth);
150 }
151
152 hb_bytes_t as_bytes () const
153 {
154 return hb_bytes_t (obj.head, table_size ());
155 }
156
157 friend void swap (vertex_t& a, vertex_t& b)
158 {
159 hb_swap (a.obj, b.obj);
160 hb_swap (a.distance, b.distance);
161 hb_swap (a.space, b.space);
162 hb_swap (a.single_parent, b.single_parent);
163 hb_swap (a.parents, b.parents);
164 hb_swap (a.incoming_edges_, b.incoming_edges_);
165 hb_swap (a.start, b.start);
166 hb_swap (a.end, b.end);
167 hb_swap (a.priority, b.priority);
168 }
169
170 hb_hashmap_t<unsigned, unsigned>
171 position_to_index_map () const
172 {
173 hb_hashmap_t<unsigned, unsigned> result;
174
175 result.alloc (obj.real_links.length);
176 for (const auto& l : obj.real_links) {
177 result.set (l.position, l.objidx);
178 }
179
180 return result;
181 }
182
183 bool is_shared () const
184 {
185 return parents.get_population () > 1;
186 }
187
188 unsigned incoming_edges () const
189 {
190 if (HB_DEBUG_SUBSET_REPACK)
191 {
192 assert (incoming_edges_ == (single_parent != (unsigned) -1) +
193 (parents.values_ref () | hb_reduce (hb_add, 0)));
194 }
195 return incoming_edges_;
196 }
197
198 void reset_parents ()
199 {
200 incoming_edges_ = 0;
201 single_parent = (unsigned) -1;
202 parents.reset ();
203 }
204
205 void add_parent (unsigned parent_index)
206 {
207 assert (parent_index != (unsigned) -1);
208 if (incoming_edges_ == 0)
209 {
210 single_parent = parent_index;
211 incoming_edges_ = 1;
212 return;
213 }
214 else if (single_parent != (unsigned) -1)
215 {
216 assert (incoming_edges_ == 1);
217 if (!parents.set (single_parent, 1))
218 return;
219 single_parent = (unsigned) -1;
220 }
221
222 unsigned *v;
223 if (parents.has (parent_index, &v))
224 {
225 (*v)++;
226 incoming_edges_++;
227 }
228 else if (parents.set (parent_index, 1))
229 incoming_edges_++;
230 }
231
232 void remove_parent (unsigned parent_index)
233 {
234 if (parent_index == single_parent)
235 {
236 single_parent = (unsigned) -1;
237 incoming_edges_--;
238 return;
239 }
240
241 unsigned *v;
242 if (parents.has (parent_index, &v))
243 {
244 incoming_edges_--;
245 if (*v > 1)
246 (*v)--;
247 else
248 parents.del (parent_index);
249
250 if (incoming_edges_ == 1)
251 {
252 single_parent = *parents.keys ();
253 parents.reset ();
254 }
255 }
256 }
257
258 void remove_real_link (unsigned child_index, const void* offset)
259 {
260 unsigned count = obj.real_links.length;
261 for (unsigned i = 0; i < count; i++)
262 {
263 auto& link = obj.real_links.arrayZ[i];
264 if (link.objidx != child_index)
265 continue;
266
267 if ((obj.head + link.position) != offset)
268 continue;
269
270 obj.real_links.remove_unordered (i);
271 return;
272 }
273 }
274
275 bool remap_parents (const hb_vector_t<unsigned>& id_map)
276 {
277 if (single_parent != (unsigned) -1)
278 {
279 assert (single_parent < id_map.length);
280 single_parent = id_map[single_parent];
281 return true;
282 }
283
284 hb_hashmap_t<unsigned, unsigned> new_parents;
285 new_parents.alloc (parents.get_population ());
286 for (auto _ : parents)
287 {
288 assert (_.first < id_map.length);
289 assert (!new_parents.has (id_map[_.first]));
290 new_parents.set (id_map[_.first], _.second);
291 }
292
293 if (new_parents.in_error ())
294 return false;
295
296 parents = std::move (new_parents);
297 return true;
298 }
299
300 void remap_parent (unsigned old_index, unsigned new_index)
301 {
302 if (single_parent != (unsigned) -1)
303 {
304 if (single_parent == old_index)
305 single_parent = new_index;
306 return;
307 }
308
309 const unsigned *pv;
310 if (parents.has (old_index, &pv))
311 {
312 unsigned v = *pv;
313 parents.set (new_index, v);
314 parents.del (old_index);
315 }
316 }
317
318 bool is_leaf () const
319 {
320 return !obj.real_links.length && !obj.virtual_links.length;
321 }
322
323 bool raise_priority ()
324 {
325 if (has_max_priority ()) return false;
326 priority++;
327 return true;
328 }
329
330 bool has_max_priority () const {
331 return priority >= 3;
332 }
333
334 size_t table_size () const {
335 return obj.tail - obj.head;
336 }
337
338 int64_t modified_distance (unsigned order) const
339 {
340 // TODO(garretrieger): once priority is high enough, should try
341 // setting distance = 0 which will force to sort immediately after
342 // it's parent where possible.
343
344 int64_t modified_distance =
345 hb_min (hb_max(distance + distance_modifier (), 0), 0x7FFFFFFFFFF);
346 if (has_max_priority ()) {
347 modified_distance = 0;
348 }
349 return (modified_distance << 18) | (0x003FFFF & order);
350 }
351
352 int64_t distance_modifier () const
353 {
354 if (!priority) return 0;
355 int64_t table_size = obj.tail - obj.head;
356
357 if (priority == 1)
358 return -table_size / 2;
359
360 return -table_size;
361 }
362
363 private:
364 bool links_equal (const hb_vector_t<hb_serialize_context_t::object_t::link_t>& this_links,
365 const hb_vector_t<hb_serialize_context_t::object_t::link_t>& other_links,
366 const graph_t& graph,
367 const graph_t& other_graph,
368 unsigned depth) const
369 {
370 auto a = this_links.iter ();
371 auto b = other_links.iter ();
372
373 while (a && b)
374 {
375 const auto& link_a = *a;
376 const auto& link_b = *b;
377
378 if (link_a.width != link_b.width ||
379 link_a.is_signed != link_b.is_signed ||
380 link_a.whence != link_b.whence ||
381 link_a.position != link_b.position ||
382 link_a.bias != link_b.bias)
383 return false;
384
385 if (!graph.vertices_[link_a.objidx].equals (
386 other_graph.vertices_[link_b.objidx], graph, other_graph, depth + 1))
387 return false;
388
389 a++;
390 b++;
391 }
392
393 if (bool (a) != bool (b))
394 return false;
395
396 return true;
397 }
398 };
399
400 template <typename T>
401 struct vertex_and_table_t
402 {
403 vertex_and_table_t () : index (0), vertex (nullptr), table (nullptr)
404 {}
405
406 unsigned index;
407 vertex_t* vertex;
408 T* table;
409
410 operator bool () {
411 return table && vertex;
412 }
413 };
414
415 /*
416 * A topological sorting of an object graph. Ordered
417 * in reverse serialization order (first object in the
418 * serialization is at the end of the list). This matches
419 * the 'packed' object stack used internally in the
420 * serializer
421 */
422 template<typename T>
423 graph_t (const T& objects)
424 : parents_invalid (true),
425 distance_invalid (true),
426 positions_invalid (true),
427 successful (true),
428 buffers ()
429 {
430 num_roots_for_space_.push (1);
431 bool removed_nil = false;
432 vertices_.alloc (objects.length);
433 vertices_scratch_.alloc (objects.length);
434 unsigned count = objects.length;
435 for (unsigned i = 0; i < count; i++)
436 {
437 // If this graph came from a serialization buffer object 0 is the
438 // nil object. We don't need it for our purposes here so drop it.
439 if (i == 0 && !objects.arrayZ[i])
440 {
441 removed_nil = true;
442 continue;
443 }
444
445 vertex_t* v = vertices_.push ();
446 if (check_success (!vertices_.in_error ()))
447 v->obj = *objects.arrayZ[i];
448
449 check_success (v->link_positions_valid (count, removed_nil));
450
451 if (!removed_nil) continue;
452 // Fix indices to account for removed nil object.
453 for (auto& l : v->obj.all_links_writer ()) {
454 l.objidx--;
455 }
456 }
457 }
458
459 ~graph_t ()
460 {
461 for (char* b : buffers)
462 hb_free (b);
463 }
464
465 bool operator== (const graph_t& other) const
466 {
467 return root ().equals (other.root (), *this, other, 0);
468 }
469
470 // Sorts links of all objects in a consistent manner and zeroes all offsets.
471 void normalize ()
472 {
473 for (auto& v : vertices_.writer ())
474 v.normalize ();
475 }
476
477 bool in_error () const
478 {
479 return !successful ||
480 vertices_.in_error () ||
481 num_roots_for_space_.in_error ();
482 }
483
484 const vertex_t& root () const
485 {
486 return vertices_[root_idx ()];
487 }
488
489 unsigned root_idx () const
490 {
491 // Object graphs are in reverse order, the first object is at the end
492 // of the vector. Since the graph is topologically sorted it's safe to
493 // assume the first object has no incoming edges.
494 return vertices_.length - 1;
495 }
496
497 const hb_serialize_context_t::object_t& object (unsigned i) const
498 {
499 return vertices_[i].obj;
500 }
501
502 bool add_buffer (char* buffer)
503 {
504 buffers.push (buffer);
505 return !buffers.in_error ();
506 }
507
508 /*
509 * Adds a 16 bit link from parent_id to child_id
510 */
511 template<typename T>
512 void add_link (T* offset,
513 unsigned parent_id,
514 unsigned child_id)
515 {
516 auto& v = vertices_[parent_id];
517 auto* link = v.obj.real_links.push ();
518 link->width = 2;
519 link->objidx = child_id;
520 link->position = (char*) offset - (char*) v.obj.head;
521 vertices_[child_id].add_parent (parent_id);
522 }
523
524 /*
525 * Generates a new topological sorting of graph ordered by the shortest
526 * distance to each node if positions are marked as invalid.
527 */
528 void sort_shortest_distance_if_needed ()
529 {
530 if (!positions_invalid) return;
531 sort_shortest_distance ();
532 }
533
534
535 /*
536 * Generates a new topological sorting of graph ordered by the shortest
537 * distance to each node.
538 */
539 void sort_shortest_distance ()
540 {
541 positions_invalid = true;
542
543 if (vertices_.length <= 1) {
544 // Graph of 1 or less doesn't need sorting.
545 return;
546 }
547
548 update_distances ();
549
550 hb_priority_queue_t queue;
551 hb_vector_t<vertex_t> &sorted_graph = vertices_scratch_;
552 if (unlikely (!check_success (sorted_graph.resize (vertices_.length)))) return;
553 hb_vector_t<unsigned> id_map;
554 if (unlikely (!check_success (id_map.resize (vertices_.length)))) return;
555
556 hb_vector_t<unsigned> removed_edges;
557 if (unlikely (!check_success (removed_edges.resize (vertices_.length)))) return;
558 update_parents ();
559
560 queue.insert (root ().modified_distance (0), root_idx ());
561 int new_id = root_idx ();
562 unsigned order = 1;
563 while (!queue.in_error () && !queue.is_empty ())
564 {
565 unsigned next_id = queue.pop_minimum().second;
566
567 sorted_graph[new_id] = std::move (vertices_[next_id]);
568 const vertex_t& next = sorted_graph[new_id];
569
570 if (unlikely (!check_success(new_id >= 0))) {
571 // We are out of ids. Which means we've visited a node more than once.
572 // This graph contains a cycle which is not allowed.
573 DEBUG_MSG (SUBSET_REPACK, nullptr, "Invalid graph. Contains cycle.");
574 return;
575 }
576
577 id_map[next_id] = new_id--;
578
579 for (const auto& link : next.obj.all_links ()) {
580 removed_edges[link.objidx]++;
581 if (!(vertices_[link.objidx].incoming_edges () - removed_edges[link.objidx]))
582 // Add the order that the links were encountered to the priority.
583 // This ensures that ties between priorities objects are broken in a consistent
584 // way. More specifically this is set up so that if a set of objects have the same
585 // distance they'll be added to the topological order in the order that they are
586 // referenced from the parent object.
587 queue.insert (vertices_[link.objidx].modified_distance (order++),
588 link.objidx);
589 }
590 }
591
592 check_success (!queue.in_error ());
593 check_success (!sorted_graph.in_error ());
594
595 check_success (remap_all_obj_indices (id_map, &sorted_graph));
596 vertices_ = std::move (sorted_graph);
597
598 if (!check_success (new_id == -1))
599 print_orphaned_nodes ();
600 }
601
602 /*
603 * Finds the set of nodes (placed into roots) that should be assigned unique spaces.
604 * More specifically this looks for the top most 24 bit or 32 bit links in the graph.
605 * Some special casing is done that is specific to the layout of GSUB/GPOS tables.
606 */
607 void find_space_roots (hb_set_t& visited, hb_set_t& roots)
608 {
609 int root_index = (int) root_idx ();
610 for (int i = root_index; i >= 0; i--)
611 {
612 if (visited.has (i)) continue;
613
614 // Only real links can form 32 bit spaces
615 for (auto& l : vertices_[i].obj.real_links)
616 {
617 if (l.is_signed || l.width < 3)
618 continue;
619
620 if (i == root_index && l.width == 3)
621 // Ignore 24bit links from the root node, this skips past the single 24bit
622 // pointer to the lookup list.
623 continue;
624
625 if (l.width == 3)
626 {
627 // A 24bit offset forms a root, unless there is 32bit offsets somewhere
628 // in it's subgraph, then those become the roots instead. This is to make sure
629 // that extension subtables beneath a 24bit lookup become the spaces instead
630 // of the offset to the lookup.
631 hb_set_t sub_roots;
632 find_32bit_roots (l.objidx, sub_roots);
633 if (sub_roots) {
634 for (unsigned sub_root_idx : sub_roots) {
635 roots.add (sub_root_idx);
636 find_subgraph (sub_root_idx, visited);
637 }
638 continue;
639 }
640 }
641
642 roots.add (l.objidx);
643 find_subgraph (l.objidx, visited);
644 }
645 }
646 }
647
648 template <typename T, typename ...Ts>
649 vertex_and_table_t<T> as_table (unsigned parent, const void* offset, Ts... ds)
650 {
651 return as_table_from_index<T> (index_for_offset (parent, offset), std::forward<Ts>(ds)...);
652 }
653
654 template <typename T, typename ...Ts>
655 vertex_and_table_t<T> as_mutable_table (unsigned parent, const void* offset, Ts... ds)
656 {
657 return as_table_from_index<T> (mutable_index_for_offset (parent, offset), std::forward<Ts>(ds)...);
658 }
659
660 template <typename T, typename ...Ts>
661 vertex_and_table_t<T> as_table_from_index (unsigned index, Ts... ds)
662 {
663 if (index >= vertices_.length)
664 return vertex_and_table_t<T> ();
665
666 vertex_and_table_t<T> r;
667 r.vertex = &vertices_[index];
668 r.table = (T*) r.vertex->obj.head;
669 r.index = index;
670 if (!r.table)
671 return vertex_and_table_t<T> ();
672
673 if (!r.table->sanitize (*(r.vertex), std::forward<Ts>(ds)...))
674 return vertex_and_table_t<T> ();
675
676 return r;
677 }
678
679 // Finds the object id of the object pointed to by the offset at 'offset'
680 // within object[node_idx].
681 unsigned index_for_offset (unsigned node_idx, const void* offset) const
682 {
683 const auto& node = object (node_idx);
684 if (offset < node.head || offset >= node.tail) return -1;
685
686 unsigned count = node.real_links.length;
687 for (unsigned i = 0; i < count; i++)
688 {
689 // Use direct access for increased performance, this is a hot method.
690 const auto& link = node.real_links.arrayZ[i];
691 if (offset != node.head + link.position)
692 continue;
693 return link.objidx;
694 }
695
696 return -1;
697 }
698
699 // Finds the object id of the object pointed to by the offset at 'offset'
700 // within object[node_idx]. Ensures that the returned object is safe to mutate.
701 // That is, if the original child object is shared by parents other than node_idx
702 // it will be duplicated and the duplicate will be returned instead.
703 unsigned mutable_index_for_offset (unsigned node_idx, const void* offset)
704 {
705 unsigned child_idx = index_for_offset (node_idx, offset);
706 auto& child = vertices_[child_idx];
707 for (unsigned p : child.parents_iter ())
708 {
709 if (p != node_idx) {
710 return duplicate (node_idx, child_idx);
711 }
712 }
713
714 return child_idx;
715 }
716
717
718 /*
719 * Assign unique space numbers to each connected subgraph of 24 bit and/or 32 bit offset(s).
720 * Currently, this is implemented specifically tailored to the structure of a GPOS/GSUB
721 * (including with 24bit offsets) table.
722 */
723 bool assign_spaces ()
724 {
725 update_parents ();
726
727 hb_set_t visited;
728 hb_set_t roots;
729 find_space_roots (visited, roots);
730
731 // Mark everything not in the subgraphs of the roots as visited. This prevents
732 // subgraphs from being connected via nodes not in those subgraphs.
733 visited.invert ();
734
735 if (!roots) return false;
736
737 while (roots)
738 {
739 uint32_t next = HB_SET_VALUE_INVALID;
740 if (unlikely (!check_success (!roots.in_error ()))) break;
741 if (!roots.next (&next)) break;
742
743 hb_set_t connected_roots;
744 find_connected_nodes (next, roots, visited, connected_roots);
745 if (unlikely (!check_success (!connected_roots.in_error ()))) break;
746
747 isolate_subgraph (connected_roots);
748 if (unlikely (!check_success (!connected_roots.in_error ()))) break;
749
750 unsigned next_space = this->next_space ();
751 num_roots_for_space_.push (0);
752 for (unsigned root : connected_roots)
753 {
754 DEBUG_MSG (SUBSET_REPACK, nullptr, "Subgraph %u gets space %u", root, next_space);
755 vertices_[root].space = next_space;
756 num_roots_for_space_[next_space] = num_roots_for_space_[next_space] + 1;
757 distance_invalid = true;
758 positions_invalid = true;
759 }
760
761 // TODO(grieger): special case for GSUB/GPOS use extension promotions to move 16 bit space
762 // into the 32 bit space as needed, instead of using isolation.
763 }
764
765
766
767 return true;
768 }
769
770 /*
771 * Isolates the subgraph of nodes reachable from root. Any links to nodes in the subgraph
772 * that originate from outside of the subgraph will be removed by duplicating the linked to
773 * object.
774 *
775 * Indices stored in roots will be updated if any of the roots are duplicated to new indices.
776 */
777 bool isolate_subgraph (hb_set_t& roots)
778 {
779 update_parents ();
780 hb_map_t subgraph;
781
782 // incoming edges to root_idx should be all 32 bit in length so we don't need to de-dup these
783 // set the subgraph incoming edge count to match all of root_idx's incoming edges
784 hb_set_t parents;
785 for (unsigned root_idx : roots)
786 {
787 subgraph.set (root_idx, wide_parents (root_idx, parents));
788 find_subgraph (root_idx, subgraph);
789 }
790 if (subgraph.in_error ())
791 return false;
792
793 unsigned original_root_idx = root_idx ();
794 hb_map_t index_map;
795 bool made_changes = false;
796 for (auto entry : subgraph.iter ())
797 {
798 assert (entry.first < vertices_.length);
799 const auto& node = vertices_[entry.first];
800 unsigned subgraph_incoming_edges = entry.second;
801
802 if (subgraph_incoming_edges < node.incoming_edges ())
803 {
804 // Only de-dup objects with incoming links from outside the subgraph.
805 made_changes = true;
806 duplicate_subgraph (entry.first, index_map);
807 }
808 }
809
810 if (in_error ())
811 return false;
812
813 if (!made_changes)
814 return false;
815
816 if (original_root_idx != root_idx ()
817 && parents.has (original_root_idx))
818 {
819 // If the root idx has changed since parents was determined, update root idx in parents
820 parents.add (root_idx ());
821 parents.del (original_root_idx);
822 }
823
824 auto new_subgraph =
825 + subgraph.keys ()
826 | hb_map([&] (uint32_t node_idx) {
827 const uint32_t *v;
828 if (index_map.has (node_idx, &v)) return *v;
829 return node_idx;
830 })
831 ;
832
833 remap_obj_indices (index_map, new_subgraph);
834 remap_obj_indices (index_map, parents.iter (), true);
835
836 // Update roots set with new indices as needed.
837 for (auto next : roots)
838 {
839 const uint32_t *v;
840 if (index_map.has (next, &v))
841 {
842 roots.del (next);
843 roots.add (*v);
844 }
845 }
846
847 return true;
848 }
849
850 void find_subgraph (unsigned node_idx, hb_map_t& subgraph)
851 {
852 for (const auto& link : vertices_[node_idx].obj.all_links ())
853 {
854 hb_codepoint_t *v;
855 if (subgraph.has (link.objidx, &v))
856 {
857 (*v)++;
858 continue;
859 }
860 subgraph.set (link.objidx, 1);
861 find_subgraph (link.objidx, subgraph);
862 }
863 }
864
865 void find_subgraph (unsigned node_idx, hb_set_t& subgraph)
866 {
867 if (subgraph.has (node_idx)) return;
868 subgraph.add (node_idx);
869 for (const auto& link : vertices_[node_idx].obj.all_links ())
870 find_subgraph (link.objidx, subgraph);
871 }
872
873 size_t find_subgraph_size (unsigned node_idx, hb_set_t& subgraph, unsigned max_depth = -1)
874 {
875 if (subgraph.has (node_idx)) return 0;
876 subgraph.add (node_idx);
877
878 const auto& o = vertices_[node_idx].obj;
879 size_t size = o.tail - o.head;
880 if (max_depth == 0)
881 return size;
882
883 for (const auto& link : o.all_links ())
884 size += find_subgraph_size (link.objidx, subgraph, max_depth - 1);
885 return size;
886 }
887
888 /*
889 * Finds the topmost children of 32bit offsets in the subgraph starting
890 * at node_idx. Found indices are placed into 'found'.
891 */
892 void find_32bit_roots (unsigned node_idx, hb_set_t& found)
893 {
894 for (const auto& link : vertices_[node_idx].obj.all_links ())
895 {
896 if (!link.is_signed && link.width == 4) {
897 found.add (link.objidx);
898 continue;
899 }
900 find_32bit_roots (link.objidx, found);
901 }
902 }
903
904 /*
905 * Moves the child of old_parent_idx pointed to by old_offset to a new
906 * vertex at the new_offset.
907 */
908 template<typename O>
909 void move_child (unsigned old_parent_idx,
910 const O* old_offset,
911 unsigned new_parent_idx,
912 const O* new_offset)
913 {
914 distance_invalid = true;
915 positions_invalid = true;
916
917 auto& old_v = vertices_[old_parent_idx];
918 auto& new_v = vertices_[new_parent_idx];
919
920 unsigned child_id = index_for_offset (old_parent_idx,
921 old_offset);
922
923 auto* new_link = new_v.obj.real_links.push ();
924 new_link->width = O::static_size;
925 new_link->objidx = child_id;
926 new_link->position = (const char*) new_offset - (const char*) new_v.obj.head;
927
928 auto& child = vertices_[child_id];
929 child.add_parent (new_parent_idx);
930
931 old_v.remove_real_link (child_id, old_offset);
932 child.remove_parent (old_parent_idx);
933 }
934
935 /*
936 * duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign
937 * links. index_map is updated with mappings from old id to new id. If a duplication has already
938 * been performed for a given index, then it will be skipped.
939 */
940 void duplicate_subgraph (unsigned node_idx, hb_map_t& index_map)
941 {
942 if (index_map.has (node_idx))
943 return;
944
945 unsigned clone_idx = duplicate (node_idx);
946 if (!check_success (clone_idx != (unsigned) -1))
947 return;
948
949 index_map.set (node_idx, clone_idx);
950 for (const auto& l : object (node_idx).all_links ()) {
951 duplicate_subgraph (l.objidx, index_map);
952 }
953 }
954
955 /*
956 * Creates a copy of node_idx and returns it's new index.
957 */
958 unsigned duplicate (unsigned node_idx)
959 {
960 positions_invalid = true;
961 distance_invalid = true;
962
963 auto* clone = vertices_.push ();
964 auto& child = vertices_[node_idx];
965 if (vertices_.in_error ()) {
966 return -1;
967 }
968
969 clone->obj.head = child.obj.head;
970 clone->obj.tail = child.obj.tail;
971 clone->distance = child.distance;
972 clone->space = child.space;
973 clone->reset_parents ();
974
975 unsigned clone_idx = vertices_.length - 2;
976 for (const auto& l : child.obj.real_links)
977 {
978 clone->obj.real_links.push (l);
979 vertices_[l.objidx].add_parent (clone_idx);
980 }
981 for (const auto& l : child.obj.virtual_links)
982 {
983 clone->obj.virtual_links.push (l);
984 vertices_[l.objidx].add_parent (clone_idx);
985 }
986
987 check_success (!clone->obj.real_links.in_error ());
988 check_success (!clone->obj.virtual_links.in_error ());
989
990 // The last object is the root of the graph, so swap back the root to the end.
991 // The root's obj idx does change, however since it's root nothing else refers to it.
992 // all other obj idx's will be unaffected.
993 hb_swap (vertices_[vertices_.length - 2], *clone);
994
995 // Since the root moved, update the parents arrays of all children on the root.
996 for (const auto& l : root ().obj.all_links ())
997 vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
998
999 return clone_idx;
1000 }
1001
1002 /*
1003 * Creates a copy of child and re-assigns the link from
1004 * parent to the clone. The copy is a shallow copy, objects
1005 * linked from child are not duplicated.
1006 */
1007 unsigned duplicate_if_shared (unsigned parent_idx, unsigned child_idx)
1008 {
1009 unsigned new_idx = duplicate (parent_idx, child_idx);
1010 if (new_idx == (unsigned) -1) return child_idx;
1011 return new_idx;
1012 }
1013
1014
1015 /*
1016 * Creates a copy of child and re-assigns the link from
1017 * parent to the clone. The copy is a shallow copy, objects
1018 * linked from child are not duplicated.
1019 */
1020 unsigned duplicate (unsigned parent_idx, unsigned child_idx)
1021 {
1022 update_parents ();
1023
1024 unsigned links_to_child = 0;
1025 for (const auto& l : vertices_[parent_idx].obj.all_links ())
1026 {
1027 if (l.objidx == child_idx) links_to_child++;
1028 }
1029
1030 if (vertices_[child_idx].incoming_edges () <= links_to_child)
1031 {
1032 // Can't duplicate this node, doing so would orphan the original one as all remaining links
1033 // to child are from parent.
1034 DEBUG_MSG (SUBSET_REPACK, nullptr, " Not duplicating %u => %u",
1035 parent_idx, child_idx);
1036 return -1;
1037 }
1038
1039 DEBUG_MSG (SUBSET_REPACK, nullptr, " Duplicating %u => %u",
1040 parent_idx, child_idx);
1041
1042 unsigned clone_idx = duplicate (child_idx);
1043 if (clone_idx == (unsigned) -1) return false;
1044 // duplicate shifts the root node idx, so if parent_idx was root update it.
1045 if (parent_idx == clone_idx) parent_idx++;
1046
1047 auto& parent = vertices_[parent_idx];
1048 for (auto& l : parent.obj.all_links_writer ())
1049 {
1050 if (l.objidx != child_idx)
1051 continue;
1052
1053 reassign_link (l, parent_idx, clone_idx);
1054 }
1055
1056 return clone_idx;
1057 }
1058
1059
1060 /*
1061 * Adds a new node to the graph, not connected to anything.
1062 */
1063 unsigned new_node (char* head, char* tail)
1064 {
1065 positions_invalid = true;
1066 distance_invalid = true;
1067
1068 auto* clone = vertices_.push ();
1069 if (vertices_.in_error ()) {
1070 return -1;
1071 }
1072
1073 clone->obj.head = head;
1074 clone->obj.tail = tail;
1075 clone->distance = 0;
1076 clone->space = 0;
1077
1078 unsigned clone_idx = vertices_.length - 2;
1079
1080 // The last object is the root of the graph, so swap back the root to the end.
1081 // The root's obj idx does change, however since it's root nothing else refers to it.
1082 // all other obj idx's will be unaffected.
1083 hb_swap (vertices_[vertices_.length - 2], *clone);
1084
1085 // Since the root moved, update the parents arrays of all children on the root.
1086 for (const auto& l : root ().obj.all_links ())
1087 vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
1088
1089 return clone_idx;
1090 }
1091
1092 /*
1093 * Raises the sorting priority of all children.
1094 */
1095 bool raise_childrens_priority (unsigned parent_idx)
1096 {
1097 DEBUG_MSG (SUBSET_REPACK, nullptr, " Raising priority of all children of %u",
1098 parent_idx);
1099 // This operation doesn't change ordering until a sort is run, so no need
1100 // to invalidate positions. It does not change graph structure so no need
1101 // to update distances or edge counts.
1102 auto& parent = vertices_[parent_idx].obj;
1103 bool made_change = false;
1104 for (auto& l : parent.all_links_writer ())
1105 made_change |= vertices_[l.objidx].raise_priority ();
1106 return made_change;
1107 }
1108
1109 bool is_fully_connected ()
1110 {
1111 update_parents();
1112
1113 if (root().incoming_edges ())
1114 // Root cannot have parents.
1115 return false;
1116
1117 for (unsigned i = 0; i < root_idx (); i++)
1118 {
1119 if (!vertices_[i].incoming_edges ())
1120 return false;
1121 }
1122 return true;
1123 }
1124
1125#if 0
1126 /*
1127 * Saves the current graph to a packed binary format which the repacker fuzzer takes
1128 * as a seed.
1129 */
1130 void save_fuzzer_seed (hb_tag_t tag) const
1131 {
1132 FILE* f = fopen ("./repacker_fuzzer_seed", "w");
1133 fwrite ((void*) &tag, sizeof (tag), 1, f);
1134
1135 uint16_t num_objects = vertices_.length;
1136 fwrite ((void*) &num_objects, sizeof (num_objects), 1, f);
1137
1138 for (const auto& v : vertices_)
1139 {
1140 uint16_t blob_size = v.table_size ();
1141 fwrite ((void*) &blob_size, sizeof (blob_size), 1, f);
1142 fwrite ((const void*) v.obj.head, blob_size, 1, f);
1143 }
1144
1145 uint16_t link_count = 0;
1146 for (const auto& v : vertices_)
1147 link_count += v.obj.real_links.length;
1148
1149 fwrite ((void*) &link_count, sizeof (link_count), 1, f);
1150
1151 typedef struct
1152 {
1153 uint16_t parent;
1154 uint16_t child;
1155 uint16_t position;
1156 uint8_t width;
1157 } link_t;
1158
1159 for (unsigned i = 0; i < vertices_.length; i++)
1160 {
1161 for (const auto& l : vertices_[i].obj.real_links)
1162 {
1163 link_t link {
1164 (uint16_t) i, (uint16_t) l.objidx,
1165 (uint16_t) l.position, (uint8_t) l.width
1166 };
1167 fwrite ((void*) &link, sizeof (link), 1, f);
1168 }
1169 }
1170
1171 fclose (f);
1172 }
1173#endif
1174
1175 void print_orphaned_nodes ()
1176 {
1177 if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
1178
1179 DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
1180 parents_invalid = true;
1181 update_parents();
1182
1183 if (root().incoming_edges ()) {
1184 DEBUG_MSG (SUBSET_REPACK, nullptr, "Root node has incoming edges.");
1185 }
1186
1187 for (unsigned i = 0; i < root_idx (); i++)
1188 {
1189 const auto& v = vertices_[i];
1190 if (!v.incoming_edges ())
1191 DEBUG_MSG (SUBSET_REPACK, nullptr, "Node %u is orphaned.", i);
1192 }
1193 }
1194
1195 unsigned num_roots_for_space (unsigned space) const
1196 {
1197 return num_roots_for_space_[space];
1198 }
1199
1200 unsigned next_space () const
1201 {
1202 return num_roots_for_space_.length;
1203 }
1204
1205 void move_to_new_space (const hb_set_t& indices)
1206 {
1207 num_roots_for_space_.push (0);
1208 unsigned new_space = num_roots_for_space_.length - 1;
1209
1210 for (unsigned index : indices) {
1211 auto& node = vertices_[index];
1212 num_roots_for_space_[node.space] = num_roots_for_space_[node.space] - 1;
1213 num_roots_for_space_[new_space] = num_roots_for_space_[new_space] + 1;
1214 node.space = new_space;
1215 distance_invalid = true;
1216 positions_invalid = true;
1217 }
1218 }
1219
1220 unsigned space_for (unsigned index, unsigned* root = nullptr) const
1221 {
1222 loop:
1223 assert (index < vertices_.length);
1224 const auto& node = vertices_[index];
1225 if (node.space)
1226 {
1227 if (root != nullptr)
1228 *root = index;
1229 return node.space;
1230 }
1231
1232 if (!node.incoming_edges ())
1233 {
1234 if (root)
1235 *root = index;
1236 return 0;
1237 }
1238
1239 index = *node.parents_iter ();
1240 goto loop;
1241 }
1242
1243 void err_other_error () { this->successful = false; }
1244
1245 size_t total_size_in_bytes () const {
1246 size_t total_size = 0;
1247 unsigned count = vertices_.length;
1248 for (unsigned i = 0; i < count; i++) {
1249 size_t size = vertices_.arrayZ[i].obj.tail - vertices_.arrayZ[i].obj.head;
1250 total_size += size;
1251 }
1252 return total_size;
1253 }
1254
1255
1256 private:
1257
1258 /*
1259 * Returns the numbers of incoming edges that are 24 or 32 bits wide.
1260 */
1261 unsigned wide_parents (unsigned node_idx, hb_set_t& parents) const
1262 {
1263 unsigned count = 0;
1264 for (unsigned p : vertices_[node_idx].parents_iter ())
1265 {
1266 // Only real links can be wide
1267 for (const auto& l : vertices_[p].obj.real_links)
1268 {
1269 if (l.objidx == node_idx
1270 && (l.width == 3 || l.width == 4)
1271 && !l.is_signed)
1272 {
1273 count++;
1274 parents.add (p);
1275 }
1276 }
1277 }
1278 return count;
1279 }
1280
1281 bool check_success (bool success)
1282 { return this->successful && (success || ((void) err_other_error (), false)); }
1283
1284 public:
1285 /*
1286 * Creates a map from objid to # of incoming edges.
1287 */
1288 void update_parents ()
1289 {
1290 if (!parents_invalid) return;
1291
1292 unsigned count = vertices_.length;
1293
1294 for (unsigned i = 0; i < count; i++)
1295 vertices_.arrayZ[i].reset_parents ();
1296
1297 for (unsigned p = 0; p < count; p++)
1298 {
1299 for (auto& l : vertices_.arrayZ[p].obj.all_links ())
1300 vertices_[l.objidx].add_parent (p);
1301 }
1302
1303 for (unsigned i = 0; i < count; i++)
1304 // parents arrays must be accurate or downstream operations like cycle detection
1305 // and sorting won't work correctly.
1306 check_success (!vertices_.arrayZ[i].in_error ());
1307
1308 parents_invalid = false;
1309 }
1310
1311 /*
1312 * compute the serialized start and end positions for each vertex.
1313 */
1314 void update_positions ()
1315 {
1316 if (!positions_invalid) return;
1317
1318 unsigned current_pos = 0;
1319 for (int i = root_idx (); i >= 0; i--)
1320 {
1321 auto& v = vertices_[i];
1322 v.start = current_pos;
1323 current_pos += v.obj.tail - v.obj.head;
1324 v.end = current_pos;
1325 }
1326
1327 positions_invalid = false;
1328 }
1329
1330 /*
1331 * Finds the distance to each object in the graph
1332 * from the initial node.
1333 */
1334 void update_distances ()
1335 {
1336 if (!distance_invalid) return;
1337
1338 // Uses Dijkstra's algorithm to find all of the shortest distances.
1339 // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
1340 //
1341 // Implementation Note:
1342 // Since our priority queue doesn't support fast priority decreases
1343 // we instead just add new entries into the queue when a priority changes.
1344 // Redundant ones are filtered out later on by the visited set.
1345 // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf
1346 // for practical performance this is faster then using a more advanced queue
1347 // (such as a fibonacci queue) with a fast decrease priority.
1348 unsigned count = vertices_.length;
1349 for (unsigned i = 0; i < count; i++)
1350 vertices_.arrayZ[i].distance = hb_int_max (int64_t);
1351 vertices_.tail ().distance = 0;
1352
1353 hb_priority_queue_t queue;
1354 queue.insert (0, vertices_.length - 1);
1355
1356 hb_vector_t<bool> visited;
1357 visited.resize (vertices_.length);
1358
1359 while (!queue.in_error () && !queue.is_empty ())
1360 {
1361 unsigned next_idx = queue.pop_minimum ().second;
1362 if (visited[next_idx]) continue;
1363 const auto& next = vertices_[next_idx];
1364 int64_t next_distance = vertices_[next_idx].distance;
1365 visited[next_idx] = true;
1366
1367 for (const auto& link : next.obj.all_links ())
1368 {
1369 if (visited[link.objidx]) continue;
1370
1371 const auto& child = vertices_.arrayZ[link.objidx].obj;
1372 unsigned link_width = link.width ? link.width : 4; // treat virtual offsets as 32 bits wide
1373 int64_t child_weight = (child.tail - child.head) +
1374 ((int64_t) 1 << (link_width * 8)) * (vertices_.arrayZ[link.objidx].space + 1);
1375 int64_t child_distance = next_distance + child_weight;
1376
1377 if (child_distance < vertices_.arrayZ[link.objidx].distance)
1378 {
1379 vertices_.arrayZ[link.objidx].distance = child_distance;
1380 queue.insert (child_distance, link.objidx);
1381 }
1382 }
1383 }
1384
1385 check_success (!queue.in_error ());
1386 if (!check_success (queue.is_empty ()))
1387 {
1388 print_orphaned_nodes ();
1389 return;
1390 }
1391
1392 distance_invalid = false;
1393 }
1394
1395 private:
1396 /*
1397 * Updates a link in the graph to point to a different object. Corrects the
1398 * parents vector on the previous and new child nodes.
1399 */
1400 void reassign_link (hb_serialize_context_t::object_t::link_t& link,
1401 unsigned parent_idx,
1402 unsigned new_idx)
1403 {
1404 unsigned old_idx = link.objidx;
1405 link.objidx = new_idx;
1406 vertices_[old_idx].remove_parent (parent_idx);
1407 vertices_[new_idx].add_parent (parent_idx);
1408 }
1409
1410 /*
1411 * Updates all objidx's in all links using the provided mapping. Corrects incoming edge counts.
1412 */
1413 template<typename Iterator, hb_requires (hb_is_iterator (Iterator))>
1414 void remap_obj_indices (const hb_map_t& id_map,
1415 Iterator subgraph,
1416 bool only_wide = false)
1417 {
1418 if (!id_map) return;
1419 for (unsigned i : subgraph)
1420 {
1421 for (auto& link : vertices_[i].obj.all_links_writer ())
1422 {
1423 const uint32_t *v;
1424 if (!id_map.has (link.objidx, &v)) continue;
1425 if (only_wide && !(link.width == 4 && !link.is_signed)) continue;
1426
1427 reassign_link (link, i, *v);
1428 }
1429 }
1430 }
1431
1432 /*
1433 * Updates all objidx's in all links using the provided mapping.
1434 */
1435 bool remap_all_obj_indices (const hb_vector_t<unsigned>& id_map,
1436 hb_vector_t<vertex_t>* sorted_graph) const
1437 {
1438 unsigned count = sorted_graph->length;
1439 for (unsigned i = 0; i < count; i++)
1440 {
1441 if (!(*sorted_graph)[i].remap_parents (id_map))
1442 return false;
1443 for (auto& link : sorted_graph->arrayZ[i].obj.all_links_writer ())
1444 {
1445 link.objidx = id_map[link.objidx];
1446 }
1447 }
1448 return true;
1449 }
1450
1451 /*
1452 * Finds all nodes in targets that are reachable from start_idx, nodes in visited will be skipped.
1453 * For this search the graph is treated as being undirected.
1454 *
1455 * Connected targets will be added to connected and removed from targets. All visited nodes
1456 * will be added to visited.
1457 */
1458 void find_connected_nodes (unsigned start_idx,
1459 hb_set_t& targets,
1460 hb_set_t& visited,
1461 hb_set_t& connected)
1462 {
1463 if (unlikely (!check_success (!visited.in_error ()))) return;
1464 if (visited.has (start_idx)) return;
1465 visited.add (start_idx);
1466
1467 if (targets.has (start_idx))
1468 {
1469 targets.del (start_idx);
1470 connected.add (start_idx);
1471 }
1472
1473 const auto& v = vertices_[start_idx];
1474
1475 // Graph is treated as undirected so search children and parents of start_idx
1476 for (const auto& l : v.obj.all_links ())
1477 find_connected_nodes (l.objidx, targets, visited, connected);
1478
1479 for (unsigned p : v.parents_iter ())
1480 find_connected_nodes (p, targets, visited, connected);
1481 }
1482
1483 public:
1484 // TODO(garretrieger): make private, will need to move most of offset overflow code into graph.
1485 hb_vector_t<vertex_t> vertices_;
1486 hb_vector_t<vertex_t> vertices_scratch_;
1487 private:
1488 bool parents_invalid;
1489 bool distance_invalid;
1490 bool positions_invalid;
1491 bool successful;
1492 hb_vector_t<unsigned> num_roots_for_space_;
1493 hb_vector_t<char*> buffers;
1494};
1495
1496}
1497
1498#endif // GRAPH_GRAPH_HH
1499