// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// GraphCycles provides incremental cycle detection on a dynamic
// graph using the following algorithm:
//
// A dynamic topological sort algorithm for directed acyclic graphs
// David J. Pearce, Paul H. J. Kelly
// Journal of Experimental Algorithmics (JEA)
// Volume 11, 2006, Article No. 1.7
//
// Brief summary of the algorithm:
//
// (1) Maintain a rank for each node that is consistent with the
//     topological sort of the graph, i.e., a path from x to y
//     implies rank[x] < rank[y].
// (2) When a new edge (x->y) is inserted, do nothing if rank[x] < rank[y].
// (3) Otherwise: adjust ranks in the neighborhood of x and y.
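//
// A sketch of step (3), hedged against the actual code in InsertEdge()
// below (deltaf_/deltab_ are the scratch vectors declared in Rep):
//
//   insert edge x->y with rank[y] < rank[x]:
//     deltaf = nodes reachable forward from y with rank < rank[x]
//     if x is in deltaf: report a cycle and undo the edge
//     deltab = nodes reaching x backward with rank > rank[y]
//     sort deltab and deltaf by rank, concatenate (deltab first), and
//     redistribute the merged set of old ranks over that node order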

#include "absl/base/attributes.h"
// This file is a no-op if the required LowLevelAlloc support is missing.
#include "absl/base/internal/low_level_alloc.h"
#ifndef ABSL_LOW_LEVEL_ALLOC_MISSING

#include "absl/synchronization/internal/graphcycles.h"

#include <algorithm>
#include <array>
#include <limits>  // for std::numeric_limits, used in RemoveNode()

#include "absl/base/internal/hide_ptr.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"

// Do not use STL containers in this module: it must not use standard
// memory allocation. (Non-allocating facilities such as <algorithm>
// and std::array are fine.)

namespace absl {
namespace synchronization_internal {

namespace {

// Avoid LowLevelAlloc's default arena since it calls malloc hooks in
// which people are doing things like acquiring Mutexes.
static absl::base_internal::SpinLock arena_mu(
    absl::base_internal::kLinkerInitialized);
static base_internal::LowLevelAlloc::Arena* arena;

static void InitArenaIfNecessary() {
  arena_mu.Lock();
  if (arena == nullptr) {
    arena = base_internal::LowLevelAlloc::NewArena(0);
  }
  arena_mu.Unlock();
}

// Number of inlined elements in Vec. The hash table implementation
// relies on this being a power of two, since NodeSet::FindIndex masks
// hashes with (table size - 1) instead of taking a modulus.
static const uint32_t kInline = 8;

// A simple LowLevelAlloc based resizable vector with inlined storage
// for a few elements. T must be a plain type since constructor
// and destructor are not run on elements of type T managed by Vec.
template <typename T>
class Vec {
 public:
  Vec() { Init(); }
  ~Vec() { Discard(); }

  void clear() {
    Discard();
    Init();
  }

  bool empty() const { return size_ == 0; }
  uint32_t size() const { return size_; }
  T* begin() { return ptr_; }
  T* end() { return ptr_ + size_; }
  const T& operator[](uint32_t i) const { return ptr_[i]; }
  T& operator[](uint32_t i) { return ptr_[i]; }
  const T& back() const { return ptr_[size_ - 1]; }
  void pop_back() { size_--; }

  void push_back(const T& v) {
    if (size_ == capacity_) Grow(size_ + 1);
    ptr_[size_] = v;
    size_++;
  }

  void resize(uint32_t n) {
    if (n > capacity_) Grow(n);
    size_ = n;
  }

  void fill(const T& val) {
    for (uint32_t i = 0; i < size(); i++) {
      ptr_[i] = val;
    }
  }

  // Guarantees src is empty at end.
  // Provided for the hash table resizing code below.
  void MoveFrom(Vec<T>* src) {
    if (src->ptr_ == src->space_) {
      // Need to actually copy
      resize(src->size_);
      std::copy(src->ptr_, src->ptr_ + src->size_, ptr_);
      src->size_ = 0;
    } else {
      Discard();
      ptr_ = src->ptr_;
      size_ = src->size_;
      capacity_ = src->capacity_;
      src->Init();
    }
  }

 private:
  T* ptr_;
  T space_[kInline];
  uint32_t size_;
  uint32_t capacity_;

  void Init() {
    ptr_ = space_;
    size_ = 0;
    capacity_ = kInline;
  }

  void Discard() {
    if (ptr_ != space_) base_internal::LowLevelAlloc::Free(ptr_);
  }

  void Grow(uint32_t n) {
    while (capacity_ < n) {
      capacity_ *= 2;
    }
    size_t request = static_cast<size_t>(capacity_) * sizeof(T);
    T* copy = static_cast<T*>(
        base_internal::LowLevelAlloc::AllocWithArena(request, arena));
    std::copy(ptr_, ptr_ + size_, copy);
    Discard();
    ptr_ = copy;
  }

  Vec(const Vec&) = delete;
  Vec& operator=(const Vec&) = delete;
};
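
// An illustrative sketch (not part of the original file) of Vec behavior:
//   Vec<int32_t> v;            // starts on the kInline inline slots
//   for (int32_t i = 0; i < 20; i++) v.push_back(i);
//                              // the 9th push spills to the arena,
//                              // doubling capacity as needed
//   v.clear();                 // frees the arena block, back to inline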

// A hash set of non-negative int32_t that uses Vec for its underlying storage.
class NodeSet {
 public:
  NodeSet() { Init(); }

  void clear() { Init(); }
  bool contains(int32_t v) const { return table_[FindIndex(v)] == v; }

  bool insert(int32_t v) {
    uint32_t i = FindIndex(v);
    if (table_[i] == v) {
      return false;
    }
    if (table_[i] == kEmpty) {
      // Only inserting over an empty cell increases the number of occupied
      // slots.
      occupied_++;
    }
    table_[i] = v;
    // Double when 75% full.
    if (occupied_ >= table_.size() - table_.size() / 4) Grow();
    return true;
  }

  void erase(int32_t v) {
    uint32_t i = FindIndex(v);
    if (table_[i] == v) {
      table_[i] = kDel;
    }
  }

  // Iteration is done via HASH_FOR_EACH.
  // Example:
  //    HASH_FOR_EACH(elem, node->out) { ... }
#define HASH_FOR_EACH(elem, eset) \
  for (int32_t elem, _cursor = 0; (eset).Next(&_cursor, &elem); )
  bool Next(int32_t* cursor, int32_t* elem) {
    while (static_cast<uint32_t>(*cursor) < table_.size()) {
      int32_t v = table_[*cursor];
      (*cursor)++;
      if (v >= 0) {
        *elem = v;
        return true;
      }
    }
    return false;
  }

 private:
  enum : int32_t { kEmpty = -1, kDel = -2 };
  Vec<int32_t> table_;
  uint32_t occupied_;  // Count of non-empty slots (includes deleted slots)

  static uint32_t Hash(uint32_t a) { return a * 41; }

  // Return index for storing v. May return the index of an empty or
  // deleted slot if v is not present.
  int FindIndex(int32_t v) const {
    // Search starting at hash index.
    const uint32_t mask = table_.size() - 1;
    uint32_t i = Hash(v) & mask;
    int deleted_index = -1;  // If >= 0, index of first deleted element we see
    while (true) {
      int32_t e = table_[i];
      if (v == e) {
        return i;
      } else if (e == kEmpty) {
        // Return any previously encountered deleted slot.
        return (deleted_index >= 0) ? deleted_index : i;
      } else if (e == kDel && deleted_index < 0) {
        // Keep searching since v might be present later.
        deleted_index = i;
      }
      i = (i + 1) & mask;  // Linear probing; quadratic is slightly slower.
    }
  }

  void Init() {
    table_.clear();
    table_.resize(kInline);
    table_.fill(kEmpty);
    occupied_ = 0;
  }

  void Grow() {
    Vec<int32_t> copy;
    copy.MoveFrom(&table_);
    occupied_ = 0;
    table_.resize(copy.size() * 2);
    table_.fill(kEmpty);

    for (const auto& e : copy) {
      if (e >= 0) insert(e);
    }
  }

  NodeSet(const NodeSet&) = delete;
  NodeSet& operator=(const NodeSet&) = delete;
};
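
// An illustrative sketch (not in the original) of the tombstone behavior:
//   NodeSet s;
//   s.insert(5);   // occupies an empty slot; occupied_ becomes 1
//   s.erase(5);    // slot becomes kDel but still counts as occupied
//   s.insert(5);   // FindIndex() returns the kDel slot, so the insert
//                  // reuses it without bumping occupied_ again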

// We encode a node index and a node version in GraphId. The version
// number is incremented when the node is freed, which automatically
// invalidates all outstanding copies of the GraphId.
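//
// For example (illustrative values): MakeId(5, 3) produces handle
// 0x0000000300000005; NodeIndex() recovers 5 and NodeVersion() recovers 3.
// Once the node is removed and its version bumped to 4, FindNode() below
// rejects the stale id because the stored version no longer matches.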

inline GraphId MakeId(int32_t index, uint32_t version) {
  GraphId g;
  g.handle =
      (static_cast<uint64_t>(version) << 32) | static_cast<uint32_t>(index);
  return g;
}

inline int32_t NodeIndex(GraphId id) {
  return static_cast<int32_t>(id.handle & 0xfffffffful);
}

inline uint32_t NodeVersion(GraphId id) {
  return static_cast<uint32_t>(id.handle >> 32);
}

struct Node {
  int32_t rank;          // rank number assigned by Pearce-Kelly algorithm
  uint32_t version;      // Current version number
  int32_t next_hash;     // Next entry in hash table
  bool visited;          // Temporary marker used by depth-first-search
  uintptr_t masked_ptr;  // User-supplied pointer
  NodeSet in;            // List of immediate predecessor nodes in graph
  NodeSet out;           // List of immediate successor nodes in graph
  int priority;          // Priority of recorded stack trace.
  int nstack;            // Depth of recorded stack trace.
  void* stack[40];       // stack[0,nstack-1] holds stack trace for node.
};

// Hash table for pointer to node index lookups.
class PointerMap {
 public:
  explicit PointerMap(const Vec<Node*>* nodes) : nodes_(nodes) {
    table_.fill(-1);
  }

  int32_t Find(void* ptr) {
    auto masked = base_internal::HidePtr(ptr);
    for (int32_t i = table_[Hash(ptr)]; i != -1;) {
      Node* n = (*nodes_)[i];
      if (n->masked_ptr == masked) return i;
      i = n->next_hash;
    }
    return -1;
  }

  void Add(void* ptr, int32_t i) {
    int32_t* head = &table_[Hash(ptr)];
    (*nodes_)[i]->next_hash = *head;
    *head = i;
  }

  int32_t Remove(void* ptr) {
    // Advance through linked list while keeping track of the
    // predecessor slot that points to the current entry.
    auto masked = base_internal::HidePtr(ptr);
    for (int32_t* slot = &table_[Hash(ptr)]; *slot != -1;) {
      int32_t index = *slot;
      Node* n = (*nodes_)[index];
      if (n->masked_ptr == masked) {
        *slot = n->next_hash;  // Remove n from linked list
        n->next_hash = -1;
        return index;
      }
      slot = &n->next_hash;
    }
    return -1;
  }

 private:
  // Number of buckets in hash table for pointer lookups.
  static constexpr uint32_t kHashTableSize = 8171;  // should be prime

  const Vec<Node*>* nodes_;
  std::array<int32_t, kHashTableSize> table_;

  static uint32_t Hash(void* ptr) {
    return reinterpret_cast<uintptr_t>(ptr) % kHashTableSize;
  }
};
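
// An illustrative sketch (hypothetical pointers p1, p2) of the bucket
// chaining: after Add(p1, 3) and Add(p2, 7), where both hash to bucket b,
//   table_[b] == 7, nodes_[7]->next_hash == 3, nodes_[3]->next_hash == -1.
// Remove(p1) walks the chain through those next_hash slots and splices
// index 3 out by overwriting the slot that pointed at it.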

}  // namespace

struct GraphCycles::Rep {
  Vec<Node*> nodes_;
  Vec<int32_t> free_nodes_;  // Indices for unused entries in nodes_
  PointerMap ptrmap_;

  // Temporary state.
  Vec<int32_t> deltaf_;  // Results of forward DFS
  Vec<int32_t> deltab_;  // Results of backward DFS
  Vec<int32_t> list_;    // All nodes to reprocess
  Vec<int32_t> merged_;  // Rank values to assign to list_ entries
  Vec<int32_t> stack_;   // Emulates recursion stack for depth-first searches

  Rep() : ptrmap_(&nodes_) {}
};

static Node* FindNode(GraphCycles::Rep* rep, GraphId id) {
  Node* n = rep->nodes_[NodeIndex(id)];
  return (n->version == NodeVersion(id)) ? n : nullptr;
}

GraphCycles::GraphCycles() {
  InitArenaIfNecessary();
  rep_ = new (base_internal::LowLevelAlloc::AllocWithArena(sizeof(Rep), arena))
      Rep;
}

GraphCycles::~GraphCycles() {
  for (auto* node : rep_->nodes_) {
    node->Node::~Node();
    base_internal::LowLevelAlloc::Free(node);
  }
  rep_->Rep::~Rep();
  base_internal::LowLevelAlloc::Free(rep_);
}

bool GraphCycles::CheckInvariants() const {
  Rep* r = rep_;
  NodeSet ranks;  // Set of ranks seen so far.
  for (uint32_t x = 0; x < r->nodes_.size(); x++) {
    Node* nx = r->nodes_[x];
    void* ptr = base_internal::UnhidePtr<void>(nx->masked_ptr);
    if (ptr != nullptr && static_cast<uint32_t>(r->ptrmap_.Find(ptr)) != x) {
      ABSL_RAW_LOG(FATAL, "Did not find live node in hash table %u %p", x, ptr);
    }
    if (nx->visited) {
      ABSL_RAW_LOG(FATAL, "Did not clear visited marker on node %u", x);
    }
    if (!ranks.insert(nx->rank)) {
      ABSL_RAW_LOG(FATAL, "Duplicate occurrence of rank %d", nx->rank);
    }
    HASH_FOR_EACH(y, nx->out) {
      Node* ny = r->nodes_[y];
      if (nx->rank >= ny->rank) {
        ABSL_RAW_LOG(FATAL, "Edge %u->%d has bad rank assignment %d->%d", x, y,
                     nx->rank, ny->rank);
      }
    }
  }
  return true;
}

GraphId GraphCycles::GetId(void* ptr) {
  int32_t i = rep_->ptrmap_.Find(ptr);
  if (i != -1) {
    return MakeId(i, rep_->nodes_[i]->version);
  } else if (rep_->free_nodes_.empty()) {
    Node* n =
        new (base_internal::LowLevelAlloc::AllocWithArena(sizeof(Node), arena))
            Node;
    n->version = 1;  // Avoid 0 since it is used by InvalidGraphId()
    n->visited = false;
    n->rank = rep_->nodes_.size();
    n->masked_ptr = base_internal::HidePtr(ptr);
    n->nstack = 0;
    n->priority = 0;
    rep_->nodes_.push_back(n);
    rep_->ptrmap_.Add(ptr, n->rank);
    return MakeId(n->rank, n->version);
  } else {
    // Reuse a previously freed slot and keep its old rank, since the set
    // of ranks in use must remain a permutation of [0,rep_->nodes_.size()-1].
    int32_t r = rep_->free_nodes_.back();
    rep_->free_nodes_.pop_back();
    Node* n = rep_->nodes_[r];
    n->masked_ptr = base_internal::HidePtr(ptr);
    n->nstack = 0;
    n->priority = 0;
    rep_->ptrmap_.Add(ptr, r);
    return MakeId(r, n->version);
  }
}

void GraphCycles::RemoveNode(void* ptr) {
  int32_t i = rep_->ptrmap_.Remove(ptr);
  if (i == -1) {
    return;
  }
  Node* x = rep_->nodes_[i];
  HASH_FOR_EACH(y, x->out) {
    rep_->nodes_[y]->in.erase(i);
  }
  HASH_FOR_EACH(y, x->in) {
    rep_->nodes_[y]->out.erase(i);
  }
  x->in.clear();
  x->out.clear();
  x->masked_ptr = base_internal::HidePtr<void>(nullptr);
  if (x->version != std::numeric_limits<uint32_t>::max()) {
    x->version++;  // Invalidates all copies of the node's id.
    rep_->free_nodes_.push_back(i);
  }
  // Otherwise the version counter is exhausted and the slot is retired
  // rather than reused.
}

void* GraphCycles::Ptr(GraphId id) {
  Node* n = FindNode(rep_, id);
  return n == nullptr ? nullptr
                      : base_internal::UnhidePtr<void>(n->masked_ptr);
}

bool GraphCycles::HasNode(GraphId node) {
  return FindNode(rep_, node) != nullptr;
}

bool GraphCycles::HasEdge(GraphId x, GraphId y) const {
  Node* xn = FindNode(rep_, x);
  return xn && FindNode(rep_, y) && xn->out.contains(NodeIndex(y));
}

void GraphCycles::RemoveEdge(GraphId x, GraphId y) {
  Node* xn = FindNode(rep_, x);
  Node* yn = FindNode(rep_, y);
  if (xn && yn) {
    xn->out.erase(NodeIndex(y));
    yn->in.erase(NodeIndex(x));
    // No need to update the rank assignment since a previous valid
    // rank assignment remains valid after an edge deletion.
  }
}

static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound);
static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound);
static void Reorder(GraphCycles::Rep* r);
static void Sort(const Vec<Node*>&, Vec<int32_t>* delta);
static void MoveToList(
    GraphCycles::Rep* r, Vec<int32_t>* src, Vec<int32_t>* dst);

bool GraphCycles::InsertEdge(GraphId idx, GraphId idy) {
  Rep* r = rep_;
  const int32_t x = NodeIndex(idx);
  const int32_t y = NodeIndex(idy);
  Node* nx = FindNode(r, idx);
  Node* ny = FindNode(r, idy);
  if (nx == nullptr || ny == nullptr) return true;  // Expired ids

  if (nx == ny) return false;  // Self edge
  if (!nx->out.insert(y)) {
    // Edge already exists.
    return true;
  }

  ny->in.insert(x);

  if (nx->rank <= ny->rank) {
    // New edge is consistent with existing rank assignment.
    return true;
  }

  // Current rank assignments are incompatible with the new edge. Recompute.
  // We only need to consider nodes that fall in the range [ny->rank,nx->rank].
  if (!ForwardDFS(r, y, nx->rank)) {
    // Found a cycle. Undo the insertion and tell caller.
    nx->out.erase(y);
    ny->in.erase(x);
    // Since we do not call Reorder() on this path, clear any visited
    // markers left by ForwardDFS.
    for (const auto& d : r->deltaf_) {
      r->nodes_[d]->visited = false;
    }
    return false;
  }
  BackwardDFS(r, x, ny->rank);
  Reorder(r);
  return true;
}
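
// An illustrative sketch of the caller-visible behavior (hypothetical
// objects mu1 and mu2; not part of the original file):
//   GraphCycles g;
//   GraphId a = g.GetId(&mu1);
//   GraphId b = g.GetId(&mu2);
//   g.InsertEdge(a, b);            // true: ranks stay consistent
//   bool ok = g.InsertEdge(b, a);  // false: edge would close a cycle,
//                                  //   so it is not inserted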

static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound) {
  // Avoid recursion since stack space might be limited.
  // We instead keep a stack of nodes to visit.
  r->deltaf_.clear();
  r->stack_.clear();
  r->stack_.push_back(n);
  while (!r->stack_.empty()) {
    n = r->stack_.back();
    r->stack_.pop_back();
    Node* nn = r->nodes_[n];
    if (nn->visited) continue;

    nn->visited = true;
    r->deltaf_.push_back(n);

    HASH_FOR_EACH(w, nn->out) {
      Node* nw = r->nodes_[w];
      if (nw->rank == upper_bound) {
        return false;  // Cycle
      }
      if (!nw->visited && nw->rank < upper_bound) {
        r->stack_.push_back(w);
      }
    }
  }
  return true;
}

static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound) {
  r->deltab_.clear();
  r->stack_.clear();
  r->stack_.push_back(n);
  while (!r->stack_.empty()) {
    n = r->stack_.back();
    r->stack_.pop_back();
    Node* nn = r->nodes_[n];
    if (nn->visited) continue;

    nn->visited = true;
    r->deltab_.push_back(n);

    HASH_FOR_EACH(w, nn->in) {
      Node* nw = r->nodes_[w];
      if (!nw->visited && lower_bound < nw->rank) {
        r->stack_.push_back(w);
      }
    }
  }
}

static void Reorder(GraphCycles::Rep* r) {
  Sort(r->nodes_, &r->deltab_);
  Sort(r->nodes_, &r->deltaf_);

  // Adds contents of delta lists to list_ (backwards deltas first).
  r->list_.clear();
  MoveToList(r, &r->deltab_, &r->list_);
  MoveToList(r, &r->deltaf_, &r->list_);

  // Produce sorted list of all ranks that will be reassigned.
  r->merged_.resize(r->deltab_.size() + r->deltaf_.size());
  std::merge(r->deltab_.begin(), r->deltab_.end(),
             r->deltaf_.begin(), r->deltaf_.end(),
             r->merged_.begin());

  // Assign the ranks in order to the collected list.
  for (uint32_t i = 0; i < r->list_.size(); i++) {
    r->nodes_[r->list_[i]]->rank = r->merged_[i];
  }
}
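
// A worked example (illustrative ranks): if the deltab nodes held ranks
// {4, 7} and the deltaf nodes held ranks {2, 5}, merged_ becomes
// {2, 4, 5, 7} and list_ holds the deltab nodes before the deltaf nodes.
// Every backward-reachable node therefore ends up ranked below every
// forward-reachable node, restoring the invariant from the file comment.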

static void Sort(const Vec<Node*>& nodes, Vec<int32_t>* delta) {
  struct ByRank {
    const Vec<Node*>* nodes;
    bool operator()(int32_t a, int32_t b) const {
      return (*nodes)[a]->rank < (*nodes)[b]->rank;
    }
  };
  ByRank cmp;
  cmp.nodes = &nodes;
  std::sort(delta->begin(), delta->end(), cmp);
}

static void MoveToList(
    GraphCycles::Rep* r, Vec<int32_t>* src, Vec<int32_t>* dst) {
  for (auto& v : *src) {
    int32_t w = v;
    v = r->nodes_[w]->rank;         // Replace v entry with its rank
    r->nodes_[w]->visited = false;  // Prepare for future DFS calls
    dst->push_back(w);
  }
}

int GraphCycles::FindPath(GraphId idx, GraphId idy, int max_path_len,
                          GraphId path[]) const {
  Rep* r = rep_;
  if (FindNode(r, idx) == nullptr || FindNode(r, idy) == nullptr) return 0;
  const int32_t x = NodeIndex(idx);
  const int32_t y = NodeIndex(idy);

  // Forward depth first search starting at x until we hit y.
  // As we descend into a node, we push it onto the path.
  // As we leave a node, we remove it from the path.
  int path_len = 0;

  NodeSet seen;
  r->stack_.clear();
  r->stack_.push_back(x);
  while (!r->stack_.empty()) {
    int32_t n = r->stack_.back();
    r->stack_.pop_back();
    if (n < 0) {
      // Marker to indicate that we are leaving a node
      path_len--;
      continue;
    }

    if (path_len < max_path_len) {
      path[path_len] = MakeId(n, rep_->nodes_[n]->version);
    }
    path_len++;
    r->stack_.push_back(-1);  // Will remove tentative path entry

    if (n == y) {
      return path_len;
    }

    HASH_FOR_EACH(w, r->nodes_[n]->out) {
      if (seen.insert(w)) {
        r->stack_.push_back(w);
      }
    }
  }

  return 0;
}
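
// An illustrative sketch (hypothetical ids a, b, c): for edges a->b->c,
// FindPath(a, c, 10, path) returns 3 with path = {a, b, c}. The return
// value reports the discovered path length even when max_path_len is too
// small to record it, which is how IsReachable() below probes reachability
// with max_path_len == 0 and a null path.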

bool GraphCycles::IsReachable(GraphId x, GraphId y) const {
  return FindPath(x, y, 0, nullptr) > 0;
}

void GraphCycles::UpdateStackTrace(GraphId id, int priority,
                                   int (*get_stack_trace)(void** stack, int)) {
  Node* n = FindNode(rep_, id);
  if (n == nullptr || n->priority >= priority) {
    return;
  }
  n->nstack = (*get_stack_trace)(n->stack, ABSL_ARRAYSIZE(n->stack));
  n->priority = priority;
}

int GraphCycles::GetStackTrace(GraphId id, void*** ptr) {
  Node* n = FindNode(rep_, id);
  if (n == nullptr) {
    *ptr = nullptr;
    return 0;
  } else {
    *ptr = n->stack;
    return n->nstack;
  }
}

}  // namespace synchronization_internal
}  // namespace absl

#endif  // ABSL_LOW_LEVEL_ALLOC_MISSING