1// Copyright 2006 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#ifndef UTIL_SPARSE_ARRAY_H_
6#define UTIL_SPARSE_ARRAY_H_
7
8// DESCRIPTION
9//
10// SparseArray<T>(m) is a map from integers in [0, m) to T values.
11// It requires (sizeof(T)+sizeof(int))*m memory, but it provides
12// fast iteration through the elements in the array and fast clearing
13// of the array. The array has a concept of certain elements being
14// uninitialized (having no value).
15//
16// Insertion and deletion are constant time operations.
17//
18// Allocating the array is a constant time operation
19// when memory allocation is a constant time operation.
20//
21// Clearing the array is a constant time operation (unusual!).
22//
23// Iterating through the array is an O(n) operation, where n
24// is the number of items in the array (not O(m)).
25//
26// The array iterator visits entries in the order they were first
27// inserted into the array. It is safe to add items to the array while
28// using an iterator: the iterator will visit indices added to the array
29// during the iteration, but will not re-visit indices whose values
30// change after visiting. Thus SparseArray can be a convenient
31// implementation of a work queue.
32//
33// The SparseArray implementation is NOT thread-safe. It is up to the
34// caller to make sure only one thread is accessing the array. (Typically
35// these arrays are temporary values and used in situations where speed is
36// important.)
37//
38// The SparseArray interface does not present all the usual STL bells and
39// whistles.
40//
41// Implemented with reference to Briggs & Torczon, An Efficient
42// Representation for Sparse Sets, ACM Letters on Programming Languages
43// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69.
44//
45// Briggs & Torczon popularized this technique, but it had been known
46// long before their paper. They point out that Aho, Hopcroft, and
47// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's
48// 1986 Programming Pearls both hint at the technique in exercises to the
49// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1
50// exercise 8).
51//
52// Briggs & Torczon describe a sparse set implementation. I have
53// trivially generalized it to create a sparse array (actually the original
54// target of the AHU and Bentley exercises).
55
56// IMPLEMENTATION
57//
58// SparseArray is an array dense_ and an array sparse_, both of size max_size_.
59// At any point, the number of elements in the sparse array is size_.
60//
61// The array dense_ contains the size_ elements in the sparse array (with
62// their indices),
63// in the order that the elements were first inserted. This array is dense:
64// the size_ pairs are dense_[0] through dense_[size_-1].
65//
66// The array sparse_ maps from indices in [0,m) to indices in [0,size_).
67// For indices present in the array, dense_[sparse_[i]].index_ == i.
68// For indices not present in the array, sparse_ can contain any value at all,
69// perhaps outside the range [0, size_) but perhaps not.
70//
71// The lax requirement on sparse_ values makes clearing the array very easy:
72// set size_ to 0. Lookups are slightly more complicated.
73// An index i has a value in the array if and only if:
74// sparse_[i] is in [0, size_) AND
75// dense_[sparse_[i]].index_ == i.
76// If both these properties hold, only then it is safe to refer to
77// dense_[sparse_[i]].value_
78// as the value associated with index i.
79//
80// To insert a new entry, set sparse_[i] to size_,
81// initialize dense_[size_], and then increment size_.
82//
83// Deletion of specific values from the array is implemented by
84// swapping dense_[size_-1] and the dense_ being deleted and then
85// updating the appropriate sparse_ entries.
86//
87// To make the sparse array as efficient as possible for non-primitive types,
88// elements may or may not be destroyed when they are deleted from the sparse
89// array through a call to erase(), erase_existing() or resize(). They
90// immediately become inaccessible, but they are only guaranteed to be
91// destroyed when the SparseArray destructor is called.
92//
93// A moved-from SparseArray will be empty.
94
95// Doing this simplifies the logic below.
96#ifndef __has_feature
97#define __has_feature(x) 0
98#endif
99
100#include <assert.h>
101#include <stdint.h>
102#include <string.h>
103#if __has_feature(memory_sanitizer)
104#include <sanitizer/msan_interface.h>
105#endif
106#include <algorithm>
107#include <memory>
108#include <type_traits>
109#include <utility>
110
111namespace re2 {
112
113template<typename Value>
114class SparseArray {
115 public:
116 SparseArray();
117 explicit SparseArray(int max_size);
118 ~SparseArray();
119
120 // IndexValue pairs: exposed in SparseArray::iterator.
121 class IndexValue;
122 static_assert(std::is_trivially_destructible<IndexValue>::value,
123 "IndexValue must be trivially destructible");
124
125 typedef IndexValue value_type;
126 typedef IndexValue* iterator;
127 typedef const IndexValue* const_iterator;
128
129 SparseArray(const SparseArray& src);
130 SparseArray(SparseArray&& src) /*noexcept*/;
131
132 SparseArray& operator=(const SparseArray& src);
133 SparseArray& operator=(SparseArray&& src) /*noexcept*/;
134
135 const IndexValue& iv(int i) const;
136
137 // Return the number of entries in the array.
138 int size() const {
139 return size_;
140 }
141
142 // Indicate whether the array is empty.
143 int empty() const {
144 return size_ == 0;
145 }
146
147 // Iterate over the array.
148 iterator begin() {
149 return dense_.get();
150 }
151 iterator end() {
152 return dense_.get() + size_;
153 }
154
155 const_iterator begin() const {
156 return dense_.get();
157 }
158 const_iterator end() const {
159 return dense_.get() + size_;
160 }
161
162 // Change the maximum size of the array.
163 // Invalidates all iterators.
164 void resize(int max_size);
165
166 // Return the maximum size of the array.
167 // Indices can be in the range [0, max_size).
168 int max_size() const {
169 return max_size_;
170 }
171
172 // Clear the array.
173 void clear() {
174 size_ = 0;
175 }
176
177 // Check whether index i is in the array.
178 bool has_index(int i) const;
179
180 // Comparison function for sorting.
181 // Can sort the sparse array so that future iterations
182 // will visit indices in increasing order using
183 // std::sort(arr.begin(), arr.end(), arr.less);
184 static bool less(const IndexValue& a, const IndexValue& b);
185
186 public:
187 // Set the value at index i to v.
188 iterator set(int i, const Value& v) {
189 return SetInternal(true, i, v);
190 }
191 iterator set(int i, Value&& v) { // NOLINT
192 return SetInternal(true, i, std::move(v));
193 }
194
195 std::pair<iterator, bool> insert(const value_type& v) {
196 return InsertInternal(v);
197 }
198 std::pair<iterator, bool> insert(value_type&& v) { // NOLINT
199 return InsertInternal(std::move(v));
200 }
201
202 template <typename... Args>
203 std::pair<iterator, bool> emplace(Args&&... args) { // NOLINT
204 return InsertInternal(value_type(std::forward<Args>(args)...));
205 }
206
207 iterator find(int i) {
208 if (has_index(i))
209 return dense_.get() + sparse_[i];
210 return end();
211 }
212
213 const_iterator find(int i) const {
214 if (has_index(i))
215 return dense_.get() + sparse_[i];
216 return end();
217 }
218
219 // Change the value at index i to v.
220 // Fast but unsafe: only use if has_index(i) is true.
221 iterator set_existing(int i, const Value& v) {
222 return SetExistingInternal(i, v);
223 }
224 iterator set_existing(int i, Value&& v) { // NOLINT
225 return SetExistingInternal(i, std::move(v));
226 }
227
228 // Set the value at the new index i to v.
229 // Fast but unsafe: only use if has_index(i) is false.
230 iterator set_new(int i, const Value& v) {
231 return SetInternal(false, i, v);
232 }
233 iterator set_new(int i, Value&& v) { // NOLINT
234 return SetInternal(false, i, std::move(v));
235 }
236
237 // Get the value at index i from the array..
238 // Fast but unsafe: only use if has_index(i) is true.
239 const Value& get_existing(int i) const;
240
241 // Erasing items from the array during iteration is in general
242 // NOT safe. There is one special case, which is that the current
243 // index-value pair can be erased as long as the iterator is then
244 // checked for being at the end before being incremented.
245 // For example:
246 //
247 // for (i = m.begin(); i != m.end(); ++i) {
248 // if (ShouldErase(i->index(), i->value())) {
249 // m.erase(i->index());
250 // --i;
251 // }
252 // }
253 //
254 // Except in the specific case just described, elements must
255 // not be erased from the array (including clearing the array)
256 // while iterators are walking over the array. Otherwise,
257 // the iterators could walk past the end of the array.
258
259 // Erases the element at index i from the array.
260 void erase(int i);
261
262 // Erases the element at index i from the array.
263 // Fast but unsafe: only use if has_index(i) is true.
264 void erase_existing(int i);
265
266 private:
267 template <typename U>
268 std::pair<iterator, bool> InsertInternal(U&& v) {
269 DebugCheckInvariants();
270 std::pair<iterator, bool> p;
271 if (has_index(v.index_)) {
272 p = {dense_.get() + sparse_[v.index_], false};
273 } else {
274 p = {set_new(std::forward<U>(v).index_, std::forward<U>(v).second), true};
275 }
276 DebugCheckInvariants();
277 return p;
278 }
279
280 template <typename U>
281 iterator SetInternal(bool allow_overwrite, int i, U&& v) { // NOLINT
282 DebugCheckInvariants();
283 if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
284 assert(false && "illegal index");
285 // Semantically, end() would be better here, but we already know
286 // the user did something stupid, so begin() insulates them from
287 // dereferencing an invalid pointer.
288 return begin();
289 }
290 if (!allow_overwrite) {
291 assert(!has_index(i));
292 create_index(i);
293 } else {
294 if (!has_index(i))
295 create_index(i);
296 }
297 return set_existing(i, std::forward<U>(v)); // NOLINT
298 }
299
300 template <typename U>
301 iterator SetExistingInternal(int i, U&& v) { // NOLINT
302 DebugCheckInvariants();
303 assert(has_index(i));
304 dense_[sparse_[i]].value() = std::forward<U>(v);
305 DebugCheckInvariants();
306 return dense_.get() + sparse_[i];
307 }
308
309 // Add the index i to the array.
310 // Only use if has_index(i) is known to be false.
311 // Since it doesn't set the value associated with i,
312 // this function is private, only intended as a helper
313 // for other methods.
314 void create_index(int i);
315
316 // In debug mode, verify that some invariant properties of the class
317 // are being maintained. This is called at the end of the constructor
318 // and at the beginning and end of all public non-const member functions.
319 void DebugCheckInvariants() const;
320
321 // Initializes memory for elements [min, max).
322 void MaybeInitializeMemory(int min, int max) {
323#if __has_feature(memory_sanitizer)
324 __msan_unpoison(sparse_.get() + min, (max - min) * sizeof sparse_[0]);
325#elif defined(RE2_ON_VALGRIND)
326 for (int i = min; i < max; i++) {
327 sparse_[i] = 0xababababU;
328 }
329#endif
330 }
331
332 int size_ = 0;
333 int max_size_ = 0;
334 std::unique_ptr<int[]> sparse_;
335 std::unique_ptr<IndexValue[]> dense_;
336};
337
338template<typename Value>
339SparseArray<Value>::SparseArray() = default;
340
341template<typename Value>
342SparseArray<Value>::SparseArray(const SparseArray& src)
343 : size_(src.size_),
344 max_size_(src.max_size_),
345 sparse_(new int[max_size_]),
346 dense_(new IndexValue[max_size_]) {
347 std::copy_n(src.sparse_.get(), max_size_, sparse_.get());
348 std::copy_n(src.dense_.get(), max_size_, dense_.get());
349}
350
351template<typename Value>
352SparseArray<Value>::SparseArray(SparseArray&& src) /*noexcept*/ // NOLINT
353 : size_(src.size_),
354 max_size_(src.max_size_),
355 sparse_(std::move(src.sparse_)),
356 dense_(std::move(src.dense_)) {
357 src.size_ = 0;
358 src.max_size_ = 0;
359}
360
361template<typename Value>
362SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
363 size_ = src.size_;
364 max_size_ = src.max_size_;
365 std::unique_ptr<int[]> a(new int[max_size_]);
366 std::copy_n(src.sparse_.get(), src.max_size_, a.get());
367 sparse_ = std::move(a);
368 std::unique_ptr<IndexValue[]> b(new IndexValue[max_size_]);
369 std::copy_n(src.dense_.get(), src.max_size_, b.get());
370 dense_ = std::move(b);
371 return *this;
372}
373
374template<typename Value>
375SparseArray<Value>& SparseArray<Value>::operator=(
376 SparseArray&& src) /*noexcept*/ { // NOLINT
377 size_ = src.size_;
378 max_size_ = src.max_size_;
379 sparse_ = std::move(src.sparse_);
380 dense_ = std::move(src.dense_);
381 // clear out the source
382 src.size_ = 0;
383 src.max_size_ = 0;
384 return *this;
385}
386
387// IndexValue pairs: exposed in SparseArray::iterator.
388template<typename Value>
389class SparseArray<Value>::IndexValue {
390 friend class SparseArray;
391 public:
392 typedef int first_type;
393 typedef Value second_type;
394
395 IndexValue() {}
396 IndexValue(int i, const Value& v) : index_(i), second(v) {}
397 IndexValue(int i, Value&& v) : index_(i), second(std::move(v)) {}
398
399 int index() const { return index_; }
400
401 Value& value() /*&*/ { return second; }
402 const Value& value() const /*&*/ { return second; }
403 //Value&& value() /*&&*/ { return std::move(second); } // NOLINT
404
405 private:
406 int index_;
407
408 public:
409 // Provide the data in the 'second' member so that the utilities
410 // in map-util work.
411 // TODO(billydonahue): 'second' is public for short-term compatibility.
412 // Users will be transitioned to using value() accessor.
413 Value second;
414};
415
416template<typename Value>
417const typename SparseArray<Value>::IndexValue&
418SparseArray<Value>::iv(int i) const {
419 assert(i >= 0);
420 assert(i < size_);
421 return dense_[i];
422}
423
424// Change the maximum size of the array.
425// Invalidates all iterators.
426template<typename Value>
427void SparseArray<Value>::resize(int max_size) {
428 DebugCheckInvariants();
429 if (max_size > max_size_) {
430 std::unique_ptr<int[]> a(new int[max_size]);
431 if (sparse_) {
432 std::copy_n(sparse_.get(), max_size_, a.get());
433 }
434 sparse_ = std::move(a);
435
436 std::unique_ptr<IndexValue[]> b(new IndexValue[max_size]);
437 if (dense_) {
438 std::copy_n(dense_.get(), max_size_, b.get());
439 }
440 dense_ = std::move(b);
441
442 MaybeInitializeMemory(max_size_, max_size);
443 }
444 max_size_ = max_size;
445 if (size_ > max_size_)
446 size_ = max_size_;
447 DebugCheckInvariants();
448}
449
450// Check whether index i is in the array.
451template<typename Value>
452bool SparseArray<Value>::has_index(int i) const {
453 assert(i >= 0);
454 assert(i < max_size_);
455 if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size_)) {
456 return false;
457 }
458 // Unsigned comparison avoids checking sparse_[i] < 0.
459 return (uint32_t)sparse_[i] < (uint32_t)size_ &&
460 dense_[sparse_[i]].index_ == i;
461}
462
463template<typename Value>
464const Value& SparseArray<Value>::get_existing(int i) const {
465 assert(has_index(i));
466 return dense_[sparse_[i]].second;
467}
468
469template<typename Value>
470void SparseArray<Value>::erase(int i) {
471 DebugCheckInvariants();
472 if (has_index(i))
473 erase_existing(i);
474 DebugCheckInvariants();
475}
476
477template<typename Value>
478void SparseArray<Value>::erase_existing(int i) {
479 DebugCheckInvariants();
480 assert(has_index(i));
481 int di = sparse_[i];
482 if (di < size_ - 1) {
483 dense_[di] = std::move(dense_[size_ - 1]);
484 sparse_[dense_[di].index_] = di;
485 }
486 size_--;
487 DebugCheckInvariants();
488}
489
490template<typename Value>
491void SparseArray<Value>::create_index(int i) {
492 assert(!has_index(i));
493 assert(size_ < max_size_);
494 sparse_[i] = size_;
495 dense_[size_].index_ = i;
496 size_++;
497}
498
499template<typename Value> SparseArray<Value>::SparseArray(int max_size) {
500 sparse_.reset(new int[max_size]);
501 dense_.reset(new IndexValue[max_size]);
502 size_ = 0;
503 MaybeInitializeMemory(size_, max_size);
504 max_size_ = max_size;
505 DebugCheckInvariants();
506}
507
508template<typename Value> SparseArray<Value>::~SparseArray() {
509 DebugCheckInvariants();
510}
511
512template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
513 assert(0 <= size_);
514 assert(size_ <= max_size_);
515 assert(size_ == 0 || sparse_ != NULL);
516}
517
518// Comparison function for sorting.
519template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,
520 const IndexValue& b) {
521 return a.index_ < b.index_;
522}
523
524} // namespace re2
525
526#endif // UTIL_SPARSE_ARRAY_H_
527