1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #ifndef PARTITIONED_SET_H |
30 | #define PARTITIONED_SET_H |
31 | |
32 | #include "container.h" |
33 | #include "noncopyable.h" |
34 | #include "flat_containers.h" |
35 | #include "ue2common.h" |
36 | |
37 | #include <algorithm> |
38 | #include <vector> |
39 | |
40 | #include <boost/dynamic_bitset.hpp> |
41 | |
42 | namespace ue2 { |
43 | |
44 | static constexpr size_t INVALID_SUBSET = ~(size_t)0; |
45 | |
46 | /** |
47 | * partition_set represents a partitioning of a set of integers [0, n) into |
48 | * disjoint non-empty subsets. |
49 | * |
50 | * The subsets themselves are also indexed by integers. |
51 | * |
52 | * The underlying integer type for the set members is parameterized. |
53 | */ |
54 | |
55 | template<typename T> |
56 | class partitioned_set : noncopyable { |
57 | public: |
58 | class subset { |
59 | public: |
60 | typedef typename std::vector<T>::const_iterator const_iterator; |
61 | |
62 | size_t size() const { |
63 | assert(members.size()); |
64 | return members.size(); |
65 | } |
66 | |
67 | const_iterator begin() const { |
68 | return members.begin(); |
69 | } |
70 | |
71 | const_iterator end() const { |
72 | return members.end(); |
73 | } |
74 | |
75 | private: |
76 | std::vector<T> members; /**< sorted members of the subset */ |
77 | |
78 | friend class partitioned_set; |
79 | }; |
80 | |
81 | /** returns the number of subsets in the partition */ |
82 | size_t size() const { return subsets.size(); } |
83 | |
84 | /** returns the subset with the given index */ |
85 | const subset &operator[](size_t subset_index) const { |
86 | assert(subset_index < size()); |
87 | return subsets[subset_index]; |
88 | } |
89 | |
90 | /** |
91 | * Splits the subset with the given subset_index based on whether its |
92 | * members are also members of the splitter set. |
93 | * |
94 | * The smaller of the intersection and difference is placed into a new |
95 | * subset, the index of which is returned. The larger part remains with the |
96 | * subset index. |
97 | * |
98 | * If the set was not split (due to there being no overlap with splitter or |
99 | * being a complete subset), INVALID_SUBSET is returned. |
100 | */ |
101 | size_t split(size_t subset_index, const flat_set<T> &splitter) { |
102 | assert(!splitter.empty()); |
103 | if (splitter.empty()) { |
104 | return INVALID_SUBSET; |
105 | } |
106 | |
107 | subset &orig = subsets[subset_index]; |
108 | |
109 | assert(orig.size()); |
110 | |
111 | split_temp_diff.clear(); |
112 | split_temp_inter.clear(); |
113 | |
114 | auto sp_it = splitter.begin(); |
115 | auto sp_e = splitter.end(); |
116 | |
117 | /* subset members are always in sorted order. */ |
118 | assert(std::is_sorted(orig.members.begin(), orig.members.end())); |
119 | |
120 | if (orig.members.back() < *sp_it) { |
121 | /* first splitter is greater than all our members */ |
122 | return INVALID_SUBSET; |
123 | } |
124 | |
125 | if (orig.members.front() > *splitter.rbegin()) { |
126 | /* last splitter is less than all our members */ |
127 | return INVALID_SUBSET; |
128 | } |
129 | |
130 | for (auto it = orig.members.begin(); it != orig.members.end(); ++it) { |
131 | const auto &member = *it; |
132 | assert(member < member_to_subset.size()); |
133 | |
134 | sp_it = std::lower_bound(sp_it, sp_e, member); |
135 | if (sp_it == sp_e) { |
136 | split_temp_diff.insert(split_temp_diff.end(), it, |
137 | orig.members.end()); |
138 | break; |
139 | } |
140 | |
141 | if (*sp_it > member) { |
142 | split_temp_diff.push_back(member); |
143 | } else { |
144 | split_temp_inter.push_back(member); |
145 | } |
146 | } |
147 | |
148 | assert(split_temp_diff.size() + split_temp_inter.size() == orig.size()); |
149 | |
150 | if (split_temp_inter.empty()) { |
151 | assert(split_temp_diff == orig.members); |
152 | return INVALID_SUBSET; |
153 | } |
154 | |
155 | if (split_temp_diff.empty()) { |
156 | assert(split_temp_inter == orig.members); |
157 | return INVALID_SUBSET; |
158 | } |
159 | |
160 | assert(MIN(split_temp_inter[0], split_temp_diff[0]) == orig.members[0]); |
161 | |
162 | /* work out which is the bigger half */ |
163 | std::vector<T> *big; |
164 | std::vector<T> *small; |
165 | if (split_temp_diff.size() > split_temp_inter.size()) { |
166 | big = &split_temp_diff; |
167 | small = &split_temp_inter; |
168 | } else { |
169 | big = &split_temp_inter; |
170 | small = &split_temp_diff; |
171 | } |
172 | |
173 | /* larger subset replaces the input subset */ |
174 | std::vector<T> temp_i; |
175 | insert(&temp_i, temp_i.end(), *big); |
176 | orig.members.swap(temp_i); |
177 | |
178 | /* smaller subset is placed in the new subset */ |
179 | size_t new_index = subsets.size(); |
180 | subsets.push_back(subset()); |
181 | insert(&subsets.back().members, subsets.back().members.end(), *small); |
182 | |
183 | for (const auto &e : *small) { |
184 | member_to_subset[e] = new_index; |
185 | } |
186 | |
187 | return new_index; |
188 | } |
189 | |
190 | /** |
191 | * Returns all subsets which have a member in keys. |
192 | */ |
193 | void find_overlapping(const flat_set<T> &keys, |
194 | std::vector<size_t> *containing) const { |
195 | boost::dynamic_bitset<> seen(subsets.size()); // all zero by default. |
196 | |
197 | for (const auto &key : keys) { |
198 | assert(key < member_to_subset.size()); |
199 | size_t sub = member_to_subset[key]; |
200 | assert(sub < subsets.size()); |
201 | seen.set(sub); |
202 | } |
203 | |
204 | for (size_t i = seen.find_first(); i != seen.npos; |
205 | i = seen.find_next(i)) { |
206 | containing->push_back(i); |
207 | } |
208 | } |
209 | |
210 | /** |
211 | * Creates a partitioned set containing elements [0, state_to_subset.size() ) |
212 | * |
213 | * The initial subset that an element belongs to is given by the |
214 | * corresponding entry in state_to_subset. The subsets should be identified |
215 | * by a dense range of indices starting from 0. |
216 | */ |
217 | explicit partitioned_set(const std::vector<size_t> &state_to_subset) { |
218 | assert(!state_to_subset.empty()); |
219 | |
220 | subsets.reserve(state_to_subset.size()); |
221 | member_to_subset.resize(state_to_subset.size()); |
222 | |
223 | split_temp_inter.reserve(state_to_subset.size()); |
224 | split_temp_diff.reserve(state_to_subset.size()); |
225 | |
226 | size_t subset_count = 0; |
227 | for (const auto &sub : state_to_subset) { |
228 | assert(sub != INVALID_SUBSET); |
229 | ENSURE_AT_LEAST(&subset_count, sub + 1); |
230 | } |
231 | assert(subset_count <= state_to_subset.size()); |
232 | |
233 | subsets.resize(subset_count); |
234 | for (size_t i = 0; i < state_to_subset.size(); i++) { |
235 | /* ensure that our underlying type is big enough to hold all our |
236 | * set members */ |
237 | assert(i == (size_t)(T)i); |
238 | |
239 | size_t sub = state_to_subset[i]; |
240 | assert(sub < subsets.size()); |
241 | |
242 | member_to_subset[i] = sub; |
243 | subsets[sub].members.push_back(i); |
244 | } |
245 | |
246 | /* none of the subsets should be empty */ |
247 | assert(std::all_of(subsets.begin(), subsets.end(), |
248 | [](const subset &sub){ return sub.size() > 0; })); |
249 | } |
250 | |
251 | private: |
252 | std::vector<size_t> member_to_subset; |
253 | std::vector<subset> subsets; |
254 | |
255 | std::vector<T> split_temp_inter; /**< used internally by split to hold the |
256 | * intersection. */ |
257 | std::vector<T> split_temp_diff; /**< used internally by split to hold the |
258 | * set difference. */ |
259 | }; |
260 | |
261 | } // namespace |
262 | |
263 | #endif |
264 | |