1 | /* |
2 | * Copyright (c) 2016-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "rose_build_exclusive.h" |
30 | |
31 | #include "ue2common.h" |
32 | #include "rose_build_merge.h" |
33 | #include "nfa/castlecompile.h" |
34 | #include "nfagraph/ng_execute.h" |
35 | #include "nfagraph/ng_holder.h" |
36 | #include "nfagraph/ng_util.h" |
37 | #include "util/clique.h" |
38 | #include "util/compile_context.h" |
39 | #include "util/container.h" |
40 | #include "util/flat_containers.h" |
41 | #include "util/graph.h" |
42 | #include "util/make_unique.h" |
43 | |
44 | using namespace std; |
45 | |
46 | namespace ue2 { |
47 | |
48 | template<typename role_id> |
49 | struct RoleChunk { |
50 | vector<RoleInfo<role_id>> roles; |
51 | }; |
52 | |
53 | static |
54 | CharReach getReachability(const NGHolder &h) { |
55 | CharReach cr; |
56 | for (const auto &v : vertices_range(h)) { |
57 | if (!is_special(v, h)) { |
58 | cr |= h[v].char_reach; |
59 | } |
60 | } |
61 | return cr; |
62 | } |
63 | |
64 | template<typename role_id> |
65 | static |
66 | vector<RoleChunk<role_id>> divideIntoChunks(const RoseBuildImpl &build, |
67 | set<RoleInfo<role_id>> &roleInfoSet) { |
68 | u32 chunkSize = build.cc.grey.tamaChunkSize; |
69 | u32 cnt = 1; |
70 | vector<RoleChunk<role_id>> chunks; |
71 | RoleChunk<role_id> roleChunk; |
72 | for (const auto &roleInfo : roleInfoSet) { |
73 | if (cnt == chunkSize) { |
74 | cnt -= chunkSize; |
75 | chunks.push_back(roleChunk); |
76 | roleChunk.roles.clear(); |
77 | } |
78 | roleChunk.roles.push_back(roleInfo); |
79 | cnt++; |
80 | } |
81 | |
82 | if (cnt > 1) { |
83 | chunks.push_back(roleChunk); |
84 | } |
85 | |
86 | return chunks; |
87 | } |
88 | |
89 | /* add prefix literals to engine graph */ |
90 | static |
91 | bool addPrefixLiterals(NGHolder &h, unordered_set<u32> &tailId, |
92 | const vector<vector<CharReach>> &triggers) { |
93 | DEBUG_PRINTF("add literals to graph\n" ); |
94 | |
95 | NFAVertex start = h.start; |
96 | vector<NFAVertex> heads; |
97 | vector<NFAVertex> tails; |
98 | for (const auto &lit : triggers) { |
99 | NFAVertex last = start; |
100 | if (lit.empty()) { |
101 | return false; |
102 | } |
103 | u32 i = 0; |
104 | for (const auto &c : lit) { |
105 | DEBUG_PRINTF("lit:%s \n" , c.to_string().c_str()); |
106 | NFAVertex u = add_vertex(h); |
107 | h[u].char_reach = c; |
108 | if (!i++) { |
109 | heads.push_back(u); |
110 | last = u; |
111 | continue; |
112 | } |
113 | add_edge(last, u, h); |
114 | last = u; |
115 | } |
116 | tails.push_back(last); |
117 | tailId.insert(h[last].index); |
118 | } |
119 | |
120 | for (auto v : adjacent_vertices_range(start, h)) { |
121 | if (v != h.startDs) { |
122 | for (auto &t : tails) { |
123 | add_edge(t, v, h); |
124 | } |
125 | } |
126 | } |
127 | |
128 | clear_out_edges(start, h); |
129 | add_edge(h.start, h.start, h); |
130 | for (auto &t : heads) { |
131 | add_edge(start, t, h); |
132 | } |
133 | |
134 | DEBUG_PRINTF("literals addition done\n" ); |
135 | return true; |
136 | } |
137 | |
138 | /* check if one literal is suffix of another */ |
139 | static |
140 | bool isSuffix(const vector<vector<CharReach>> &triggers1, |
141 | const vector<vector<CharReach>> &triggers2) { |
142 | // literal suffix test |
143 | for (const auto &lit1 : triggers1) { |
144 | for (const auto &lit2 : triggers2) { |
145 | const size_t len = min(lit1.size(), lit2.size()); |
146 | if (equal(lit1.rbegin(), lit1.rbegin() + len, |
147 | lit2.rbegin(), overlaps)) { |
148 | return true; |
149 | } |
150 | } |
151 | } |
152 | return false; |
153 | } |
154 | |
155 | /* prepare initial infix or suffix graph used for exclusive analysis */ |
156 | template<typename role_id> |
157 | static |
158 | u32 prepareRoleGraph(NGHolder &h, const role_id &s1) { |
159 | u32 num = 0; |
160 | if (s1.castle()) { |
161 | num = num_vertices(h); |
162 | NFAVertex u = add_vertex(h); |
163 | h[u].char_reach = s1.castle()->reach(); |
164 | add_edge(h.startDs, u, h); |
165 | // add self loop to repeat characters |
166 | add_edge(u, u, h); |
167 | } else if (s1.graph()) { |
168 | const NGHolder &g = *s1.graph(); |
169 | cloneHolder(h, g); |
170 | num = num_vertices(h); |
171 | } else { |
172 | // only infixes and suffixes with graph properties are possible |
173 | // candidates, already filtered out other cases before |
174 | // exclusive analysis |
175 | assert(0); |
176 | } |
177 | |
178 | return num; |
179 | } |
180 | |
181 | /* get a subset of literal if reset character is found */ |
182 | static |
183 | vector<CharReach> findStartPos(const CharReach &cr1, |
184 | const vector<CharReach> &lit) { |
185 | auto it = lit.rbegin(), ite = lit.rend(); |
186 | u32 pos = lit.size(); |
187 | for (; it != ite; it++) { |
188 | if (!overlaps(cr1, *it)) { |
189 | break; |
190 | } |
191 | pos--; |
192 | } |
193 | |
194 | return vector<CharReach> (lit.begin() + pos, lit.end()); |
195 | } |
196 | |
197 | template<typename role_id> |
198 | static |
199 | bool isExclusive(const NGHolder &h, |
200 | const u32 num, unordered_set<u32> &tailId, |
201 | map<u32, unordered_set<u32>> &skipList, |
202 | const RoleInfo<role_id> &role1, |
203 | const RoleInfo<role_id> &role2) { |
204 | const u32 id1 = role1.id; |
205 | const u32 id2 = role2.id; |
206 | |
207 | if (contains(skipList, id1) && contains(skipList[id1], id2)) { |
208 | return false; |
209 | } |
210 | |
211 | const auto &triggers1 = role1.literals; |
212 | const auto &triggers2 = role2.literals; |
213 | if (isSuffix(triggers1, triggers2)) { |
214 | skipList[id2].insert(id1); |
215 | return false; |
216 | } |
217 | |
218 | DEBUG_PRINTF("role id2:%u\n" , id2); |
219 | const auto &cr1 = role1.cr; |
220 | if (overlaps(cr1, role2.last_cr)) { |
221 | CharReach cr = cr1 | role1.prefix_cr; |
222 | flat_set<NFAVertex> states; |
223 | for (const auto &lit : triggers2) { |
224 | auto lit1 = findStartPos(cr, lit); |
225 | if (lit1.empty()) { |
226 | continue; |
227 | } |
228 | |
229 | states.clear(); |
230 | |
231 | if (lit1.size() < lit.size()) { |
232 | // Only starts. |
233 | states.insert(h.start); |
234 | states.insert(h.startDs); |
235 | } else { |
236 | // All vertices. |
237 | insert(&states, vertices(h)); |
238 | } |
239 | |
240 | auto activeStates = execute_graph(h, lit1, states); |
241 | // Check if only literal states are on |
242 | for (const auto &s : activeStates) { |
243 | if ((!is_any_start(s, h) && h[s].index <= num) || |
244 | contains(tailId, h[s].index)) { |
245 | skipList[id2].insert(id1); |
246 | return false; |
247 | } |
248 | } |
249 | } |
250 | } |
251 | |
252 | return true; |
253 | } |
254 | |
255 | template<typename role_id> |
256 | static |
257 | unordered_set<u32> checkExclusivity(const NGHolder &h, |
258 | const u32 num, unordered_set<u32> &tailId, |
259 | map<u32, unordered_set<u32>> &skipList, |
260 | const RoleInfo<role_id> &role1, |
261 | const RoleChunk<role_id> &roleChunk) { |
262 | unordered_set<u32> info; |
263 | const u32 id1 = role1.id; |
264 | for (const auto &role2 : roleChunk.roles) { |
265 | const u32 id2 = role2.id; |
266 | if (id1 != id2 && isExclusive(h, num, tailId, skipList, |
267 | role1, role2)) { |
268 | info.insert(id2); |
269 | } |
270 | } |
271 | |
272 | return info; |
273 | } |
274 | |
275 | static |
276 | void findCliques(const map<u32, set<u32>> &exclusiveGroups, |
277 | vector<vector<u32>> &exclusive_roles) { |
278 | if (exclusiveGroups.empty()) { |
279 | return; |
280 | } |
281 | // Construct the exclusivity graph |
282 | map<u32, CliqueVertex> vertex_map; |
283 | unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>(); |
284 | |
285 | // Add vertices representing infixes/suffixes |
286 | for (const auto &e : exclusiveGroups) { |
287 | const u32 id = e.first; |
288 | CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg); |
289 | vertex_map[id] = v1; |
290 | } |
291 | |
292 | // Wire exclusive pairs |
293 | for (const auto &e1 : exclusiveGroups) { |
294 | const u32 literalId1 = e1.first; |
295 | CliqueVertex lv = vertex_map[literalId1]; |
296 | const set<u32> &exclusiveSet = e1.second; |
297 | for (const auto &e2 : exclusiveGroups) { |
298 | const u32 literalId2 = e2.first; |
299 | if (literalId1 < literalId2 && |
300 | contains(exclusiveSet, literalId2)) { |
301 | add_edge(lv, vertex_map[literalId2], *cg); |
302 | DEBUG_PRINTF("Wire %u:%u\n" , literalId1, literalId2); |
303 | } |
304 | } |
305 | } |
306 | |
307 | // Find clique groups |
308 | const auto &clique = removeClique(*cg); |
309 | for (const auto &i : clique) { |
310 | DEBUG_PRINTF("cliq:%zu\n" , i.size()); |
311 | if (i.size() > 1) { |
312 | exclusive_roles.push_back(i); |
313 | } |
314 | } |
315 | DEBUG_PRINTF("Clique graph size:%zu\n" , exclusive_roles.size()); |
316 | } |
317 | |
318 | static |
319 | map<u32, set<u32>> findExclusiveGroups(const RoseBuildImpl &build, |
320 | const map<u32, unordered_set<u32>> &exclusiveInfo, |
321 | const map<u32, vector<RoseVertex>> &vertex_map, |
322 | const bool is_infix) { |
323 | map<u32, set<u32>> exclusiveGroups; |
324 | for (const auto &e : exclusiveInfo) { |
325 | u32 i = e.first; |
326 | const auto &s = e.second; |
327 | set<u32> group; |
328 | set<RoseVertex> q1(vertex_map.at(i).begin(), |
329 | vertex_map.at(i).end()); |
330 | DEBUG_PRINTF("vertex set:%zu\n" , q1.size()); |
331 | for (const auto &val : s) { |
332 | set<RoseVertex> q2(vertex_map.at(val).begin(), |
333 | vertex_map.at(val).end()); |
334 | if (contains(exclusiveInfo.at(val), i) && |
335 | (!is_infix || mergeableRoseVertices(build, q1, q2))) { |
336 | group.insert(val); |
337 | } |
338 | } |
339 | if (!group.empty()) { |
340 | exclusiveGroups[i] = group; |
341 | } |
342 | } |
343 | |
344 | return exclusiveGroups; |
345 | } |
346 | |
347 | template<typename role_id> |
348 | static |
349 | bool setTriggerLiterals(RoleInfo<role_id> &roleInfo, |
350 | const map<u32, vector<vector<CharReach>>> &triggers) { |
351 | u32 minLiteralLen = ~0U; |
352 | for (const auto &tr : triggers) { |
353 | for (const auto &lit : tr.second) { |
354 | if (lit.empty()) { |
355 | return false; |
356 | } |
357 | minLiteralLen = min(minLiteralLen, (u32)lit.size()); |
358 | roleInfo.last_cr |= lit.back(); |
359 | for (const auto &c : lit) { |
360 | roleInfo.prefix_cr |= c; |
361 | } |
362 | roleInfo.literals.push_back(lit); |
363 | } |
364 | } |
365 | |
366 | if (roleInfo.role.graph()) { |
367 | const NGHolder &g = *roleInfo.role.graph(); |
368 | roleInfo.cr = getReachability(g); |
369 | } else if (roleInfo.role.castle()) { |
370 | roleInfo.cr = roleInfo.role.castle()->reach(); |
371 | } |
372 | |
373 | // test the score of this engine |
374 | roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen; |
375 | if (roleInfo.score < 20) { |
376 | return false; |
377 | } |
378 | |
379 | return true; |
380 | } |
381 | |
382 | bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo, |
383 | const map<u32, vector<vector<CharReach>>> &triggers) { |
384 | return setTriggerLiterals(roleInfo, triggers); |
385 | } |
386 | |
387 | bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo, |
388 | const map<u32, vector<vector<CharReach>>> &triggers) { |
389 | return setTriggerLiterals(roleInfo, triggers); |
390 | } |
391 | |
392 | template<typename role_id> |
393 | static |
394 | void exclusiveAnalysis(const RoseBuildImpl &build, |
395 | const map<u32, vector<RoseVertex>> &vertex_map, |
396 | set<RoleInfo<role_id>> &roleInfoSet, |
397 | vector<vector<u32>> &exclusive_roles, const bool is_infix) { |
398 | const auto &chunks = divideIntoChunks(build, roleInfoSet); |
399 | DEBUG_PRINTF("Exclusivity analysis entry\n" ); |
400 | map<u32, unordered_set<u32>> exclusiveInfo; |
401 | |
402 | for (const auto &roleChunk : chunks) { |
403 | map<u32, unordered_set<u32>> skipList; |
404 | for (const auto &role1 : roleChunk.roles) { |
405 | const u32 id1 = role1.id; |
406 | const role_id &s1 = role1.role; |
407 | const auto &triggers1 = role1.literals; |
408 | |
409 | NGHolder h; |
410 | u32 num = prepareRoleGraph(h, s1); |
411 | DEBUG_PRINTF("role id1:%u\n" , id1); |
412 | unordered_set<u32> tailId; |
413 | if (!addPrefixLiterals(h, tailId, triggers1)) { |
414 | continue; |
415 | } |
416 | |
417 | exclusiveInfo[id1] = checkExclusivity(h, num, tailId, |
418 | skipList, role1, roleChunk); |
419 | } |
420 | } |
421 | |
422 | // Create final candidate exclusive groups |
423 | const auto exclusiveGroups = |
424 | findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix); |
425 | exclusiveInfo.clear(); |
426 | |
427 | // Find cliques for each exclusive groups |
428 | findCliques(exclusiveGroups, exclusive_roles); |
429 | } |
430 | |
431 | void exclusiveAnalysisInfix(const RoseBuildImpl &build, |
432 | const map<u32, vector<RoseVertex>> &vertex_map, |
433 | set<RoleInfo<left_id>> &roleInfoSet, |
434 | vector<vector<u32>> &exclusive_roles) { |
435 | exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, |
436 | true); |
437 | } |
438 | |
439 | void exclusiveAnalysisSuffix(const RoseBuildImpl &build, |
440 | const map<u32, vector<RoseVertex>> &vertex_map, |
441 | set<RoleInfo<suffix_id>> &roleInfoSet, |
442 | vector<vector<u32>> &exclusive_roles) { |
443 | exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, |
444 | false); |
445 | } |
446 | |
447 | } // namespace ue2 |
448 | |