/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * \brief Castle: multi-tenant repeat engine, compiler code.
 */

#include "castlecompile.h"

#include "castle_internal.h"
#include "limex_limits.h"
#include "nfa_internal.h"
#include "repeatcompile.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_equivalence.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_redundancy.h"
#include "nfagraph/ng_util.h"
#include "util/alloc.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/report_manager.h"
#include "util/verify_types.h"
#include "grey.h"

#include <stack>
#include <cassert>

#include <boost/graph/adjacency_list.hpp>
#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_keys;
using boost::adaptors::map_values;

namespace ue2 {

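/** \brief Maximum number of vertices in each clique subgraph built by
 * checkExclusion(), bounding the cost of the greedy clique search. */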
#define CLIQUE_GRAPH_MAX_SIZE 1000

static
u32 depth_to_u32(const depth &d) {
    assert(d.is_reachable());
    if (d.is_infinite()) {
        return REPEAT_INF;
    }

    u32 d_val = d;
    assert(d_val < REPEAT_INF);
    return d_val;
}

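/**
 * \brief Select the scan engine the runtime will use to look for characters
 * outside the castle's reach \p cr.
 *
 * Preference order: dot (when no such character exists), then the
 * single-character vermicelli variants, then shufti if masks can be built
 * for the negated reach, with truffle as the fallback. For example, reach
 * [^a] negates to {'a'}, so it is handled as CASTLE_VERM scanning for 'a'.
 */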
static
void writeCastleScanEngine(const CharReach &cr, Castle *c) {
    if (cr.all()) {
        c->type = CASTLE_DOT;
        return;
    }

    if (cr.count() == 1) {
        c->type = CASTLE_NVERM;
        c->u.verm.c = cr.find_first();
        return;
    }

    const CharReach negated(~cr);
    if (negated.count() == 1) {
        c->type = CASTLE_VERM;
        c->u.verm.c = negated.find_first();
        return;
    }

    if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
                         (u8 *)&c->u.shuf.mask_hi) != -1) {
        c->type = CASTLE_SHUFTI;
        return;
    }

    c->type = CASTLE_TRUFFLE;
    truffleBuildMasks(negated, (u8 *)&c->u.truffle.mask1,
                      (u8 *)&c->u.truffle.mask2);
}

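/**
 * \brief Overlap test used when proving mutual exclusion of two trigger
 * literals, where \p dist is the minimum reset distance for \p b.
 *
 * Returns false (exclusion cannot be proven) if an occurrence of \p a may
 * end within \p dist characters of the end of \p b, or if \p b is too short
 * (size <= \p dist) to be guaranteed to contain a reset character.
 */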
static
bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b,
                    const size_t dist) {
    for (size_t i = 0; i < b.size(); i++) {
        if (i > dist) {
            return true;
        }
        size_t overlap_len = b.size() - i;
        if (overlap_len <= a.size()) {
            if (matches(a.end() - overlap_len, a.end(), b.begin(),
                        b.end() - i)) {
                return false;
            }
        } else {
            assert(overlap_len > a.size());
            if (matches(a.begin(), a.end(), b.end() - i - a.size(),
                        b.end() - i)) {
                return false;
            }
        }
    }

    return b.size() > dist;
}

struct CliqueVertexProps {
    CliqueVertexProps() {}
    explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}

    u32 stateId = ~0U;
};

typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
                              CliqueVertexProps> CliqueGraph;
typedef CliqueGraph::vertex_descriptor CliqueVertex;

static
void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
                     const CliqueVertex &cv, const set<u32> &group) {
    u32 id = g[cv].stateId;

    // find neighbors for cv
    for (const auto &v : adjacent_vertices_range(cv, g)) {
        if (g[v].stateId != id && contains(group, g[v].stateId)) {
            neighbor.push_back(g[v].stateId);
            DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
        }
    }
}

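/**
 * \brief Greedily grow a single clique of \p cg into \p clique.
 *
 * Starting from the full vertex set, repeatedly commit the first candidate
 * vertex to the clique and restrict the candidate set to that vertex's
 * neighbours. This is a heuristic: it yields a maximal clique, not
 * necessarily a maximum one.
 */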
static
void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) {
    stack<vector<u32>> gStack;

    // Create mapping between vertex and id
    map<u32, CliqueVertex> vertexMap;
    vector<u32> init;
    for (const auto &v : vertices_range(cg)) {
        vertexMap[cg[v].stateId] = v;
        init.push_back(cg[v].stateId);
    }
    gStack.push(init);

    while (!gStack.empty()) {
        vector<u32> g = gStack.top();
        gStack.pop();

        // Choose a vertex from the current candidate set
        u32 id = g[0];
        const CliqueVertex &n = vertexMap.at(id);
        clique.push_back(id);
        // Find its neighbors within the current candidate set
        vector<u32> neighbor;
        set<u32> subgraphId(g.begin(), g.end());
        getNeighborInfo(cg, neighbor, n, subgraphId);
        // Continue with the subgraph induced by those neighbors
        if (!neighbor.empty()) {
            gStack.push(neighbor);
        }
    }
}

template<typename Graph>
bool graph_empty(const Graph &g) {
    typename Graph::vertex_iterator vi, ve;
    tie(vi, ve) = vertices(g);
    return vi == ve;
}

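/**
 * \brief Repeatedly extract greedy cliques from \p cg, removing each
 * clique's vertices from the graph, and return the largest clique found.
 */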
static
vector<u32> removeClique(CliqueGraph &cg) {
    vector<vector<u32>> cliquesVec(1);
    DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg));
    findCliqueGroup(cg, cliquesVec[0]);
    while (!graph_empty(cg)) {
        const vector<u32> &c = cliquesVec.back();
        vector<CliqueVertex> dead;
        for (const auto &v : vertices_range(cg)) {
            if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
                dead.push_back(v);
            }
        }
        for (const auto &v : dead) {
            clear_vertex(v, cg);
            remove_vertex(v, cg);
        }
        if (graph_empty(cg)) {
            break;
        }
        vector<u32> clique;
        findCliqueGroup(cg, clique);
        cliquesVec.push_back(clique);
    }

    // pick the clique with max size
    size_t max = 0;
    size_t id = 0;
    for (size_t j = 0; j < cliquesVec.size(); ++j) {
        if (cliquesVec[j].size() > max) {
            max = cliquesVec[j].size();
            id = j;
        }
    }

    DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size());
    return cliquesVec[id];
}

// If some reset character in one literal is located after every position
// where that literal can overlap the other literal, then the two literals
// are mutually exclusive.
static
bool findExclusivePair(const size_t id1, const size_t id2,
                       const size_t lower,
                       const vector<vector<size_t>> &min_reset_dist,
                       const vector<vector<vector<CharReach>>> &triggers) {
    const auto &triggers1 = triggers[id1];
    const auto &triggers2 = triggers[id2];
    for (size_t i = 0; i < triggers1.size(); ++i) {
        for (size_t j = 0; j < triggers2.size(); ++j) {
            if (!literalOverlap(triggers1[i], triggers2[j],
                                min_reset_dist[id2 - lower][j]) ||
                !literalOverlap(triggers2[j], triggers1[i],
                                min_reset_dist[id1 - lower][i])) {
                return false;
            }
        }
    }
    return true;
}

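/**
 * \brief Look for groups of mutually exclusive repeats.
 *
 * Processes the repeats in chunks of at most CLIQUE_GRAPH_MAX_SIZE: each
 * chunk becomes a graph with an edge for every exclusive pair (see
 * findExclusivePair), and the largest clique of each chunk forms a group.
 * If every repeat lands in a group, the castle is PURE_EXCLUSIVE and the
 * active-repeat multibit can be dropped from stream state.
 */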
static
vector<vector<u32>> checkExclusion(u32 &streamStateSize,
                                   const CharReach &cr,
                                   const vector<vector<vector<CharReach>>> &triggers,
                                   enum ExclusiveType &exclusive,
                                   const size_t numRepeats) {
    vector<vector<u32>> groups;
    size_t trigSize = triggers.size();
    DEBUG_PRINTF("trigSize %zu\n", trigSize);

    size_t lower = 0;
    size_t total = 0;
    while (lower < trigSize) {
        vector<CliqueVertex> vertices;
        unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();

        vector<vector<size_t>> min_reset_dist;
        size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize);
        // get min reset distance for each repeat
        for (size_t i = lower; i < upper; i++) {
            CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
            vertices.push_back(v);

            const vector<size_t> &tmp_dist =
                minResetDistToEnd(triggers[i], cr);
            min_reset_dist.push_back(tmp_dist);
        }

        // find exclusive pair for each repeat
        for (size_t i = lower; i < upper; i++) {
            CliqueVertex s = vertices[i - lower];
            for (size_t j = i + 1; j < upper; j++) {
                if (findExclusivePair(i, j, lower, min_reset_dist,
                                      triggers)) {
                    CliqueVertex d = vertices[j - lower];
                    add_edge(s, d, *cg);
                }
            }
        }

        // find the largest exclusive group
        auto clique = removeClique(*cg);
        size_t cliqueSize = clique.size();
        if (cliqueSize > 1) {
            groups.push_back(clique);
            exclusive = EXCLUSIVE;
            total += cliqueSize;
        }

        lower += CLIQUE_GRAPH_MAX_SIZE;
    }
    DEBUG_PRINTF("clique size %zu, num of repeats %zu\n", total, numRepeats);
    if (total == numRepeats) {
        exclusive = PURE_EXCLUSIVE;
        streamStateSize = 0;
    }

    return groups;
}

namespace {
struct ExclusiveInfo {

    /** Mapping between top and exclusive group id */
    map<u32, u32> groupId;

    /** Number of exclusive groups */
    u32 numGroups = 0;
};
}

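/**
 * \brief Fill in a SubCastle and RepeatInfo for each repeat in \p proto,
 * laying out scratch and stream state as we go.
 *
 * Repeats in the same exclusive group share one RepeatControl slot and a
 * stream state region sized for the group's largest member; all other
 * repeats are laid out back to back. Repeats with a finite max bound are
 * recorded in \p may_stale, and REPEAT_SPARSE_OPTIMAL_P models append their
 * patch tables to \p tables.
 */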
static
void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
                     vector<RepeatInfo> &infos, vector<u64a> &patchSize,
                     const vector<pair<depth, bool>> &repeatInfoPair,
                     u32 &scratchStateSize, u32 &streamStateSize,
                     u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
                     const ExclusiveInfo &exclusiveInfo,
                     vector<u32> &may_stale, const ReportManager &rm) {
    const bool remap_reports = has_managed_reports(proto.kind);

    u32 i = 0;
    const auto &groupId = exclusiveInfo.groupId;
    const auto &numGroups = exclusiveInfo.numGroups;
    vector<u32> maxStreamSize(numGroups, 0);

    for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
         it != ite; ++it, ++i) {
        const PureRepeat &pr = it->second;
        depth min_period = repeatInfoPair[i].first;
        bool is_reset = repeatInfoPair[i].second;

        enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
                                                 min_period, is_reset, true);
        RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);

        DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
                     repeatTypeName(rtype), pr.bounds.str().c_str());

        SubCastle &sub = subs[i];
        RepeatInfo &info = infos[i];

        info.packedCtrlSize = rsi.packedCtrlSize;
        u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);

        // Handle stream/scratch space alloc for exclusive case differently.
        if (contains(groupId, i)) {
            u32 id = groupId.at(i);
            maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize);
            // SubCastle full/stream state offsets are written in for the group
            // below.
        } else {
            sub.fullStateOffset = scratchStateSize;
            sub.streamStateOffset = streamStateSize;
            scratchStateSize += verify_u32(sizeof(RepeatControl));
            streamStateSize += subStreamStateSize;
        }

        if (pr.bounds.max.is_finite()) {
            may_stale.push_back(i);
        }

        info.type = verify_u8(rtype);
        info.repeatMin = depth_to_u32(pr.bounds.min);
        info.repeatMax = depth_to_u32(pr.bounds.max);
        info.stateSize = rsi.stateSize;
        info.horizon = rsi.horizon;
        info.minPeriod = min_period.is_finite() ? (u32)min_period : ~0U;
        assert(rsi.packedFieldSizes.size()
               <= ARRAY_LENGTH(info.packedFieldSizes));
        copy(rsi.packedFieldSizes.begin(), rsi.packedFieldSizes.end(),
             info.packedFieldSizes);
        info.patchCount = rsi.patchCount;
        info.patchSize = rsi.patchSize;
        info.encodingSize = rsi.encodingSize;
        info.patchesOffset = rsi.patchesOffset;

        assert(pr.reports.size() == 1);
        ReportID id = *pr.reports.begin();
        sub.report = remap_reports ? rm.getProgramOffset(id) : id;

        if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
            for (u32 j = 0; j < rsi.patchSize; j++) {
                tables.push_back(rsi.table[j]);
            }
            sparseRepeats++;
            patchSize[i] = rsi.patchSize;
            tableSize += rsi.patchSize;
        }
    }

    vector<u32> scratchOffset(numGroups, 0);
    vector<u32> streamOffset(numGroups, 0);
    for (const auto &j : groupId) {
        u32 top = j.first;
        u32 id = j.second;
        SubCastle &sub = subs[top];
        if (!scratchOffset[id]) {
            sub.fullStateOffset = scratchStateSize;
            sub.streamStateOffset = streamStateSize;
            scratchOffset[id] = scratchStateSize;
            streamOffset[id] = streamStateSize;
            scratchStateSize += verify_u32(sizeof(RepeatControl));
            streamStateSize += maxStreamSize[id];
        } else {
            sub.fullStateOffset = scratchOffset[id];
            sub.streamStateOffset = streamOffset[id];
        }
    }
}

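/**
 * \brief Build the Castle engine bytecode for \p proto.
 *
 * Tops must already be contiguous from zero (see remapCastleTops). If the
 * grey box allows it, we first hunt for mutually exclusive repeats so that
 * they can share stream state.
 */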
bytecode_ptr<NFA>
buildCastle(const CastleProto &proto,
            const map<u32, vector<vector<CharReach>>> &triggers,
            const CompileContext &cc, const ReportManager &rm) {
    assert(cc.grey.allowCastle);

    const size_t numRepeats = proto.repeats.size();
    assert(numRepeats > 0 && numRepeats <= proto.max_occupancy);

    const CharReach &cr = proto.reach();

    DEBUG_PRINTF("reach %s, %zu repeats\n", describeClass(cr).c_str(),
                 numRepeats);

    vector<SubCastle> subs(numRepeats);
    memset(&subs[0], 0, sizeof(SubCastle) * numRepeats);

    vector<RepeatInfo> infos(numRepeats);
    memset(&infos[0], 0, sizeof(RepeatInfo) * numRepeats);

    vector<u64a> patchSize(numRepeats);
    memset(&patchSize[0], 0, sizeof(u64a) * numRepeats);

    vector<u64a> tables;

    // We start with enough stream state to store the active bitfield.
    u32 streamStateSize = mmbit_size(numRepeats);

    // We have a copy of the stream state in scratch for castleMatchLoop.
    u32 scratchStateSize = ROUNDUP_N(streamStateSize, alignof(RepeatControl));

    depth minWidth(depth::infinity());
    depth maxWidth(0);

    u32 i = 0;
    ExclusiveInfo exclusiveInfo;
    vector<vector<vector<CharReach>>> candidateTriggers;
    vector<u32> candidateRepeats;
    vector<pair<depth, bool>> repeatInfoPair;
    for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
         it != ite; ++it, ++i) {
        const u32 top = it->first;
        const PureRepeat &pr = it->second;
        assert(pr.reach == cr);
        assert(pr.reports.size() == 1);

        if (top != i) {
            // Tops have not been remapped?
            assert(0);
            throw std::logic_error("Tops not remapped");
        }

        minWidth = min(minWidth, pr.bounds.min);
        maxWidth = max(maxWidth, pr.bounds.max);

        bool is_reset = false;
        depth min_period = depth::infinity();

        // If we've got a top in the castle without any trigger information,
        // it possibly means that we've got a repeat that we can't trigger.
        // We do need to cope with it though.
        if (contains(triggers, top)) {
            min_period = depth(minPeriod(triggers.at(top), cr, &is_reset));
        }

        if (min_period > pr.bounds.max) {
            DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
            is_reset = true;
        }

        repeatInfoPair.push_back(make_pair(min_period, is_reset));

        candidateTriggers.push_back(triggers.at(top));
        candidateRepeats.push_back(i);
    }

    // Case 1: exclusive repeats
    enum ExclusiveType exclusive = NOT_EXCLUSIVE;
    u32 activeIdxSize = 0;
    u32 groupIterOffset = 0;
    if (cc.grey.castleExclusive) {
        auto cliqueGroups =
            checkExclusion(streamStateSize, cr, candidateTriggers,
                           exclusive, numRepeats);
        for (const auto &group : cliqueGroups) {
            // A group of mutually exclusive repeats was found;
            // update state sizes.
            activeIdxSize = calcPackedBytes(numRepeats + 1);
            streamStateSize += activeIdxSize;

            // replace with top values
            for (const auto &val : group) {
                const u32 top = candidateRepeats[val];
                exclusiveInfo.groupId[top] = exclusiveInfo.numGroups;
            }
            exclusiveInfo.numGroups++;
        }

        if (exclusive) {
            groupIterOffset = streamStateSize;
            streamStateSize += mmbit_size(exclusiveInfo.numGroups);
        }

        DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups);
    }
    candidateRepeats.clear();

    DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
                 exclusive);

    u32 tableSize = 0;
    u32 sparseRepeats = 0;
    vector<u32> may_stale; /* sub castles that may go stale */

    buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
                    scratchStateSize, streamStateSize, tableSize,
                    tables, sparseRepeats, exclusiveInfo, may_stale, rm);

    DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
    vector<mmbit_sparse_iter> stale_iter;
    if (!may_stale.empty()) {
        stale_iter = mmbBuildSparseIterator(may_stale, numRepeats);
    }

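    // Bytecode layout: the NFA header is followed by the Castle header, the
    // SubCastle array, then each SubCastle's RepeatInfo (with its
    // REPEAT_SPARSE_OPTIMAL_P table, if any), and finally the aligned stale
    // sparse iterator.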
    size_t total_size =
        sizeof(NFA) +                       // initial NFA structure
        sizeof(Castle) +                    // Castle structure
        sizeof(SubCastle) * subs.size() +   // SubCastles themselves
        sizeof(RepeatInfo) * subs.size() +  // RepeatInfo structure
        sizeof(u64a) * tableSize +          // table size for
                                            // REPEAT_SPARSE_OPTIMAL_P
        sizeof(u64a) * sparseRepeats;       // paddings for
                                            // REPEAT_SPARSE_OPTIMAL_P tables

    total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
    total_size += byte_length(stale_iter); // stale sparse iter

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    nfa->type = verify_u8(CASTLE_NFA);
    nfa->length = verify_u32(total_size);
    nfa->nPositions = verify_u32(subs.size());
    nfa->streamStateSize = streamStateSize;
    nfa->scratchStateSize = scratchStateSize;
    nfa->minWidth = verify_u32(minWidth);
    nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;

    char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
    char *ptr = base_ptr;
    Castle *c = (Castle *)ptr;
    c->numRepeats = verify_u32(subs.size());
    c->numGroups = exclusiveInfo.numGroups;
    c->exclusive = verify_s8(exclusive);
    c->activeIdxSize = verify_u8(activeIdxSize);
    c->activeOffset = verify_u32(c->numGroups * activeIdxSize);
    c->groupIterOffset = groupIterOffset;

    writeCastleScanEngine(cr, c);

    ptr += sizeof(Castle);
    SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32))));
    copy(subs.begin(), subs.end(), subCastles);

    u32 length = 0;
    u32 tableIdx = 0;
    for (i = 0; i < numRepeats; i++) {
        u32 offset = sizeof(SubCastle) * (numRepeats - i) + length;
        SubCastle *sub = &subCastles[i];
        sub->repeatInfoOffset = offset;

        ptr = (char *)sub + offset;
        memcpy(ptr, &infos[i], sizeof(RepeatInfo));

        if (patchSize[i]) {
            RepeatInfo *info = (RepeatInfo *)ptr;
            u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) +
                                    sizeof(*info)), alignof(u64a))));
            copy(tables.begin() + tableIdx,
                 tables.begin() + tableIdx + patchSize[i], table);
            u32 diff = (char *)table - (char *)info +
                       sizeof(u64a) * patchSize[i];
            info->length = diff;
            length += diff;
            tableIdx += patchSize[i];
        } else {
            length += sizeof(RepeatInfo);
        }

        // set exclusive group info
        if (contains(exclusiveInfo.groupId, i)) {
            sub->exclusiveId = exclusiveInfo.groupId[i];
        } else {
            sub->exclusiveId = numRepeats;
        }
    }

    ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter);

    assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA));
    if (!stale_iter.empty()) {
        c->staleIterOffset = verify_u32(ptr - base_ptr);
        copy_bytes(ptr, stale_iter);
        ptr += byte_length(stale_iter);
    }

    return nfa;
}

set<ReportID> all_reports(const CastleProto &proto) {
    set<ReportID> reports;
    for (const ReportID &report : proto.report_map | map_keys) {
        reports.insert(report);
    }
    return reports;
}

depth findMinWidth(const CastleProto &proto) {
    depth min_width(depth::infinity());
    for (const PureRepeat &pr : proto.repeats | map_values) {
        min_width = min(min_width, pr.bounds.min);
    }
    return min_width;
}

depth findMaxWidth(const CastleProto &proto) {
    depth max_width(0);
    for (const PureRepeat &pr : proto.repeats | map_values) {
        max_width = max(max_width, pr.bounds.max);
    }
    return max_width;
}

depth findMinWidth(const CastleProto &proto, u32 top) {
    if (!contains(proto.repeats, top)) {
        assert(0); // should not happen
        return depth::infinity();
    }
    return proto.repeats.at(top).bounds.min;
}

depth findMaxWidth(const CastleProto &proto, u32 top) {
    if (!contains(proto.repeats, top)) {
        assert(0); // should not happen
        return depth(0);
    }
    return proto.repeats.at(top).bounds.max;
}

CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) {
    assert(pr.reach.any());
    assert(pr.reports.size() == 1);
    u32 top = 0;
    repeats.emplace(top, pr);
    for (const auto &report : pr.reports) {
        report_map[report].insert(top);
    }
}

const CharReach &CastleProto::reach() const {
    assert(!repeats.empty());
    return repeats.begin()->second.reach;
}

u32 CastleProto::add(const PureRepeat &pr) {
    assert(repeats.size() < max_occupancy);
    assert(pr.reach == reach());
    assert(pr.reports.size() == 1);
    u32 top = next_top++;
    DEBUG_PRINTF("selected unused top %u\n", top);
    assert(!contains(repeats, top));
    repeats.emplace(top, pr);
    for (const auto &report : pr.reports) {
        report_map[report].insert(top);
    }
    return top;
}

void CastleProto::erase(u32 top) {
    DEBUG_PRINTF("erase top %u\n", top);
    assert(contains(repeats, top));
    repeats.erase(top);
    for (auto &m : report_map) {
        m.second.erase(top);
    }
}

u32 CastleProto::merge(const PureRepeat &pr) {
    assert(repeats.size() <= max_occupancy);
    assert(pr.reach == reach());
    assert(pr.reports.size() == 1);

    // First, see if this repeat is already in this castle.
    for (const auto &m : repeats) {
        if (m.second == pr) {
            DEBUG_PRINTF("repeat already present, with top %u\n", m.first);
            return m.first;
        }
    }

    if (repeats.size() == max_occupancy) {
        DEBUG_PRINTF("this castle is full\n");
        return max_occupancy;
    }

    return add(pr);
}

bool mergeCastle(CastleProto &c1, const CastleProto &c2,
                 map<u32, u32> &top_map) {
    assert(&c1 != &c2);
    assert(c1.kind == c2.kind);

    DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(),
                 c2.repeats.size());

    if (c1.reach() != c2.reach()) {
        DEBUG_PRINTF("different reach!\n");
        return false;
    }

    if (c1.repeats.size() + c2.repeats.size() > c1.max_occupancy) {
        DEBUG_PRINTF("too many repeats to merge\n");
        return false;
    }

    top_map.clear();

    for (const auto &m : c2.repeats) {
        const u32 top = m.first;
        const PureRepeat &pr = m.second;
        DEBUG_PRINTF("top %u\n", top);
        u32 new_top = c1.merge(pr);
        top_map[top] = new_top;
        DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
    }

    assert(c1.repeats.size() <= c1.max_occupancy);
    return true;
}

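/**
 * \brief Renumber the tops of \p proto so they are contiguous from zero,
 * rebuilding report_map to match; \p top_map receives the mapping from old
 * top to new top.
 */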
void remapCastleTops(CastleProto &proto, map<u32, u32> &top_map) {
    map<u32, PureRepeat> out;
    top_map.clear();

    for (const auto &m : proto.repeats) {
        const u32 top = m.first;
        const PureRepeat &pr = m.second;
        u32 new_top = out.size();
        out.emplace(new_top, pr);
        top_map[top] = new_top;
    }

    proto.repeats.swap(out);

    // Remap report map.
    proto.report_map.clear();
    for (const auto &m : proto.repeats) {
        const u32 top = m.first;
        const PureRepeat &pr = m.second;
        for (const auto &report : pr.reports) {
            proto.report_map[report].insert(top);
        }
    }

    assert(proto.repeats.size() <= proto.max_occupancy);
}

namespace {
struct HasReport {
    explicit HasReport(ReportID r) : report(r) {}

    bool operator()(const pair<u32, PureRepeat> &a) const {
        return contains(a.second.reports, report);
    }

private:
    ReportID report;
};
}

bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
              ReportID report2) {
    assert(!c1.repeats.empty());
    assert(!c2.repeats.empty());
    assert(c1.kind == c2.kind);

    if (c1.reach() != c2.reach()) {
        DEBUG_PRINTF("different reach\n");
        return false;
    }

    map<u32, PureRepeat>::const_iterator it = c1.repeats.begin(),
                                         ite = c1.repeats.end(),
                                         jt = c2.repeats.begin(),
                                         jte = c2.repeats.end();

    for (;; ++it, ++jt) {
        it = find_if(it, ite, HasReport(report1));
        jt = find_if(jt, jte, HasReport(report2));

        if (it == ite && jt == jte) {
            DEBUG_PRINTF("success, cases are equivalent!\n");
            return true;
        }

        if (it == ite || jt == jte) {
            DEBUG_PRINTF("no match for one repeat\n");
            break;
        }

        if (it->first != jt->first) {
            DEBUG_PRINTF("different tops\n");
            break;
        }

        const PureRepeat &r1 = it->second;
        const PureRepeat &r2 = jt->second;
        assert(r1.reach == c1.reach());
        assert(r2.reach == c1.reach());
        if (r1.bounds != r2.bounds) {
            DEBUG_PRINTF("different bounds\n");
            break;
        }
    }

    return false;
}

bool is_equal(const CastleProto &c1, const CastleProto &c2) {
    assert(!c1.repeats.empty());
    assert(!c2.repeats.empty());
    assert(c1.kind == c2.kind);

    if (c1.reach() != c2.reach()) {
        DEBUG_PRINTF("different reach\n");
        return false;
    }

    return c1.repeats == c2.repeats;
}

bool requiresDedupe(const CastleProto &proto,
                    const flat_set<ReportID> &reports) {
    for (const auto &report : reports) {
        auto it = proto.report_map.find(report);
        if (it == end(proto.report_map)) {
            continue;
        }
        if (it->second.size() > 1) {
            DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
                         report);
            return true;
        }
    }
    return false;
}

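/**
 * \brief Expand a single repeat into NFA graph form inside \p g.
 *
 * Builds a chain of pr.bounds.min mandatory vertices from start (preceded
 * by a vacuous start->accept edge if the min bound is zero). A finite max
 * bound adds a chain of optional vertices with shortcut edges from the head
 * of the mandatory chain, so that every count in [min, max] can reach
 * accept; an infinite max bound becomes a self-loop on the last vertex.
 */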
static
void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
    DEBUG_PRINTF("top %u -> repeat %s\n", top, pr.bounds.str().c_str());
    NFAVertex u = g.start;

    // Mandatory repeats to min bound.
    u32 min_bound = pr.bounds.min; // always finite
    if (min_bound == 0) { // Vacuous case, we can only do this once.
        assert(!edge(g.start, g.accept, g).second);
        NFAEdge e = add_edge(g.start, g.accept, g);
        g[e].tops.insert(top);
        g[u].reports.insert(pr.reports.begin(), pr.reports.end());
        min_bound = 1;
    }

    for (u32 i = 0; i < min_bound; i++) {
        NFAVertex v = add_vertex(g);
        g[v].char_reach = pr.reach;
        NFAEdge e = add_edge(u, v, g);
        if (u == g.start) {
            g[e].tops.insert(top);
        }
        u = v;
    }

    NFAVertex head = u;

    // Optional repeats to max bound.
    if (pr.bounds.max.is_finite()) {
        assert(pr.bounds.max > depth(0));
        const u32 max_bound = pr.bounds.max;
        for (u32 i = 0; i < max_bound - min_bound; i++) {
            NFAVertex v = add_vertex(g);
            g[v].char_reach = pr.reach;
            if (head != u) {
                add_edge(head, v, g);
            }
            NFAEdge e = add_edge(u, v, g);
            if (u == g.start) {
                g[e].tops.insert(top);
            }
            u = v;
        }
    } else {
        assert(pr.bounds.max.is_infinite());
        add_edge(u, u, g);
    }

    // Connect to accept.
    add_edge(u, g.accept, g);
    g[u].reports.insert(pr.reports.begin(), pr.reports.end());
    if (u != head) {
        add_edge(head, g.accept, g);
        g[head].reports.insert(pr.reports.begin(), pr.reports.end());
    }
}

static
bool hasZeroMinBound(const CastleProto &proto) {
    const depth zero(0);
    for (const PureRepeat &pr : proto.repeats | map_values) {
        if (pr.bounds.min == zero) {
            return true;
        }
    }
    return false;
}

unique_ptr<NGHolder> makeHolder(const CastleProto &proto,
                                const CompileContext &cc) {
    assert(!proto.repeats.empty());

    // Vacuous edges are only doable in the NGHolder if we are a single-top
    // Castle.
    if (hasZeroMinBound(proto)) {
        if (proto.repeats.size() != 1 || proto.repeats.begin()->first != 0) {
            DEBUG_PRINTF("can't build multi-top vacuous holder\n");
            return nullptr;
        }
    }

    auto g = ue2::make_unique<NGHolder>(proto.kind);

    for (const auto &m : proto.repeats) {
        addToHolder(*g, m.first, m.second);
    }

    //dumpGraph("castle_holder.dot", *g);

    // Sanity checks.
    assert(allMatchStatesHaveReports(*g));
    assert(!has_parallel_edge(*g));

    reduceGraphEquivalences(*g, cc);

    removeRedundancy(*g, SOM_NONE);

    return g;
}

} // namespace ue2