| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** |
| 30 | * \file |
| 31 | * \brief Limex NFA construction code. |
| 32 | */ |
| 33 | |
| 34 | #include "ng_limex.h" |
| 35 | |
| 36 | #include "grey.h" |
| 37 | #include "ng_equivalence.h" |
| 38 | #include "ng_holder.h" |
| 39 | #include "ng_misc_opt.h" |
| 40 | #include "ng_prune.h" |
| 41 | #include "ng_redundancy.h" |
| 42 | #include "ng_repeat.h" |
| 43 | #include "ng_reports.h" |
| 44 | #include "ng_restructuring.h" |
| 45 | #include "ng_squash.h" |
| 46 | #include "ng_util.h" |
| 47 | #include "ng_width.h" |
| 48 | #include "ue2common.h" |
| 49 | #include "nfa/limex_compile.h" |
| 50 | #include "nfa/limex_limits.h" |
| 51 | #include "nfa/nfa_internal.h" |
| 52 | #include "util/compile_context.h" |
| 53 | #include "util/container.h" |
| 54 | #include "util/graph_range.h" |
| 55 | #include "util/report_manager.h" |
| 56 | #include "util/flat_containers.h" |
| 57 | #include "util/verify_types.h" |
| 58 | |
| 59 | #include <algorithm> |
| 60 | #include <map> |
| 61 | #include <unordered_map> |
| 62 | #include <unordered_set> |
| 63 | #include <vector> |
| 64 | |
| 65 | #include <boost/range/adaptor/map.hpp> |
| 66 | |
| 67 | using namespace std; |
| 68 | using boost::adaptors::map_values; |
| 69 | using boost::adaptors::map_keys; |
| 70 | |
| 71 | namespace ue2 { |
| 72 | |
| 73 | #ifndef NDEBUG |
| 74 | // Some sanity checking for the graph; returns false if something is wrong. |
| 75 | // Only used in assertions. |
| 76 | static |
| 77 | bool sanityCheckGraph(const NGHolder &g, |
| 78 | const unordered_map<NFAVertex, u32> &state_ids) { |
| 79 | unordered_set<u32> seen_states; |
| 80 | |
| 81 | for (auto v : vertices_range(g)) { |
| 82 | // Non-specials should have non-empty reachability. |
| 83 | if (!is_special(v, g)) { |
| 84 | if (g[v].char_reach.none()) { |
| 85 | DEBUG_PRINTF("vertex %zu has empty reach\n" , g[v].index); |
| 86 | return false; |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | // Vertices with edges to accept or acceptEod must have reports and |
| 91 | // other vertices must not have them. |
| 92 | if (is_match_vertex(v, g) && v != g.accept) { |
| 93 | if (g[v].reports.empty()) { |
| 94 | DEBUG_PRINTF("vertex %zu has no reports\n" , g[v].index); |
| 95 | return false; |
| 96 | } |
| 97 | } else if (!g[v].reports.empty()) { |
| 98 | DEBUG_PRINTF("vertex %zu has reports but no accept edge\n" , |
| 99 | g[v].index); |
| 100 | return false; |
| 101 | } |
| 102 | |
| 103 | // Participant vertices should have distinct state indices. |
| 104 | if (!contains(state_ids, v)) { |
| 105 | DEBUG_PRINTF("vertex %zu has no state index!\n" , g[v].index); |
| 106 | return false; |
| 107 | } |
| 108 | u32 s = state_ids.at(v); |
| 109 | if (s != NO_STATE && !seen_states.insert(s).second) { |
| 110 | DEBUG_PRINTF("vertex %zu has dupe state %u\n" , g[v].index, s); |
| 111 | return false; |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | return true; |
| 116 | } |
| 117 | #endif |
| 118 | |
| 119 | static |
| 120 | unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g, |
| 121 | const vector<BoundedRepeatData> &repeats) { |
| 122 | auto squashMap = findSquashers(g); |
| 123 | filterSquashers(g, squashMap); |
| 124 | |
| 125 | /* We also filter out the cyclic states representing bounded repeats, as |
| 126 | * they are not really cyclic -- they may turn off unexpectedly. */ |
| 127 | for (const auto &br : repeats) { |
| 128 | if (br.repeatMax.is_finite()) { |
| 129 | squashMap.erase(br.cyclic); |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | return squashMap; |
| 134 | } |
| 135 | |
| 136 | /** |
| 137 | * \brief Drop edges from start to vertices that also have an edge from |
| 138 | * startDs. |
| 139 | * |
| 140 | * Note that this also includes the (start, startDs) edge, which is not |
| 141 | * necessary for actual NFA implementation (and is actually something we don't |
| 142 | * want to affect state numbering, etc). |
| 143 | */ |
| 144 | static |
| 145 | void dropRedundantStartEdges(NGHolder &g) { |
| 146 | remove_out_edge_if(g.start, [&](const NFAEdge &e) { |
| 147 | return edge(g.startDs, target(e, g), g).second; |
| 148 | }, g); |
| 149 | |
| 150 | // Ensure that we always remove (start, startDs), even if startDs has had |
| 151 | // its self-loop removed as an optimization. |
| 152 | remove_edge(g.start, g.startDs, g); |
| 153 | } |
| 154 | |
| 155 | static |
| 156 | CharReach calcTopVertexReach(const flat_set<u32> &tops, |
| 157 | const map<u32, CharReach> &top_reach) { |
| 158 | CharReach top_cr; |
| 159 | for (u32 t : tops) { |
| 160 | if (contains(top_reach, t)) { |
| 161 | top_cr |= top_reach.at(t); |
| 162 | } else { |
| 163 | top_cr = CharReach::dot(); |
| 164 | break; |
| 165 | } |
| 166 | } |
| 167 | return top_cr; |
| 168 | } |
| 169 | |
| 170 | static |
| 171 | NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops, |
| 172 | const flat_set<NFAVertex> &succs, |
| 173 | const map<u32, CharReach> &top_reach) { |
| 174 | assert(!succs.empty()); |
| 175 | assert(!tops.empty()); |
| 176 | |
| 177 | bool reporter = false; |
| 178 | |
| 179 | NFAVertex u = add_vertex(g[g.start], g); |
| 180 | CharReach top_cr = calcTopVertexReach(tops, top_reach); |
| 181 | g[u].char_reach = top_cr; |
| 182 | |
| 183 | for (auto v : succs) { |
| 184 | if (v == g.accept || v == g.acceptEod) { |
| 185 | reporter = true; |
| 186 | } |
| 187 | add_edge(u, v, g); |
| 188 | } |
| 189 | |
| 190 | // Only retain reports (which we copied on add_vertex above) for new top |
| 191 | // vertices connected to accepts. |
| 192 | if (!reporter) { |
| 193 | g[u].reports.clear(); |
| 194 | } |
| 195 | |
| 196 | return u; |
| 197 | } |
| 198 | |
| 199 | static |
| 200 | void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs, |
| 201 | const map<NFAVertex, flat_set<u32>> &succ_tops, |
| 202 | flat_set<u32> *picked_tops, |
| 203 | flat_set<NFAVertex> *picked_succs) { |
| 204 | /* pick top or vertex we want to handle */ |
| 205 | if (top_succs.size() < succ_tops.size()) { |
| 206 | auto best = top_succs.end(); |
| 207 | for (auto it = top_succs.begin(); it != top_succs.end(); ++it) { |
| 208 | if (best == top_succs.end() |
| 209 | || it->second.size() < best->second.size()) { |
| 210 | best = it; |
| 211 | } |
| 212 | } |
| 213 | assert(best != top_succs.end()); |
| 214 | assert(!best->second.empty()); /* should already been pruned */ |
| 215 | |
| 216 | *picked_tops = { best->first }; |
| 217 | *picked_succs = best->second; |
| 218 | } else { |
| 219 | auto best = succ_tops.end(); |
| 220 | for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) { |
| 221 | /* have to worry about determinism for this one */ |
| 222 | if (best == succ_tops.end() |
| 223 | || it->second.size() < best->second.size() |
| 224 | || (it->second.size() == best->second.size() |
| 225 | && it->second < best->second)) { |
| 226 | best = it; |
| 227 | } |
| 228 | } |
| 229 | assert(best != succ_tops.end()); |
| 230 | assert(!best->second.empty()); /* should already been pruned */ |
| 231 | |
| 232 | *picked_succs = { best->first }; |
| 233 | *picked_tops = best->second; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | static |
| 238 | void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, |
| 239 | const map<u32, flat_set<NFAVertex>> &top_succs, |
| 240 | const map<NFAVertex, flat_set<u32>> &succ_tops, |
| 241 | flat_set<u32> &picked_tops, |
| 242 | flat_set<NFAVertex> &picked_succs) { |
| 243 | NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */ |
| 244 | const auto &cand_tops = succ_tops.at(v); |
| 245 | |
| 246 | for (u32 t : cand_tops) { |
| 247 | if (!contains(unhandled_top_succs, t)) { |
| 248 | continue; |
| 249 | } |
| 250 | if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) { |
| 251 | continue; /* not adding any useful work that hasn't already been |
| 252 | * done */ |
| 253 | } |
| 254 | if (!is_subset_of(picked_succs, top_succs.at(t))) { |
| 255 | continue; /* will not form a cbs */ |
| 256 | } |
| 257 | picked_tops.insert(t); |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | static |
| 262 | void expandCbsBySuccs(const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, |
| 263 | const map<u32, flat_set<NFAVertex>> &top_succs, |
| 264 | const map<NFAVertex, flat_set<u32>> &succ_tops, |
| 265 | flat_set<u32> &picked_tops, |
| 266 | flat_set<NFAVertex> &picked_succs) { |
| 267 | u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */ |
| 268 | const auto &cand_succs = top_succs.at(t); |
| 269 | |
| 270 | for (NFAVertex v : cand_succs) { |
| 271 | if (!contains(unhandled_succ_tops, v)) { |
| 272 | continue; |
| 273 | } |
| 274 | if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) { |
| 275 | continue; /* not adding any useful work that hasn't already been |
| 276 | * done */ |
| 277 | } |
| 278 | if (!is_subset_of(picked_tops, succ_tops.at(v))) { |
| 279 | continue; /* will not form a cbs */ |
| 280 | } |
| 281 | picked_succs.insert(v); |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | /* See if we can expand the complete bipartite subgraph (cbs) specified by the |
| 286 | * picked tops/succs by adding more to either of the tops or succs. |
| 287 | */ |
| 288 | static |
| 289 | void expandTopSuccCbs(const map<u32, flat_set<NFAVertex>> &top_succs, |
| 290 | const map<NFAVertex, flat_set<u32>> &succ_tops, |
| 291 | const map<u32, flat_set<NFAVertex>> &unhandled_top_succs, |
| 292 | const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, |
| 293 | flat_set<u32> &picked_tops, |
| 294 | flat_set<NFAVertex> &picked_succs) { |
| 295 | /* Note: all picked (tops|succs) are equivalent */ |
| 296 | |
| 297 | /* Try to expand first (as we are more likely to succeed) on the side |
| 298 | * with fewest remaining things to be handled */ |
| 299 | |
| 300 | if (unhandled_top_succs.size() < unhandled_succ_tops.size()) { |
| 301 | expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, |
| 302 | picked_tops, picked_succs); |
| 303 | expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, |
| 304 | picked_tops, picked_succs); |
| 305 | } else { |
| 306 | expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, |
| 307 | picked_tops, picked_succs); |
| 308 | expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, |
| 309 | picked_tops, picked_succs); |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | static |
| 314 | void markTopSuccAsHandled(NFAVertex start_v, |
| 315 | const flat_set<u32> &handled_tops, |
| 316 | const flat_set<NFAVertex> &handled_succs, |
| 317 | map<u32, set<NFAVertex>> &tops_out, |
| 318 | map<u32, flat_set<NFAVertex>> &unhandled_top_succs, |
| 319 | map<NFAVertex, flat_set<u32>> &unhandled_succ_tops) { |
| 320 | for (u32 t : handled_tops) { |
| 321 | tops_out[t].insert(start_v); |
| 322 | assert(contains(unhandled_top_succs, t)); |
| 323 | erase_all(&unhandled_top_succs[t], handled_succs); |
| 324 | if (unhandled_top_succs[t].empty()) { |
| 325 | unhandled_top_succs.erase(t); |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | for (NFAVertex v : handled_succs) { |
| 330 | assert(contains(unhandled_succ_tops, v)); |
| 331 | erase_all(&unhandled_succ_tops[v], handled_tops); |
| 332 | if (unhandled_succ_tops[v].empty()) { |
| 333 | unhandled_succ_tops.erase(v); |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | |
/* Check whether the self-looping vertex u can stand in as a start vertex for
 * the outstanding tops shared by all of its successors. If it can, the
 * covered top/successor pairs are marked handled (updating the unhandled
 * maps and tops_out); otherwise the function returns without changes. */
static
void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
                         const map<u32, CharReach> &top_reach,
                         map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
                         map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
                         map<u32, set<NFAVertex>> &tops_out) {
    // Start from u's own outstanding tops; narrowed below to the tops
    // common to every successor of u.
    flat_set<u32> top_inter = unhandled_succ_tops.at(u);
    flat_set<NFAVertex> succs;
    for (NFAVertex v : adjacent_vertices_range(u, g)) {
        // Every successor of u must itself still be awaiting top handling.
        if (!contains(unhandled_succ_tops, v)) {
            return;
        }
        /* if it has vacuous reports we need to make sure that the report sets
         * are the same */
        if ((v == g.accept || v == g.acceptEod)
            && g[g.start].reports != g[u].reports) {
            DEBUG_PRINTF("different report behaviour\n");
            return;
        }
        // Intersect u's candidate tops with those outstanding for v.
        const flat_set<u32> &v_tops = unhandled_succ_tops.at(v);
        flat_set<u32> new_inter;
        auto ni_inserter = inserter(new_inter, new_inter.end());
        set_intersection(top_inter.begin(), top_inter.end(),
                         v_tops.begin(), v_tops.end(), ni_inserter);
        top_inter = std::move(new_inter);
        succs.insert(v);
    }

    // No top triggers all of u's successors: nothing to reuse.
    if (top_inter.empty()) {
        return;
    }

    // The combined reach of the candidate tops must be a subset of u's own
    // reach for u to be a safe stand-in.
    auto top_cr = calcTopVertexReach(top_inter, top_reach);
    if (!top_cr.isSubsetOf(g[u].char_reach)) {
        return;
    }

    DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index);
    markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs,
                         unhandled_succ_tops);
}
| 379 | |
| 380 | /* We may have cases where a top triggers something that starts with a .* (or |
| 381 | * similar state). In these cases we can make use of that state as a start |
| 382 | * state. |
| 383 | */ |
| 384 | static |
| 385 | void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach, |
| 386 | map<u32, flat_set<NFAVertex>> &unhandled_top_succs, |
| 387 | map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, |
| 388 | map<u32, set<NFAVertex>> &tops_out) { |
| 389 | /* create list of candidates first, to avoid issues of iter invalidation */ |
| 390 | DEBUG_PRINTF("attempting to reuse vertices for top starts\n" ); |
| 391 | vector<NFAVertex> cand_starts; |
| 392 | for (NFAVertex u : unhandled_succ_tops | map_keys) { |
| 393 | if (hasSelfLoop(u, g)) { |
| 394 | cand_starts.push_back(u); |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | for (NFAVertex u : cand_starts) { |
| 399 | if (!contains(unhandled_succ_tops, u)) { |
| 400 | continue; |
| 401 | } |
| 402 | attemptToUseAsStart(g, u, top_reach, unhandled_top_succs, |
| 403 | unhandled_succ_tops, tops_out); |
| 404 | } |
| 405 | } |
| 406 | |
/* Add dedicated start vertices to the graph so each top switches on exactly
 * the successors it should trigger. Results are recorded in tops_out as
 * top -> set of start vertices. The original start vertex's out-edges are
 * completely replaced (only the edge to startDs remains). */
static
void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out,
                   const map<u32, CharReach> &top_reach) {
    /* Ideally, we want to add the smallest number of states to the graph for
     * tops to turn on so that they can accurately trigger their successors.
     *
     * The relationships between tops and their successors forms a bipartite
     * graph. Finding the optimal number of start states to add is equivalent to
     * finding a minimal biclique coverings. Unfortunately, this is known to be
     * NP-complete.
     *
     * Given this, we will just do something simple to avoid creating something
     * truly wasteful:
     * 1) Try to find any cyclic states which can act as their own start states
     * 2) Pick a top or a succ to create a start state for and then try to find
     *    the largest complete bipartite subgraph that it is part of.
     */

    // Build both directions of the top <-> successor relation from the
    // tops recorded on the start vertex's out-edges.
    map<u32, flat_set<NFAVertex>> top_succs;
    map<NFAVertex, flat_set<u32>> succ_tops;
    for (const auto &e : out_edges_range(g.start, g)) {
        NFAVertex v = target(e, g);
        for (u32 t : g[e].tops) {
            top_succs[t].insert(v);
            succ_tops[v].insert(t);
        }
    }

    // Work lists: pairs are removed from these as they are covered.
    auto unhandled_top_succs = top_succs;
    auto unhandled_succ_tops = succ_tops;

    // Step 1: reuse suitable self-looping successors as their own starts.
    reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops,
                       tops_out);

    /* Note: there may be successors which are equivalent (in terms of
    top-triggering), it may be more efficient to discover this and treat them
    as a unit. TODO */

    // Step 2: repeatedly pick a seed, grow it into a complete bipartite
    // subgraph, and create one new start vertex to cover it.
    while (!unhandled_succ_tops.empty()) {
        assert(!unhandled_top_succs.empty());
        DEBUG_PRINTF("creating top start vertex\n");
        flat_set<u32> u_tops;
        flat_set<NFAVertex> u_succs;
        pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops,
                                 &u_tops, &u_succs);

        expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs,
                         unhandled_succ_tops, u_tops, u_succs);

        /* create start vertex to handle this top/succ combination */
        NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach);

        /* update maps */
        markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs,
                             unhandled_succ_tops);
    }
    assert(unhandled_top_succs.empty());

    // We are completely replacing the start vertex, so clear its reports.
    clear_out_edges(g.start, g);
    add_edge(g.start, g.startDs, g);
    g[g.start].reports.clear();
}
| 470 | |
/* Find the set of vertices that can be treated as "zombies" (states that,
 * once on, stay matching). Only applies in streaming mode with zombies
 * enabled; returns an empty set whenever the graph fails any precondition. */
static
set<NFAVertex> findZombies(const NGHolder &h,
            const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
            const unordered_map<NFAVertex, u32> &state_ids,
            const CompileContext &cc) {
    set<NFAVertex> zombies;
    if (!cc.grey.allowZombies) {
        return zombies;
    }

    // We only use zombie masks in streaming mode.
    if (!cc.streaming) {
        return zombies;
    }

    // Requires a single report across the graph and a single in-edge on
    // acceptEod.
    if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
        DEBUG_PRINTF("cannot be made undead - bad reports\n");
        return zombies;
    }

    // Every accept predecessor must lead to a full-reach (dot) vertex that
    // itself goes to accept; bounded-repeat cyclics only qualify when they
    // are self-loops with an infinite repeat bound.
    for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
        assert(h[u].reports.size() == 1);
        for (auto v : adjacent_vertices_range(u, h)) {
            if (edge(v, h.accept, h).second
                && h[v].char_reach.all()) {
                if (!contains(br_cyclic, v)) {
                    goto ok;
                }

                const BoundedRepeatSummary &sum = br_cyclic.at(v);

                if (u == v && sum.repeatMax.is_infinite()) {
                    goto ok;
                }

            }
        }
        DEBUG_PRINTF("does not go to dot accept\n");
        return zombies;
    ok:;
    }

    // All checks passed: every accept predecessor that has a real state
    // index becomes a zombie candidate.
    for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
        if (state_ids.at(v) != NO_STATE) {
            zombies.insert(v);
        }
    }
    return zombies;
}
| 520 | |
| 521 | static |
| 522 | void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) { |
| 523 | vector<NFAVertex> ordering; |
| 524 | for (auto &e : state_ids) { |
| 525 | if (e.second == NO_STATE) { |
| 526 | continue; |
| 527 | } |
| 528 | ordering.push_back(e.first); |
| 529 | } |
| 530 | |
| 531 | // Sort in reverse order by state ID. |
| 532 | sort(ordering.begin(), ordering.end(), |
| 533 | [&state_ids](NFAVertex a, NFAVertex b) { |
| 534 | return state_ids.at(a) > state_ids.at(b); |
| 535 | }); |
| 536 | |
| 537 | u32 stateNum = 0; |
| 538 | |
| 539 | for (const auto &v : ordering) { |
| 540 | DEBUG_PRINTF("renumber, %u -> %u\n" , state_ids.at(v), stateNum); |
| 541 | state_ids[v] = stateNum++; |
| 542 | } |
| 543 | } |
| 544 | |
| 545 | static |
| 546 | map<u32, CharReach> |
| 547 | findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) { |
| 548 | map<u32, CharReach> top_reach; |
| 549 | |
| 550 | for (const auto &m : triggers) { |
| 551 | const auto top = m.first; |
| 552 | CharReach cr; |
| 553 | for (const auto &trigger : m.second) { |
| 554 | if (trigger.empty()) { |
| 555 | // We don't know anything about this trigger. Assume it can |
| 556 | // have any reach. |
| 557 | cr.setall(); |
| 558 | break; |
| 559 | } |
| 560 | cr |= *trigger.rbegin(); |
| 561 | } |
| 562 | |
| 563 | top_reach.emplace(top, cr); |
| 564 | } |
| 565 | |
| 566 | return top_reach; |
| 567 | } |
| 568 | |
| 569 | static |
| 570 | unique_ptr<NGHolder> |
| 571 | prepareGraph(const NGHolder &h_in, const ReportManager *rm, |
| 572 | const map<u32, u32> &fixed_depth_tops, |
| 573 | const map<u32, vector<vector<CharReach>>> &triggers, |
| 574 | bool impl_test_only, const CompileContext &cc, |
| 575 | unordered_map<NFAVertex, u32> &state_ids, |
| 576 | vector<BoundedRepeatData> &repeats, |
| 577 | map<u32, set<NFAVertex>> &tops) { |
| 578 | assert(is_triggered(h_in) || fixed_depth_tops.empty()); |
| 579 | |
| 580 | unique_ptr<NGHolder> h = cloneHolder(h_in); |
| 581 | |
| 582 | // Bounded repeat handling. |
| 583 | analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming, |
| 584 | impl_test_only, cc.grey); |
| 585 | |
| 586 | // If we're building a rose/suffix, do the top dance. |
| 587 | flat_set<NFAVertex> topVerts; |
| 588 | if (is_triggered(*h)) { |
| 589 | makeTopStates(*h, tops, findTopReach(triggers)); |
| 590 | |
| 591 | for (const auto &vv : tops | map_values) { |
| 592 | insert(&topVerts, vv); |
| 593 | } |
| 594 | } |
| 595 | |
| 596 | dropRedundantStartEdges(*h); |
| 597 | |
| 598 | // Do state numbering |
| 599 | state_ids = numberStates(*h, topVerts); |
| 600 | |
| 601 | // In debugging, we sometimes like to reverse the state numbering to stress |
| 602 | // the NFA construction code. |
| 603 | if (cc.grey.numberNFAStatesWrong) { |
| 604 | reverseStateOrdering(state_ids); |
| 605 | } |
| 606 | |
| 607 | assert(sanityCheckGraph(*h, state_ids)); |
| 608 | return h; |
| 609 | } |
| 610 | |
| 611 | static |
| 612 | void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { |
| 613 | for (const auto &v : vertices_range(h)) { |
| 614 | auto &reports = h[v].reports; |
| 615 | if (reports.empty()) { |
| 616 | continue; |
| 617 | } |
| 618 | auto old_reports = reports; |
| 619 | reports.clear(); |
| 620 | for (const ReportID &id : old_reports) { |
| 621 | u32 program = rm.getProgramOffset(id); |
| 622 | reports.insert(program); |
| 623 | } |
| 624 | DEBUG_PRINTF("vertex %zu: remapped reports {%s} to programs {%s}\n" , |
| 625 | h[v].index, as_string_list(old_reports).c_str(), |
| 626 | as_string_list(reports).c_str()); |
| 627 | } |
| 628 | } |
| 629 | |
/* Shared implementation behind the public constructNFA variants: prepares
 * the graph (repeats, tops, state numbering), computes squash masks and
 * zombie states, remaps reports to programs when they are managed, and hands
 * everything to the LimEx generator. Returns nullptr when the graph needs
 * more states than the largest NFA model supports. */
static
bytecode_ptr<NFA>
constructNFA(const NGHolder &h_in, const ReportManager *rm,
             const map<u32, u32> &fixed_depth_tops,
             const map<u32, vector<vector<CharReach>>> &triggers,
             bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
             const CompileContext &cc) {
    // Only consult the report manager when the graph has managed reports.
    if (!has_managed_reports(h_in)) {
        rm = nullptr;
    } else {
        assert(rm);
    }

    unordered_map<NFAVertex, u32> state_ids;
    vector<BoundedRepeatData> repeats;
    map<u32, set<NFAVertex>> tops;
    unique_ptr<NGHolder> h
        = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc,
                       state_ids, repeats, tops);

    // Quick exit: if we've got an embarrassment of riches, i.e. more states
    // than we can implement in our largest NFA model, bail here.
    u32 numStates = countStates(state_ids);
    if (numStates > NFA_MAX_STATES) {
        DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
        return nullptr;
    }

    // Summarise bounded repeat bounds by cyclic vertex for findZombies.
    map<NFAVertex, BoundedRepeatSummary> br_cyclic;
    for (const auto &br : repeats) {
        br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
    }

    unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
    unordered_map<NFAVertex, NFAStateSet> squashMap;

    // build map of squashed and squashers
    if (cc.grey.squashNFA) {
        squashMap = findSquashStates(*h, repeats);

        if (rm && cc.grey.highlanderSquash) {
            reportSquashMap = findHighlanderSquashers(*h, *rm);
        }
    }

    set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);

    if (has_managed_reports(*h)) {
        assert(rm);
        remapReportsToPrograms(*h, *rm);
    }

    // State compression only applies in streaming mode and when allowed by
    // the grey box.
    if (!cc.streaming || !cc.grey.compressNFAState) {
        compress_state = false;
    }

    return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
                    zombies, do_accel, compress_state, hint, cc);
}
| 689 | |
| 690 | bytecode_ptr<NFA> |
| 691 | constructNFA(const NGHolder &h_in, const ReportManager *rm, |
| 692 | const map<u32, u32> &fixed_depth_tops, |
| 693 | const map<u32, vector<vector<CharReach>>> &triggers, |
| 694 | bool compress_state, const CompileContext &cc) { |
| 695 | const u32 hint = INVALID_NFA; |
| 696 | const bool do_accel = cc.grey.accelerateNFA; |
| 697 | const bool impl_test_only = false; |
| 698 | return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state, |
| 699 | do_accel, impl_test_only, hint, cc); |
| 700 | } |
| 701 | |
| 702 | #ifndef RELEASE_BUILD |
| 703 | // Variant that allows a hint to be specified. |
| 704 | bytecode_ptr<NFA> |
| 705 | constructNFA(const NGHolder &h_in, const ReportManager *rm, |
| 706 | const map<u32, u32> &fixed_depth_tops, |
| 707 | const map<u32, vector<vector<CharReach>>> &triggers, |
| 708 | bool compress_state, u32 hint, const CompileContext &cc) { |
| 709 | const bool do_accel = cc.grey.accelerateNFA; |
| 710 | const bool impl_test_only = false; |
| 711 | return constructNFA(h_in, rm, fixed_depth_tops, triggers, |
| 712 | compress_state, do_accel, impl_test_only, hint, cc); |
| 713 | } |
| 714 | #endif // RELEASE_BUILD |
| 715 | |
| 716 | static |
| 717 | bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint, |
| 718 | const CompileContext &cc) { |
| 719 | // Make a mutable copy of the graph that we can renumber etc. |
| 720 | NGHolder h; |
| 721 | cloneHolder(h, h_in); |
| 722 | assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ |
| 723 | |
| 724 | // Do state numbering. |
| 725 | auto state_ids = numberStates(h, {}); |
| 726 | |
| 727 | // Quick exit: if we've got an embarrassment of riches, i.e. more states |
| 728 | // than we can implement in our largest NFA model, bail here. |
| 729 | u32 numStates = countStates(state_ids); |
| 730 | if (numStates > NFA_MAX_STATES) { |
| 731 | DEBUG_PRINTF("Can't build an NFA with %u states\n" , numStates); |
| 732 | return nullptr; |
| 733 | } |
| 734 | |
| 735 | assert(sanityCheckGraph(h, state_ids)); |
| 736 | |
| 737 | map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */ |
| 738 | set<NFAVertex> zombies; |
| 739 | vector<BoundedRepeatData> repeats; |
| 740 | unordered_map<NFAVertex, NFAStateSet> reportSquashMap; |
| 741 | unordered_map<NFAVertex, NFAStateSet> squashMap; |
| 742 | |
| 743 | return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops, |
| 744 | zombies, false, false, hint, cc); |
| 745 | } |
| 746 | |
| 747 | bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, |
| 748 | const CompileContext &cc) { |
| 749 | u32 hint = INVALID_NFA; // no hint |
| 750 | return constructReversedNFA_i(h_in, hint, cc); |
| 751 | } |
| 752 | |
| 753 | #ifndef RELEASE_BUILD |
| 754 | // Variant that allows a hint to be specified. |
| 755 | bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint, |
| 756 | const CompileContext &cc) { |
| 757 | return constructReversedNFA_i(h_in, hint, cc); |
| 758 | } |
| 759 | #endif // RELEASE_BUILD |
| 760 | |
| 761 | u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, |
| 762 | const CompileContext &cc) { |
| 763 | if (!cc.grey.allowLimExNFA) { |
| 764 | return false; |
| 765 | } |
| 766 | |
| 767 | assert(!can_never_match(g)); |
| 768 | |
| 769 | // Quick check: we can always implement an NFA with less than NFA_MAX_STATES |
| 770 | // states. Note that top masks can generate extra states, so we account for |
| 771 | // those here too. |
| 772 | if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { |
| 773 | return true; |
| 774 | } |
| 775 | |
| 776 | if (!has_managed_reports(g)) { |
| 777 | rm = nullptr; |
| 778 | } else { |
| 779 | assert(rm); |
| 780 | } |
| 781 | |
| 782 | // The BEST way to tell if an NFA is implementable is to implement it! |
| 783 | const bool impl_test_only = true; |
| 784 | const map<u32, u32> fixed_depth_tops; // empty |
| 785 | const map<u32, vector<vector<CharReach>>> triggers; // empty |
| 786 | |
| 787 | /* Perform the first part of the construction process and see if the |
| 788 | * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can |
| 789 | * implement it as an NFA. */ |
| 790 | |
| 791 | unordered_map<NFAVertex, u32> state_ids; |
| 792 | vector<BoundedRepeatData> repeats; |
| 793 | map<u32, set<NFAVertex>> tops; |
| 794 | unique_ptr<NGHolder> h |
| 795 | = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, |
| 796 | state_ids, repeats, tops); |
| 797 | assert(h); |
| 798 | u32 numStates = countStates(state_ids); |
| 799 | if (numStates <= NFA_MAX_STATES) { |
| 800 | return numStates; |
| 801 | } |
| 802 | |
| 803 | return 0; |
| 804 | } |
| 805 | |
| 806 | void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm, |
| 807 | const CompileContext &cc) { |
| 808 | NGHolder g_pristine; |
| 809 | cloneHolder(g_pristine, g); |
| 810 | |
| 811 | reduceGraphEquivalences(g, cc); |
| 812 | |
| 813 | removeRedundancy(g, som); |
| 814 | |
| 815 | if (rm && has_managed_reports(g)) { |
| 816 | pruneHighlanderDominated(g, *rm); |
| 817 | } |
| 818 | |
| 819 | if (!isImplementableNFA(g, rm, cc)) { |
| 820 | DEBUG_PRINTF("reductions made graph unimplementable, roll back\n" ); |
| 821 | clear_graph(g); |
| 822 | cloneHolder(g, g_pristine); |
| 823 | } |
| 824 | } |
| 825 | |
| 826 | u32 countAccelStates(const NGHolder &g, const ReportManager *rm, |
| 827 | const CompileContext &cc) { |
| 828 | if (!has_managed_reports(g)) { |
| 829 | rm = nullptr; |
| 830 | } else { |
| 831 | assert(rm); |
| 832 | } |
| 833 | |
| 834 | const bool impl_test_only = true; |
| 835 | const map<u32, u32> fixed_depth_tops; // empty |
| 836 | const map<u32, vector<vector<CharReach>>> triggers; // empty |
| 837 | |
| 838 | unordered_map<NFAVertex, u32> state_ids; |
| 839 | vector<BoundedRepeatData> repeats; |
| 840 | map<u32, set<NFAVertex>> tops; |
| 841 | unique_ptr<NGHolder> h |
| 842 | = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, |
| 843 | state_ids, repeats, tops); |
| 844 | |
| 845 | if (!h || countStates(state_ids) > NFA_MAX_STATES) { |
| 846 | DEBUG_PRINTF("not constructible\n" ); |
| 847 | return NFA_MAX_ACCEL_STATES + 1; |
| 848 | } |
| 849 | |
| 850 | assert(h->kind == g.kind); |
| 851 | |
| 852 | // Should have no bearing on accel calculation, so we leave these empty. |
| 853 | const set<NFAVertex> zombies; |
| 854 | unordered_map<NFAVertex, NFAStateSet> reportSquashMap; |
| 855 | unordered_map<NFAVertex, NFAStateSet> squashMap; |
| 856 | |
| 857 | return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, |
| 858 | tops, zombies, cc); |
| 859 | } |
| 860 | |
| 861 | } // namespace ue2 |
| 862 | |