/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Graph fuzzer for approximate matching
 */

#include "ng_fuzzy.h"

#include "ng.h"
#include "ng_depth.h"
#include "ng_util.h"

#include <map>
#include <vector>
using namespace std;

namespace ue2 {

// returns all successors up to a given depth in a vector of sets, indexed by
// zero-based depth from source vertex
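// For example (illustrative only): for a simple chain u -> a -> b -> c with
// src = u and depth = 2, result[0] == {a} and result[1] == {b}, assuming no
// other edges exist.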
static
vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g,
                                                    NFAVertex src, u32 depth) {
    vector<flat_set<NFAVertex>> result(depth);
    flat_set<NFAVertex> cur, next;

    assert(depth > 0);

    // populate current set of successors
    for (auto v : adjacent_vertices_range(src, g)) {
        // ignore self-loops
        if (src == v) {
            continue;
        }
        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
        cur.insert(v);
    }
    result[0] = cur;

    for (unsigned d = 1; d < depth; d++) {
        // collect all successors for all current level vertices
        for (auto v : cur) {
            // don't go past special nodes
            if (is_special(v, g)) {
                continue;
            }

            for (auto succ : adjacent_vertices_range(v, g)) {
                // ignore self-loops
                if (v == succ) {
                    continue;
                }
                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
                next.insert(succ);
            }
        }
        result[d] = next;
        next.swap(cur);
        next.clear();
    }

    return result;
}

// returns all predecessors up to a given depth in a vector of sets, indexed by
// zero-based depth from source vertex
static
vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
                                                      NFAVertex src,
                                                      u32 depth) {
    vector<flat_set<NFAVertex>> result(depth);
    flat_set<NFAVertex> cur, next;

    assert(depth > 0);

    // populate current set of predecessors
    for (auto v : inv_adjacent_vertices_range(src, g)) {
        // ignore self-loops
        if (src == v) {
            continue;
        }
        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
        cur.insert(v);
    }
    result[0] = cur;

    for (unsigned d = 1; d < depth; d++) {
        // collect all predecessors for all current level vertices
        for (auto v : cur) {
            for (auto pred : inv_adjacent_vertices_range(v, g)) {
                // ignore self-loops
                if (v == pred) {
                    continue;
                }
                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
                next.insert(pred);
            }
        }
        result[d] = next;
        next.swap(cur);
        next.clear();
    }

    return result;
}

/*
 * This struct produces a fuzzed graph; that is, a graph that is able to match
 * the original pattern, as well as input data within a certain edit distance.
 * Construct the struct, then call fuzz_graph() to transform the graph.
 *
 * Terminology used:
 * - Shadow vertices: vertices mirroring the original graph at various edit
 *   distances
 * - Shadow graph level: edit distance of a particular shadow graph
 * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace
 */
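// Illustrative sketch (not generated output): fuzzing the pattern "ab" at
// edit distance 1 keeps the original path start -> a -> b -> accept and adds
// a level-1 shadow path a' -> b' plus dot helpers, wired so that a single
// insertion, replacement or removal anywhere along the original path still
// reaches accept (in Hamming mode, only replacements are wired).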
struct ShadowGraph {
    NGHolder &g;
    u32 edit_distance;
    bool hamming;
    map<pair<NFAVertex, u32>, NFAVertex> shadow_map;
    map<pair<NFAVertex, u32>, NFAVertex> helper_map;
    map<NFAVertex, NFAVertex> clones;
    // edge creation is deferred
    vector<pair<NFAVertex, NFAVertex>> edges_to_be_added;
    flat_set<NFAVertex> orig;

    ShadowGraph(NGHolder &g_in, u32 ed_in, bool hamm_in)
        : g(g_in), edit_distance(ed_in), hamming(hamm_in) {}

    void fuzz_graph() {
        if (edit_distance == 0) {
            return;
        }

        DEBUG_PRINTF("edit distance = %u hamming = %s\n", edit_distance,
                     hamming ? "true" : "false");

        // step 1: prepare the vertices, helpers and shadows according to
        // the original graph
        prepare_graph();

        // step 2: add shadow and helper nodes
        build_shadow_graph();

        // step 3: set up reports for newly created vertices (and make clones
        // if necessary)
        if (!hamming) {
            create_reports();
        }

        // step 4: wire up shadow graph and helpers for insert/replace/remove
        connect_shadow_graph();

        // step 5: commit all the edge wirings
        DEBUG_PRINTF("Committing edge wirings\n");
        for (const auto &p : edges_to_be_added) {
            add_edge_if_not_present(p.first, p.second, g);
        }

        DEBUG_PRINTF("Done!\n");
    }

private:
    const NFAVertex& get_clone(const NFAVertex &v) {
        return contains(clones, v) ? clones[v] : v;
    }

    void connect_to_clones(const NFAVertex &u, const NFAVertex &v) {
        const NFAVertex &clone_u = get_clone(u);
        const NFAVertex &clone_v = get_clone(v);

        edges_to_be_added.emplace_back(u, v);
        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index);

        // do not connect clones to accepts, we do it during cloning
        if (is_any_accept(clone_v, g)) {
            return;
        }
        edges_to_be_added.emplace_back(clone_u, clone_v);
        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index,
                     g[clone_v].index);
    }

    void prepare_graph() {
        DEBUG_PRINTF("Building shadow graphs\n");

        for (auto v : vertices_range(g)) {
            // all level 0 vertices are their own helpers and their own shadows
            helper_map[make_pair(v, 0)] = v;
            shadow_map[make_pair(v, 0)] = v;

            // find special nodes
            if (is_any_accept(v, g)) {
                DEBUG_PRINTF("Node %zu is a special node\n", g[v].index);
                for (unsigned edit = 1; edit <= edit_distance; edit++) {
                    // all accepts are their own shadows and helpers at all
                    // levels
                    shadow_map[make_pair(v, edit)] = v;
                    helper_map[make_pair(v, edit)] = v;
                }
                continue;
            }
            DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index);
            orig.insert(v);
        }
    }

    void build_shadow_graph() {
        for (auto v : orig) {
            DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n",
                         g[v].index);
            for (unsigned dist = 1; dist <= edit_distance; dist++) {
                auto shadow_v = v;

                // start and startDs cannot have shadows but do have helpers
                if (!is_any_start(v, g)) {
                    shadow_v = clone_vertex(g, v);
                    DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
                                 g[shadow_v].index, dist);
                }
                shadow_map[make_pair(v, dist)] = shadow_v;

                // if there's nowhere to go from this vertex, no helper needed
                if (proper_out_degree(v, g) < 1) {
                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n",
                                 g[shadow_v].index, dist);
                    helper_map[make_pair(v, dist)] = shadow_v;
                    continue;
                }

                // start and startDs only use helpers for insert, which
                // Hamming mode doesn't allow, so skip them here
                if (hamming && is_any_start(v, g)) {
                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n",
                                 g[shadow_v].index, dist);
                    helper_map[make_pair(v, dist)] = shadow_v;
                    continue;
                }

                auto helper_v = clone_vertex(g, v);
                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
                             g[helper_v].index, dist);

                // this is a helper, so make it a dot
                g[helper_v].char_reach = CharReach::dot();
                // do not copy virtual start's assert flags
                if (is_virtual_start(v, g)) {
                    DEBUG_PRINTF("Helper node ID is virtual start: %zu "
                                 "(level %u)\n",
                                 g[helper_v].index, dist);
                    g[helper_v].assert_flags = 0;
                }
                helper_map[make_pair(v, dist)] = helper_v;
            }
        }
    }

    // wire up successors according to the original graph, wire helpers
    // to shadow successors (insert/replace)
    void connect_succs(NFAVertex v, u32 dist) {
        DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n",
                     g[v].index, dist);
        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)];
        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];

        // multiple insert
        if (!hamming && dist > 1) {
            const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)];
            connect_to_clones(prev_level_helper, cur_shadow_helper);
        }

        for (auto orig_dst : adjacent_vertices_range(v, g)) {
            const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)];

            connect_to_clones(cur_shadow_v, shadow_dst);

            // ignore startDs for insert/replace
            if (orig_dst == g.startDs) {
                continue;
            }

            connect_to_clones(cur_shadow_helper, shadow_dst);
        }
    }

    // wire up predecessors according to the original graph, wire
    // predecessors to helpers (replace), wire predecessor helpers to
    // helpers (multiple replace)
    void connect_preds(NFAVertex v, u32 dist) {
        DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n",
                     g[v].index, dist);
        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)];
        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];

        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
        for (auto orig_src : orig_src_vertices) {
            // ignore edges from start to startDs
            if (v == g.startDs && orig_src == g.start) {
                continue;
            }
            // ignore self-loops for replace
            if (orig_src != v) {
                // do not wire a replace node for start vertices if we
                // have a virtual start
                if (is_virtual_start(v, g) && is_any_start(orig_src, g)) {
                    continue;
                }

                if (dist) {
                    const auto &prev_level_src =
                        shadow_map[make_pair(orig_src, dist - 1)];
                    const auto &prev_level_helper =
                        helper_map[make_pair(orig_src, dist - 1)];

                    connect_to_clones(prev_level_src, cur_shadow_helper);
                    connect_to_clones(prev_level_helper, cur_shadow_helper);
                }
            }
            // wire predecessor according to original graph
            const auto &shadow_src = shadow_map[make_pair(orig_src, dist)];

            connect_to_clones(shadow_src, cur_shadow_v);
        }
    }

    // wire up previous level shadow to current level helper (insert)
    void connect_helpers(NFAVertex v, u32 dist) {
        DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n",
                     g[v].index, dist);
        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];
        auto prev_level_v = shadow_map[make_pair(v, dist - 1)];

        connect_to_clones(prev_level_v, cur_shadow_helper);
    }

    /*
     * wiring edges for removal is a special case.
     *
     * when wiring edges for removal, as well as wiring up immediate
     * predecessors to immediate successors, we also need to wire up more
     * distant successors to their respective shadow graph levels.
     *
     * for example, consider graph start->a->b->c->d->accept.
     *
     * at edit distance 1, we need removal edges start->b, a->c, b->d, and
     * c->accept, all going from the original graph (level 0) to shadow graph
     * level 1.
     *
     * at edit distance 2, we also need edges start->c, a->d and b->accept,
     * all going from level 0 to shadow graph level 2.
     *
     * this is propagated to all shadow levels; that is, given edit
     * distance 3, we will have edges from shadow levels 0->1, 0->2,
     * 0->3, 1->2, 1->3, and 2->3.
     *
     * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2,
     * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc.
     *
     * we also have to wire helpers to their removal successors, to
     * accommodate a replace followed by a remove, on all shadow levels.
     *
     * and finally, we also have to wire source shadows into removal
     * successor helpers on a level above, to accommodate a remove
     * followed by a replace.
     */
    void connect_removals(NFAVertex v) {
        DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index);

        // vertices returned by this function don't include self-loops
        auto dst_vertices_by_depth =
            gatherSuccessorsByDepth(g, v, edit_distance);
        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
        for (auto orig_src : orig_src_vertices) {
            // ignore self-loops
            if (orig_src == v) {
                continue;
            }
            for (unsigned step = 1; step <= edit_distance; step++) {
                for (unsigned dist = step; dist <= edit_distance; dist++) {
                    auto &dst_vertices = dst_vertices_by_depth[step - 1];
                    for (auto &orig_dst : dst_vertices) {
                        const auto &shadow_src =
                            shadow_map[make_pair(orig_src, dist - step)];
                        const auto &shadow_helper =
                            helper_map[make_pair(orig_src, dist - step)];
                        const auto &shadow_dst =
                            shadow_map[make_pair(orig_dst, dist)];

                        // removal
                        connect_to_clones(shadow_src, shadow_dst);

                        // removal from helper vertex
                        connect_to_clones(shadow_helper, shadow_dst);

                        // removal into helper, requires additional edit
                        if ((dist + 1) <= edit_distance) {
                            const auto &next_level_helper =
                                helper_map[make_pair(orig_dst, dist + 1)];

                            connect_to_clones(shadow_src, next_level_helper);
                        }
                    }
                }
            }
        }
    }

    void connect_shadow_graph() {
        DEBUG_PRINTF("Wiring up the graph\n");

        for (auto v : orig) {

            DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index);

            for (unsigned dist = 0; dist <= edit_distance; dist++) {

                // handle insert/replace
                connect_succs(v, dist);

                // handle replace/multiple replace
                connect_preds(v, dist);

                // handle helpers
                if (!hamming && dist > 0) {
                    connect_helpers(v, dist);
                }
            }

            // handle removals
            if (!hamming) {
                connect_removals(v);
            }
        }
    }

    void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) {
        for (auto dst : targets) {
            DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index,
                         g[dst].index);
            edges_to_be_added.emplace_back(src, dst);
        }
    }

    // create a clone of the vertex, but overwrite its report set
    void create_clone(NFAVertex v, const flat_set<ReportID> &reports,
                      unsigned max_edit_distance,
                      const flat_set<NFAVertex> &targets) {
        // some vertices may have the same reports, but different successors;
        // therefore, we may need to connect them multiple times, but still
        // only clone once
        bool needs_cloning = !contains(clones, v);

        DEBUG_PRINTF("Cloning node %zu\n", g[v].index);
        // go through all shadows and helpers, including
        // original vertex
        for (unsigned d = 0; d < max_edit_distance; d++) {
            auto shadow_v = shadow_map[make_pair(v, d)];
            auto helper_v = helper_map[make_pair(v, d)];

            NFAVertex new_shadow_v, new_helper_v;

            // make sure we don't clone the same vertex twice
            if (needs_cloning) {
                new_shadow_v = clone_vertex(g, shadow_v);
                DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
                             g[new_shadow_v].index, d);
                clones[shadow_v] = new_shadow_v;
            } else {
                new_shadow_v = clones[shadow_v];
            }
            g[new_shadow_v].reports = reports;

            connect_to_targets(new_shadow_v, targets);

            if (shadow_v == helper_v) {
                continue;
            }
            if (needs_cloning) {
                new_helper_v = clone_vertex(g, helper_v);
                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
                             g[new_helper_v].index, d);
                clones[helper_v] = new_helper_v;
            } else {
                new_helper_v = clones[helper_v];
            }
            g[new_helper_v].reports = reports;

            connect_to_targets(new_helper_v, targets);
        }
    }

    void write_reports(NFAVertex v, const flat_set<ReportID> &reports,
                       unsigned max_edit_distance,
                       const flat_set<NFAVertex> &targets) {
        // we're overwriting reports, but we're not losing any
        // information as we already cached all the different report
        // sets, so vertices having different reports will be cloned and set
        // up with the correct report set

        // go through all shadows and helpers, including original
        // vertex
        for (unsigned d = 0; d < max_edit_distance; d++) {
            auto shadow_v = shadow_map[make_pair(v, d)];
            auto helper_v = helper_map[make_pair(v, d)];
            DEBUG_PRINTF("Setting up reports for shadow node: %zu "
                         "(level %u)\n",
                         g[shadow_v].index, d);
            DEBUG_PRINTF("Setting up reports for helper node: %zu "
                         "(level %u)\n",
                         g[helper_v].index, d);
            g[shadow_v].reports = reports;
            g[helper_v].reports = reports;

            connect_to_targets(shadow_v, targets);
            connect_to_targets(helper_v, targets);
        }
    }

    /*
     * we may have multiple report sets per graph. that means, whenever we
     * construct additional paths through the graph (alternations, removals),
     * we have to account for the fact that some vertices are predecessors to
     * vertices with different report sets.
     *
     * whenever that happens, we have to clone the paths for both report sets,
     * and set up these new vertices with their respective report sets as
     * well.
     *
     * in order to do that, we first have to get all the predecessors of the
     * accept and acceptEod vertices. then, go through them one by one, and
     * take note of the report lists. the first report set we find wins; the
     * rest we clone.
     *
     * we also have to do this in two passes, because there may be vertices
     * that are predecessors to vertices with different report sets, so to
     * avoid overwriting reports we will be caching reports info instead.
     */
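    // For instance (an assumption about typical inputs, not stated above): a
    // graph in which some accept predecessors report through accept while
    // others report through acceptEod can yield two distinct report sets,
    // which is why the assert below permits at most two.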
    void create_reports() {
        map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices;
        flat_set<NFAVertex> accepts{g.accept, g.acceptEod};

        // gather reports info from all vertices connected to accept
        for (auto accept : accepts) {
            for (auto src : inv_adjacent_vertices_range(accept, g)) {
                // skip special vertices
                if (is_special(src, g)) {
                    continue;
                }
                reports_to_vertices[g[src].reports].insert(src);
            }
        }

        // we expect to see at most two report sets
        assert(reports_to_vertices.size() > 0 &&
               reports_to_vertices.size() <= 2);

        // set up all reports
        bool clone = false;
        for (auto &pair : reports_to_vertices) {
            const auto &reports = pair.first;
            const auto &vertices = pair.second;

            for (auto src : vertices) {
                // get all predecessors up to edit distance
                auto src_vertices_by_depth =
                    gatherPredecessorsByDepth(g, src, edit_distance);

                // find which accepts source vertex connects to
                flat_set<NFAVertex> targets;
                for (const auto &accept : accepts) {
                    NFAEdge e = edge(src, accept, g);
                    if (e) {
                        targets.insert(accept);
                    }
                }
                assert(targets.size());

                for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
                    const auto &preds = src_vertices_by_depth[d];
                    for (auto v : preds) {
                        // only clone a node if it already contains reports
                        if (clone && !g[v].reports.empty()) {
                            create_clone(v, reports, edit_distance - d,
                                         targets);
                        } else {
                            write_reports(v, reports, edit_distance - d,
                                          targets);
                        }
                    }
                }
            }
            // clone vertices only if it's not our first report set
            clone = true;
        }
    }
};

// check if we will edit our way into a vacuous pattern
static
bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) {
    auto depths = calcRevDepths(g);

    depth min_depth = depth::infinity();
    auto idx = g[g.start].index;

    // check distance from start to accept/acceptEod
    if (depths[idx].toAccept.min.is_finite()) {
        min_depth = min(depths[idx].toAccept.min, min_depth);
    }
    if (depths[idx].toAcceptEod.min.is_finite()) {
        min_depth = min(depths[idx].toAcceptEod.min, min_depth);
    }

    idx = g[g.startDs].index;

    // check distance from startDs to accept/acceptEod
    if (depths[idx].toAccept.min.is_finite()) {
        min_depth = min(depths[idx].toAccept.min, min_depth);
    }
    if (depths[idx].toAcceptEod.min.is_finite()) {
        min_depth = min(depths[idx].toAcceptEod.min, min_depth);
    }

    assert(min_depth.is_finite());

    // now, check if we can edit our way into a vacuous pattern
    if (min_depth <= (u64a)edit_distance + 1) {
        DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n");
        return true;
    }
    return false;
}

void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming,
                            bool utf8, const Grey &grey) {
    if (edit_distance == 0) {
        return;
    }
    if (!grey.allowApproximateMatching) {
        throw CompileError("Approximate matching is disabled.");
    }
    if (edit_distance > grey.maxEditDistance) {
        throw CompileError("Edit distance is too big.");
    }
    if (utf8) {
        throw CompileError("UTF-8 is disallowed for approximate matching.");
    }
    // graph isn't fuzzable if there are edge assertions anywhere in the graph
    for (auto e : edges_range(g)) {
        if (g[e].assert_flags) {
            throw CompileError("Zero-width assertions are disallowed for "
                               "approximate matching.");
        }
    }
    if (!hamming && will_turn_vacuous(g, edit_distance)) {
        throw CompileError("Approximate matching patterns that reduce to "
                           "vacuous patterns are disallowed.");
    }
}
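// Typical call sequence (a sketch of intended usage; the actual call sites
// live elsewhere in the compiler):
//
//     validate_fuzzy_compile(g, edit_distance, hamming, utf8, grey);
//     make_fuzzy(g, edit_distance, hamming, grey);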

void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming,
                const Grey &grey) {
    if (edit_distance == 0) {
        return;
    }

    assert(grey.allowApproximateMatching);
    assert(grey.maxEditDistance >= edit_distance);

    ShadowGraph sg(g, edit_distance, hamming);
    sg.fuzz_graph();

    // For safety, enforce limit on actual vertex count.
    if (num_vertices(g) > grey.limitApproxMatchingVertices) {
        DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g),
                     grey.limitApproxMatchingVertices);
        throw ResourceLimitError();
    }
}

} // namespace ue2