1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "grey.h"
30#include "ue2common.h"
31
32#include <algorithm>
33#include <cstdlib> // exit
34#include <string>
35#include <vector>
36
37#define DEFAULT_MAX_HISTORY 110
38
39using namespace std;
40
41namespace ue2 {
42
43Grey::Grey(void) :
44 optimiseComponentTree(true),
45 calcComponents(true),
46 performGraphSimplification(true),
47 prefilterReductions(true),
48 removeEdgeRedundancy(true),
49 allowGough(true),
50 allowHaigLit(true),
51 allowLitHaig(true),
52 allowLbr(true),
53 allowMcClellan(true),
54 allowSheng(true),
55 allowMcSheng(true),
56 allowPuff(true),
57 allowLiteral(true),
58 allowViolet(true),
59 allowExtendedNFA(true), /* bounded repeats of course */
60 allowLimExNFA(true),
61 allowAnchoredAcyclic(true),
62 allowSmallLiteralSet(true),
63 allowCastle(true),
64 allowDecoratedLiteral(true),
65 allowApproximateMatching(true),
66 allowNoodle(true),
67 fdrAllowTeddy(true),
68 fdrAllowFlood(true),
69 violetAvoidSuffixes(true),
70 violetAvoidWeakInfixes(true),
71 violetDoubleCut(true),
72 violetExtractStrongLiterals(true),
73 violetLiteralChains(true),
74 violetDoubleCutLiteralLen(3),
75 violetEarlyCleanLiteralLen(6),
76 puffImproveHead(true),
77 castleExclusive(true),
78 mergeSEP(true), /* short exhaustible passthroughs */
79 mergeRose(true), // roses inside rose
80 mergeSuffixes(true), // suffix nfas inside rose
81 mergeOutfixes(true),
82 onlyOneOutfix(false),
83 allowShermanStates(true),
84 allowMcClellan8(true),
85 allowWideStates(true), // enable wide state for McClellan8
86 highlanderPruneDFA(true),
87 minimizeDFA(true),
88 accelerateDFA(true),
89 accelerateNFA(true),
90 reverseAccelerate(true),
91 squashNFA(true),
92 compressNFAState(true),
93 numberNFAStatesWrong(false), /* debugging only */
94 highlanderSquash(true),
95 allowZombies(true),
96 floodAsPuffette(false),
97 nfaForceSize(0),
98 maxHistoryAvailable(DEFAULT_MAX_HISTORY),
99 minHistoryAvailable(0), /* debugging only */
100 maxAnchoredRegion(63), /* for rose's atable to run over */
101 minRoseLiteralLength(3),
102 minRoseNetflowLiteralLength(2),
103 maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
104 maxEditDistance(16),
105 minExtBoundedRepeatSize(32),
106 goughCopyPropagate(true),
107 goughRegisterAllocate(true),
108 shortcutLiterals(true),
109 roseGraphReduction(true),
110 roseRoleAliasing(true),
111 roseMasks(true),
112 roseConvertFloodProneSuffixes(true),
113 roseMergeRosesDuringAliasing(true),
114 roseMultiTopRoses(true),
115 roseHamsterMasks(true),
116 roseLookaroundMasks(true),
117 roseMcClellanPrefix(1),
118 roseMcClellanSuffix(1),
119 roseMcClellanOutfix(2),
120 roseTransformDelay(true),
121 earlyMcClellanPrefix(true),
122 earlyMcClellanInfix(true),
123 earlyMcClellanSuffix(true),
124 allowCountingMiracles(true),
125 allowSomChain(true),
126 somMaxRevNfaLength(126),
127 hamsterAccelForward(true),
128 hamsterAccelReverse(false),
129 miracleHistoryBonus(16),
130 equivalenceEnable(true),
131
132 allowSmallWrite(true), // McClellan dfas for small patterns
133 allowSmallWriteSheng(false), // allow use of Sheng for SMWR
134
135 smallWriteLargestBuffer(70), // largest buffer that can be
136 // considered a small write
137 // all blocks larger than this
138 // are given to rose &co
139 smallWriteLargestBufferBad(35),
140 limitSmallWriteOutfixSize(1048576), // 1 MB
141 smallWriteMaxPatterns(10000),
142 smallWriteMaxLiterals(10000),
143 smallWriteMergeBatchSize(20),
144 allowTamarama(true), // Tamarama engine
145 tamaChunkSize(100),
146 dumpFlags(0),
147 limitPatternCount(8000000), // 8M patterns
148 limitPatternLength(16000), // 16K bytes
149 limitGraphVertices(500000), // 500K vertices
150 limitGraphEdges(1000000), // 1M edges
151 limitReportCount(4*8000000),
152 limitLiteralCount(8000000), // 8M literals
153 limitLiteralLength(16000),
154 limitLiteralMatcherChars(1073741824), // 1 GB
155 limitLiteralMatcherSize(1073741824), // 1 GB
156 limitRoseRoleCount(4*8000000),
157 limitRoseEngineCount(8000000), // 8M engines
158 limitRoseAnchoredSize(1073741824), // 1 GB
159 limitEngineSize(1073741824), // 1 GB
160 limitDFASize(1073741824), // 1 GB
161 limitNFASize(1048576), // 1 MB
162 limitLBRSize(1048576), // 1 MB
163 limitApproxMatchingVertices(5000)
164{
165 assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
166}
167
168} // namespace ue2
169
170#ifndef RELEASE_BUILD
171
172#include <boost/lexical_cast.hpp>
173using boost::lexical_cast;
174
175namespace ue2 {
176
177void applyGreyOverrides(Grey *g, const string &s) {
178 string::const_iterator p = s.begin();
179 string::const_iterator pe = s.end();
180 string help = "help:0";
181 bool invalid_key_seen = false;
182 Grey defaultg;
183
184 if (s == "help" || s == "help:") {
185 printf("Valid grey overrides:\n");
186 p = help.begin();
187 pe = help.end();
188 }
189
190 while (p != pe) {
191 string::const_iterator ke = find(p, pe, ':');
192
193 if (ke == pe) {
194 break;
195 }
196
197 string key(p, ke);
198
199 string::const_iterator ve = find(ke, pe, ';');
200
201 unsigned int value = 0;
202 try {
203 value = lexical_cast<unsigned int>(string(ke + 1, ve));
204 } catch (boost::bad_lexical_cast &e) {
205 printf("Invalid grey override key %s:%s\n", key.c_str(),
206 string(ke + 1, ve).c_str());
207 invalid_key_seen = true;
208 break;
209 }
210 bool done = false;
211
212 /* surely there exists a nice template to go with this macro to make
213 * all the boring code disappear */
214#define G_UPDATE(k) do { \
215 if (key == ""#k) { g->k = value; done = 1;} \
216 if (key == "help") { \
217 printf("\t%-30s\tdefault: %s\n", #k, \
218 lexical_cast<string>(defaultg.k).c_str()); \
219 } \
220 } while (0)
221
222 G_UPDATE(optimiseComponentTree);
223 G_UPDATE(calcComponents);
224 G_UPDATE(performGraphSimplification);
225 G_UPDATE(prefilterReductions);
226 G_UPDATE(removeEdgeRedundancy);
227 G_UPDATE(allowGough);
228 G_UPDATE(allowHaigLit);
229 G_UPDATE(allowLitHaig);
230 G_UPDATE(allowLbr);
231 G_UPDATE(allowMcClellan);
232 G_UPDATE(allowSheng);
233 G_UPDATE(allowMcSheng);
234 G_UPDATE(allowPuff);
235 G_UPDATE(allowLiteral);
236 G_UPDATE(allowViolet);
237 G_UPDATE(allowExtendedNFA);
238 G_UPDATE(allowLimExNFA);
239 G_UPDATE(allowAnchoredAcyclic);
240 G_UPDATE(allowSmallLiteralSet);
241 G_UPDATE(allowCastle);
242 G_UPDATE(allowDecoratedLiteral);
243 G_UPDATE(allowNoodle);
244 G_UPDATE(allowApproximateMatching);
245 G_UPDATE(fdrAllowTeddy);
246 G_UPDATE(fdrAllowFlood);
247 G_UPDATE(violetAvoidSuffixes);
248 G_UPDATE(violetAvoidWeakInfixes);
249 G_UPDATE(violetDoubleCut);
250 G_UPDATE(violetExtractStrongLiterals);
251 G_UPDATE(violetLiteralChains);
252 G_UPDATE(violetDoubleCutLiteralLen);
253 G_UPDATE(violetEarlyCleanLiteralLen);
254 G_UPDATE(puffImproveHead);
255 G_UPDATE(castleExclusive);
256 G_UPDATE(mergeSEP);
257 G_UPDATE(mergeRose);
258 G_UPDATE(mergeSuffixes);
259 G_UPDATE(mergeOutfixes);
260 G_UPDATE(onlyOneOutfix);
261 G_UPDATE(allowShermanStates);
262 G_UPDATE(allowMcClellan8);
263 G_UPDATE(allowWideStates);
264 G_UPDATE(highlanderPruneDFA);
265 G_UPDATE(minimizeDFA);
266 G_UPDATE(accelerateDFA);
267 G_UPDATE(accelerateNFA);
268 G_UPDATE(reverseAccelerate);
269 G_UPDATE(squashNFA);
270 G_UPDATE(compressNFAState);
271 G_UPDATE(numberNFAStatesWrong);
272 G_UPDATE(allowZombies);
273 G_UPDATE(floodAsPuffette);
274 G_UPDATE(nfaForceSize);
275 G_UPDATE(highlanderSquash);
276 G_UPDATE(maxHistoryAvailable);
277 G_UPDATE(minHistoryAvailable);
278 G_UPDATE(maxAnchoredRegion);
279 G_UPDATE(minRoseLiteralLength);
280 G_UPDATE(minRoseNetflowLiteralLength);
281 G_UPDATE(maxRoseNetflowEdges);
282 G_UPDATE(maxEditDistance);
283 G_UPDATE(minExtBoundedRepeatSize);
284 G_UPDATE(goughCopyPropagate);
285 G_UPDATE(goughRegisterAllocate);
286 G_UPDATE(shortcutLiterals);
287 G_UPDATE(roseGraphReduction);
288 G_UPDATE(roseRoleAliasing);
289 G_UPDATE(roseMasks);
290 G_UPDATE(roseConvertFloodProneSuffixes);
291 G_UPDATE(roseMergeRosesDuringAliasing);
292 G_UPDATE(roseMultiTopRoses);
293 G_UPDATE(roseHamsterMasks);
294 G_UPDATE(roseLookaroundMasks);
295 G_UPDATE(roseMcClellanPrefix);
296 G_UPDATE(roseMcClellanSuffix);
297 G_UPDATE(roseMcClellanOutfix);
298 G_UPDATE(roseTransformDelay);
299 G_UPDATE(earlyMcClellanPrefix);
300 G_UPDATE(earlyMcClellanInfix);
301 G_UPDATE(earlyMcClellanSuffix);
302 G_UPDATE(allowSomChain);
303 G_UPDATE(allowCountingMiracles);
304 G_UPDATE(somMaxRevNfaLength);
305 G_UPDATE(hamsterAccelForward);
306 G_UPDATE(hamsterAccelReverse);
307 G_UPDATE(miracleHistoryBonus);
308 G_UPDATE(equivalenceEnable);
309 G_UPDATE(allowSmallWrite);
310 G_UPDATE(allowSmallWriteSheng);
311 G_UPDATE(smallWriteLargestBuffer);
312 G_UPDATE(smallWriteLargestBufferBad);
313 G_UPDATE(limitSmallWriteOutfixSize);
314 G_UPDATE(smallWriteMaxPatterns);
315 G_UPDATE(smallWriteMaxLiterals);
316 G_UPDATE(smallWriteMergeBatchSize);
317 G_UPDATE(allowTamarama);
318 G_UPDATE(tamaChunkSize);
319 G_UPDATE(limitPatternCount);
320 G_UPDATE(limitPatternLength);
321 G_UPDATE(limitGraphVertices);
322 G_UPDATE(limitGraphEdges);
323 G_UPDATE(limitReportCount);
324 G_UPDATE(limitLiteralCount);
325 G_UPDATE(limitLiteralLength);
326 G_UPDATE(limitLiteralMatcherChars);
327 G_UPDATE(limitLiteralMatcherSize);
328 G_UPDATE(limitRoseRoleCount);
329 G_UPDATE(limitRoseEngineCount);
330 G_UPDATE(limitRoseAnchoredSize);
331 G_UPDATE(limitEngineSize);
332 G_UPDATE(limitDFASize);
333 G_UPDATE(limitNFASize);
334 G_UPDATE(limitLBRSize);
335 G_UPDATE(limitApproxMatchingVertices);
336
337#undef G_UPDATE
338 if (key == "simple_som") {
339 g->allowHaigLit = false;
340 g->allowLitHaig = false;
341 g->allowSomChain = false;
342 g->somMaxRevNfaLength = 0;
343 done = true;
344 }
345 if (key == "forceOutfixesNFA") {
346 g->allowAnchoredAcyclic = false;
347 g->allowCastle = false;
348 g->allowDecoratedLiteral = false;
349 g->allowGough = false;
350 g->allowHaigLit = false;
351 g->allowLbr = false;
352 g->allowLimExNFA = true;
353 g->allowLitHaig = false;
354 g->allowMcClellan = false;
355 g->allowPuff = false;
356 g->allowLiteral = false;
357 g->allowViolet = false;
358 g->allowSmallLiteralSet = false;
359 g->roseMasks = false;
360 done = true;
361 }
362 if (key == "forceOutfixesDFA") {
363 g->allowAnchoredAcyclic = false;
364 g->allowCastle = false;
365 g->allowDecoratedLiteral = false;
366 g->allowGough = false;
367 g->allowHaigLit = false;
368 g->allowLbr = false;
369 g->allowLimExNFA = false;
370 g->allowLitHaig = false;
371 g->allowMcClellan = true;
372 g->allowPuff = false;
373 g->allowLiteral = false;
374 g->allowViolet = false;
375 g->allowSmallLiteralSet = false;
376 g->roseMasks = false;
377 done = true;
378 }
379 if (key == "forceOutfixes") {
380 g->allowAnchoredAcyclic = false;
381 g->allowCastle = false;
382 g->allowDecoratedLiteral = false;
383 g->allowGough = true;
384 g->allowHaigLit = false;
385 g->allowLbr = false;
386 g->allowLimExNFA = true;
387 g->allowLitHaig = false;
388 g->allowMcClellan = true;
389 g->allowPuff = false;
390 g->allowLiteral = false;
391 g->allowViolet = false;
392 g->allowSmallLiteralSet = false;
393 g->roseMasks = false;
394 done = true;
395 }
396
397 if (!done && key != "help") {
398 printf("Invalid grey override key %s:%u\n", key.c_str(), value);
399 invalid_key_seen = true;
400 }
401
402 p = ve;
403
404 if (p != pe) {
405 ++p;
406 }
407 }
408
409 if (invalid_key_seen) {
410 applyGreyOverrides(g, "help");
411 exit(1);
412 }
413
414 assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
415}
416
417} // namespace ue2
418
419#endif
420