1 | /* |
2 | * Copyright (c) 2015-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "grey.h" |
30 | #include "ue2common.h" |
31 | |
32 | #include <algorithm> |
33 | #include <cstdlib> // exit |
34 | #include <string> |
35 | #include <vector> |
36 | |
37 | #define DEFAULT_MAX_HISTORY 110 |
38 | |
39 | using namespace std; |
40 | |
41 | namespace ue2 { |
42 | |
43 | Grey::Grey(void) : |
44 | optimiseComponentTree(true), |
45 | calcComponents(true), |
46 | performGraphSimplification(true), |
47 | prefilterReductions(true), |
48 | removeEdgeRedundancy(true), |
49 | allowGough(true), |
50 | allowHaigLit(true), |
51 | allowLitHaig(true), |
52 | allowLbr(true), |
53 | allowMcClellan(true), |
54 | allowSheng(true), |
55 | allowMcSheng(true), |
56 | allowPuff(true), |
57 | allowLiteral(true), |
58 | allowViolet(true), |
59 | allowExtendedNFA(true), /* bounded repeats of course */ |
60 | allowLimExNFA(true), |
61 | allowAnchoredAcyclic(true), |
62 | allowSmallLiteralSet(true), |
63 | allowCastle(true), |
64 | allowDecoratedLiteral(true), |
65 | allowApproximateMatching(true), |
66 | allowNoodle(true), |
67 | fdrAllowTeddy(true), |
68 | fdrAllowFlood(true), |
69 | violetAvoidSuffixes(true), |
70 | violetAvoidWeakInfixes(true), |
71 | violetDoubleCut(true), |
72 | violetExtractStrongLiterals(true), |
73 | violetLiteralChains(true), |
74 | violetDoubleCutLiteralLen(3), |
75 | violetEarlyCleanLiteralLen(6), |
76 | puffImproveHead(true), |
77 | castleExclusive(true), |
78 | mergeSEP(true), /* short exhaustible passthroughs */ |
79 | mergeRose(true), // roses inside rose |
80 | mergeSuffixes(true), // suffix nfas inside rose |
81 | mergeOutfixes(true), |
82 | onlyOneOutfix(false), |
83 | allowShermanStates(true), |
84 | allowMcClellan8(true), |
85 | allowWideStates(true), // enable wide state for McClellan8 |
86 | highlanderPruneDFA(true), |
87 | minimizeDFA(true), |
88 | accelerateDFA(true), |
89 | accelerateNFA(true), |
90 | reverseAccelerate(true), |
91 | squashNFA(true), |
92 | compressNFAState(true), |
93 | numberNFAStatesWrong(false), /* debugging only */ |
94 | highlanderSquash(true), |
95 | allowZombies(true), |
96 | floodAsPuffette(false), |
97 | nfaForceSize(0), |
98 | maxHistoryAvailable(DEFAULT_MAX_HISTORY), |
99 | minHistoryAvailable(0), /* debugging only */ |
100 | maxAnchoredRegion(63), /* for rose's atable to run over */ |
101 | minRoseLiteralLength(3), |
102 | minRoseNetflowLiteralLength(2), |
103 | maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */ |
104 | maxEditDistance(16), |
105 | minExtBoundedRepeatSize(32), |
106 | goughCopyPropagate(true), |
107 | goughRegisterAllocate(true), |
108 | shortcutLiterals(true), |
109 | roseGraphReduction(true), |
110 | roseRoleAliasing(true), |
111 | roseMasks(true), |
112 | roseConvertFloodProneSuffixes(true), |
113 | roseMergeRosesDuringAliasing(true), |
114 | roseMultiTopRoses(true), |
115 | roseHamsterMasks(true), |
116 | roseLookaroundMasks(true), |
117 | roseMcClellanPrefix(1), |
118 | roseMcClellanSuffix(1), |
119 | roseMcClellanOutfix(2), |
120 | roseTransformDelay(true), |
121 | earlyMcClellanPrefix(true), |
122 | earlyMcClellanInfix(true), |
123 | earlyMcClellanSuffix(true), |
124 | allowCountingMiracles(true), |
125 | allowSomChain(true), |
126 | somMaxRevNfaLength(126), |
127 | hamsterAccelForward(true), |
128 | hamsterAccelReverse(false), |
129 | miracleHistoryBonus(16), |
130 | equivalenceEnable(true), |
131 | |
132 | allowSmallWrite(true), // McClellan dfas for small patterns |
133 | allowSmallWriteSheng(false), // allow use of Sheng for SMWR |
134 | |
135 | smallWriteLargestBuffer(70), // largest buffer that can be |
136 | // considered a small write |
137 | // all blocks larger than this |
138 | // are given to rose &co |
139 | smallWriteLargestBufferBad(35), |
140 | limitSmallWriteOutfixSize(1048576), // 1 MB |
141 | smallWriteMaxPatterns(10000), |
142 | smallWriteMaxLiterals(10000), |
143 | smallWriteMergeBatchSize(20), |
144 | allowTamarama(true), // Tamarama engine |
145 | tamaChunkSize(100), |
146 | dumpFlags(0), |
147 | limitPatternCount(8000000), // 8M patterns |
148 | limitPatternLength(16000), // 16K bytes |
149 | limitGraphVertices(500000), // 500K vertices |
150 | limitGraphEdges(1000000), // 1M edges |
151 | limitReportCount(4*8000000), |
152 | limitLiteralCount(8000000), // 8M literals |
153 | limitLiteralLength(16000), |
154 | limitLiteralMatcherChars(1073741824), // 1 GB |
155 | limitLiteralMatcherSize(1073741824), // 1 GB |
156 | limitRoseRoleCount(4*8000000), |
157 | limitRoseEngineCount(8000000), // 8M engines |
158 | limitRoseAnchoredSize(1073741824), // 1 GB |
159 | limitEngineSize(1073741824), // 1 GB |
160 | limitDFASize(1073741824), // 1 GB |
161 | limitNFASize(1048576), // 1 MB |
162 | limitLBRSize(1048576), // 1 MB |
163 | limitApproxMatchingVertices(5000) |
164 | { |
165 | assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */ |
166 | } |
167 | |
168 | } // namespace ue2 |
169 | |
170 | #ifndef RELEASE_BUILD |
171 | |
172 | #include <boost/lexical_cast.hpp> |
173 | using boost::lexical_cast; |
174 | |
175 | namespace ue2 { |
176 | |
177 | void applyGreyOverrides(Grey *g, const string &s) { |
178 | string::const_iterator p = s.begin(); |
179 | string::const_iterator pe = s.end(); |
180 | string help = "help:0" ; |
181 | bool invalid_key_seen = false; |
182 | Grey defaultg; |
183 | |
184 | if (s == "help" || s == "help:" ) { |
185 | printf("Valid grey overrides:\n" ); |
186 | p = help.begin(); |
187 | pe = help.end(); |
188 | } |
189 | |
190 | while (p != pe) { |
191 | string::const_iterator ke = find(p, pe, ':'); |
192 | |
193 | if (ke == pe) { |
194 | break; |
195 | } |
196 | |
197 | string key(p, ke); |
198 | |
199 | string::const_iterator ve = find(ke, pe, ';'); |
200 | |
201 | unsigned int value = 0; |
202 | try { |
203 | value = lexical_cast<unsigned int>(string(ke + 1, ve)); |
204 | } catch (boost::bad_lexical_cast &e) { |
205 | printf("Invalid grey override key %s:%s\n" , key.c_str(), |
206 | string(ke + 1, ve).c_str()); |
207 | invalid_key_seen = true; |
208 | break; |
209 | } |
210 | bool done = false; |
211 | |
212 | /* surely there exists a nice template to go with this macro to make |
213 | * all the boring code disappear */ |
214 | #define G_UPDATE(k) do { \ |
215 | if (key == ""#k) { g->k = value; done = 1;} \ |
216 | if (key == "help") { \ |
217 | printf("\t%-30s\tdefault: %s\n", #k, \ |
218 | lexical_cast<string>(defaultg.k).c_str()); \ |
219 | } \ |
220 | } while (0) |
221 | |
222 | G_UPDATE(optimiseComponentTree); |
223 | G_UPDATE(calcComponents); |
224 | G_UPDATE(performGraphSimplification); |
225 | G_UPDATE(prefilterReductions); |
226 | G_UPDATE(removeEdgeRedundancy); |
227 | G_UPDATE(allowGough); |
228 | G_UPDATE(allowHaigLit); |
229 | G_UPDATE(allowLitHaig); |
230 | G_UPDATE(allowLbr); |
231 | G_UPDATE(allowMcClellan); |
232 | G_UPDATE(allowSheng); |
233 | G_UPDATE(allowMcSheng); |
234 | G_UPDATE(allowPuff); |
235 | G_UPDATE(allowLiteral); |
236 | G_UPDATE(allowViolet); |
237 | G_UPDATE(allowExtendedNFA); |
238 | G_UPDATE(allowLimExNFA); |
239 | G_UPDATE(allowAnchoredAcyclic); |
240 | G_UPDATE(allowSmallLiteralSet); |
241 | G_UPDATE(allowCastle); |
242 | G_UPDATE(allowDecoratedLiteral); |
243 | G_UPDATE(allowNoodle); |
244 | G_UPDATE(allowApproximateMatching); |
245 | G_UPDATE(fdrAllowTeddy); |
246 | G_UPDATE(fdrAllowFlood); |
247 | G_UPDATE(violetAvoidSuffixes); |
248 | G_UPDATE(violetAvoidWeakInfixes); |
249 | G_UPDATE(violetDoubleCut); |
250 | G_UPDATE(violetExtractStrongLiterals); |
251 | G_UPDATE(violetLiteralChains); |
252 | G_UPDATE(violetDoubleCutLiteralLen); |
253 | G_UPDATE(violetEarlyCleanLiteralLen); |
254 | G_UPDATE(puffImproveHead); |
255 | G_UPDATE(castleExclusive); |
256 | G_UPDATE(mergeSEP); |
257 | G_UPDATE(mergeRose); |
258 | G_UPDATE(mergeSuffixes); |
259 | G_UPDATE(mergeOutfixes); |
260 | G_UPDATE(onlyOneOutfix); |
261 | G_UPDATE(allowShermanStates); |
262 | G_UPDATE(allowMcClellan8); |
263 | G_UPDATE(allowWideStates); |
264 | G_UPDATE(highlanderPruneDFA); |
265 | G_UPDATE(minimizeDFA); |
266 | G_UPDATE(accelerateDFA); |
267 | G_UPDATE(accelerateNFA); |
268 | G_UPDATE(reverseAccelerate); |
269 | G_UPDATE(squashNFA); |
270 | G_UPDATE(compressNFAState); |
271 | G_UPDATE(numberNFAStatesWrong); |
272 | G_UPDATE(allowZombies); |
273 | G_UPDATE(floodAsPuffette); |
274 | G_UPDATE(nfaForceSize); |
275 | G_UPDATE(highlanderSquash); |
276 | G_UPDATE(maxHistoryAvailable); |
277 | G_UPDATE(minHistoryAvailable); |
278 | G_UPDATE(maxAnchoredRegion); |
279 | G_UPDATE(minRoseLiteralLength); |
280 | G_UPDATE(minRoseNetflowLiteralLength); |
281 | G_UPDATE(maxRoseNetflowEdges); |
282 | G_UPDATE(maxEditDistance); |
283 | G_UPDATE(minExtBoundedRepeatSize); |
284 | G_UPDATE(goughCopyPropagate); |
285 | G_UPDATE(goughRegisterAllocate); |
286 | G_UPDATE(shortcutLiterals); |
287 | G_UPDATE(roseGraphReduction); |
288 | G_UPDATE(roseRoleAliasing); |
289 | G_UPDATE(roseMasks); |
290 | G_UPDATE(roseConvertFloodProneSuffixes); |
291 | G_UPDATE(roseMergeRosesDuringAliasing); |
292 | G_UPDATE(roseMultiTopRoses); |
293 | G_UPDATE(roseHamsterMasks); |
294 | G_UPDATE(roseLookaroundMasks); |
295 | G_UPDATE(roseMcClellanPrefix); |
296 | G_UPDATE(roseMcClellanSuffix); |
297 | G_UPDATE(roseMcClellanOutfix); |
298 | G_UPDATE(roseTransformDelay); |
299 | G_UPDATE(earlyMcClellanPrefix); |
300 | G_UPDATE(earlyMcClellanInfix); |
301 | G_UPDATE(earlyMcClellanSuffix); |
302 | G_UPDATE(allowSomChain); |
303 | G_UPDATE(allowCountingMiracles); |
304 | G_UPDATE(somMaxRevNfaLength); |
305 | G_UPDATE(hamsterAccelForward); |
306 | G_UPDATE(hamsterAccelReverse); |
307 | G_UPDATE(miracleHistoryBonus); |
308 | G_UPDATE(equivalenceEnable); |
309 | G_UPDATE(allowSmallWrite); |
310 | G_UPDATE(allowSmallWriteSheng); |
311 | G_UPDATE(smallWriteLargestBuffer); |
312 | G_UPDATE(smallWriteLargestBufferBad); |
313 | G_UPDATE(limitSmallWriteOutfixSize); |
314 | G_UPDATE(smallWriteMaxPatterns); |
315 | G_UPDATE(smallWriteMaxLiterals); |
316 | G_UPDATE(smallWriteMergeBatchSize); |
317 | G_UPDATE(allowTamarama); |
318 | G_UPDATE(tamaChunkSize); |
319 | G_UPDATE(limitPatternCount); |
320 | G_UPDATE(limitPatternLength); |
321 | G_UPDATE(limitGraphVertices); |
322 | G_UPDATE(limitGraphEdges); |
323 | G_UPDATE(limitReportCount); |
324 | G_UPDATE(limitLiteralCount); |
325 | G_UPDATE(limitLiteralLength); |
326 | G_UPDATE(limitLiteralMatcherChars); |
327 | G_UPDATE(limitLiteralMatcherSize); |
328 | G_UPDATE(limitRoseRoleCount); |
329 | G_UPDATE(limitRoseEngineCount); |
330 | G_UPDATE(limitRoseAnchoredSize); |
331 | G_UPDATE(limitEngineSize); |
332 | G_UPDATE(limitDFASize); |
333 | G_UPDATE(limitNFASize); |
334 | G_UPDATE(limitLBRSize); |
335 | G_UPDATE(limitApproxMatchingVertices); |
336 | |
337 | #undef G_UPDATE |
338 | if (key == "simple_som" ) { |
339 | g->allowHaigLit = false; |
340 | g->allowLitHaig = false; |
341 | g->allowSomChain = false; |
342 | g->somMaxRevNfaLength = 0; |
343 | done = true; |
344 | } |
345 | if (key == "forceOutfixesNFA" ) { |
346 | g->allowAnchoredAcyclic = false; |
347 | g->allowCastle = false; |
348 | g->allowDecoratedLiteral = false; |
349 | g->allowGough = false; |
350 | g->allowHaigLit = false; |
351 | g->allowLbr = false; |
352 | g->allowLimExNFA = true; |
353 | g->allowLitHaig = false; |
354 | g->allowMcClellan = false; |
355 | g->allowPuff = false; |
356 | g->allowLiteral = false; |
357 | g->allowViolet = false; |
358 | g->allowSmallLiteralSet = false; |
359 | g->roseMasks = false; |
360 | done = true; |
361 | } |
362 | if (key == "forceOutfixesDFA" ) { |
363 | g->allowAnchoredAcyclic = false; |
364 | g->allowCastle = false; |
365 | g->allowDecoratedLiteral = false; |
366 | g->allowGough = false; |
367 | g->allowHaigLit = false; |
368 | g->allowLbr = false; |
369 | g->allowLimExNFA = false; |
370 | g->allowLitHaig = false; |
371 | g->allowMcClellan = true; |
372 | g->allowPuff = false; |
373 | g->allowLiteral = false; |
374 | g->allowViolet = false; |
375 | g->allowSmallLiteralSet = false; |
376 | g->roseMasks = false; |
377 | done = true; |
378 | } |
379 | if (key == "forceOutfixes" ) { |
380 | g->allowAnchoredAcyclic = false; |
381 | g->allowCastle = false; |
382 | g->allowDecoratedLiteral = false; |
383 | g->allowGough = true; |
384 | g->allowHaigLit = false; |
385 | g->allowLbr = false; |
386 | g->allowLimExNFA = true; |
387 | g->allowLitHaig = false; |
388 | g->allowMcClellan = true; |
389 | g->allowPuff = false; |
390 | g->allowLiteral = false; |
391 | g->allowViolet = false; |
392 | g->allowSmallLiteralSet = false; |
393 | g->roseMasks = false; |
394 | done = true; |
395 | } |
396 | |
397 | if (!done && key != "help" ) { |
398 | printf("Invalid grey override key %s:%u\n" , key.c_str(), value); |
399 | invalid_key_seen = true; |
400 | } |
401 | |
402 | p = ve; |
403 | |
404 | if (p != pe) { |
405 | ++p; |
406 | } |
407 | } |
408 | |
409 | if (invalid_key_seen) { |
410 | applyGreyOverrides(g, "help" ); |
411 | exit(1); |
412 | } |
413 | |
414 | assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */ |
415 | } |
416 | |
417 | } // namespace ue2 |
418 | |
419 | #endif |
420 | |