1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief State for corpus generator. |
31 | */ |
32 | |
33 | #ifndef NG_CORPUS_PROPERTIES_H |
34 | #define NG_CORPUS_PROPERTIES_H |
35 | |
36 | #include <utility> // for std::pair |
37 | #include <boost/random/mersenne_twister.hpp> |
38 | |
39 | #include "ue2common.h" |
40 | |
41 | #define DEFAULT_CORPUS_GENERATOR_LIMIT 500000 |
42 | |
43 | struct min_max { |
44 | min_max(u32 min_in, u32 max_in) : min(min_in), max(max_in) { |
45 | assert(min <= max); |
46 | } |
47 | u32 min; |
48 | u32 max; |
49 | }; |
50 | |
51 | class CorpusProperties { |
52 | public: |
53 | /** |
54 | * Default constructor with default properties: |
55 | * - generate match char with 100% probability |
56 | * - generate unmatch char with 0% probability |
57 | * - generate random char with 0% probability |
58 | * - follow cycles once |
59 | * - do not expand character classes (including case classes) |
60 | * - generate data for all possible paths through graph |
61 | * - pick random characters from the full ASCII alphabet |
62 | */ |
63 | CorpusProperties(); |
64 | |
65 | /** |
66 | * Set probabilities (as percentages). Returns true if sum == 100, |
67 | * else returns false and no changes are made to current probabilities. |
68 | */ |
69 | bool setPercentages(unsigned int match, unsigned int unmatch, |
70 | unsigned int random); |
71 | |
72 | unsigned percentMatch() const { return matchness; } |
73 | unsigned percentUnmatch() const { return unmatchness; } |
74 | unsigned percentRandom() const { return randomness; } |
75 | |
76 | // The number of times a cycle is followed |
77 | void setCycleLimit(unsigned int min, unsigned int max) { |
78 | cycleMin = min; |
79 | cycleMax = max; |
80 | } |
81 | std::pair<unsigned int, unsigned int> getCycleLimit() const { |
82 | return std::make_pair(cycleMin, cycleMax); |
83 | } |
84 | |
85 | // Roll for initiative |
86 | enum RollResult { |
87 | ROLLED_MATCH, |
88 | ROLLED_UNMATCH, |
89 | ROLLED_RANDOM, |
90 | }; |
91 | RollResult throwDice(); |
92 | |
93 | /** \brief Set the PRNG seed. */ |
94 | void seed(unsigned val); |
95 | unsigned int getSeed() const; |
96 | |
97 | /** \brief Retrieve a value from the PRNG in the closed range [n, m]. */ |
98 | unsigned rand(unsigned n, unsigned m); |
99 | |
100 | private: |
101 | // Percentages |
102 | unsigned int matchness; |
103 | unsigned int unmatchness; |
104 | unsigned int randomness; |
105 | |
106 | public: |
107 | // Extra data |
108 | min_max prefixRange; |
109 | min_max suffixRange; |
110 | |
111 | private: |
112 | // Behaviours |
113 | unsigned int cycleMin; |
114 | unsigned int cycleMax; |
115 | |
116 | public: |
117 | // FIXME: Limit the number of corpus files generated to the first 'limit' |
118 | // number of paths - note that this means the corpus will not be a complete |
119 | // representation of the pattern. |
120 | unsigned int corpusLimit; |
121 | |
122 | unsigned int editDistance; |
123 | unsigned int alphabetSize; |
124 | |
125 | private: |
126 | // PRNG. |
127 | boost::random::mt19937 randomGen; |
128 | unsigned int rngSeed; |
129 | }; |
130 | |
131 | #endif |
132 | |