1/*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief State for corpus generator.
31 */
32
33#ifndef NG_CORPUS_PROPERTIES_H
34#define NG_CORPUS_PROPERTIES_H
35
36#include <utility> // for std::pair
37#include <boost/random/mersenne_twister.hpp>
38
39#include "ue2common.h"
40
/* Default corpus generator limit. NOTE(review): presumably the initial value
 * of CorpusProperties::corpusLimit -- the constructor is defined outside this
 * header, so confirm there. */
#define DEFAULT_CORPUS_GENERATOR_LIMIT 500000
42
43struct min_max {
44 min_max(u32 min_in, u32 max_in) : min(min_in), max(max_in) {
45 assert(min <= max);
46 }
47 u32 min;
48 u32 max;
49};
50
51class CorpusProperties {
52public:
53 /**
54 * Default constructor with default properties:
55 * - generate match char with 100% probability
56 * - generate unmatch char with 0% probability
57 * - generate random char with 0% probability
58 * - follow cycles once
59 * - do not expand character classes (including case classes)
60 * - generate data for all possible paths through graph
61 * - pick random characters from the full ASCII alphabet
62 */
63 CorpusProperties();
64
65 /**
66 * Set probabilities (as percentages). Returns true if sum == 100,
67 * else returns false and no changes are made to current probabilities.
68 */
69 bool setPercentages(unsigned int match, unsigned int unmatch,
70 unsigned int random);
71
72 unsigned percentMatch() const { return matchness; }
73 unsigned percentUnmatch() const { return unmatchness; }
74 unsigned percentRandom() const { return randomness; }
75
76 // The number of times a cycle is followed
77 void setCycleLimit(unsigned int min, unsigned int max) {
78 cycleMin = min;
79 cycleMax = max;
80 }
81 std::pair<unsigned int, unsigned int> getCycleLimit() const {
82 return std::make_pair(cycleMin, cycleMax);
83 }
84
85 // Roll for initiative
86 enum RollResult {
87 ROLLED_MATCH,
88 ROLLED_UNMATCH,
89 ROLLED_RANDOM,
90 };
91 RollResult throwDice();
92
93 /** \brief Set the PRNG seed. */
94 void seed(unsigned val);
95 unsigned int getSeed() const;
96
97 /** \brief Retrieve a value from the PRNG in the closed range [n, m]. */
98 unsigned rand(unsigned n, unsigned m);
99
100private:
101 // Percentages
102 unsigned int matchness;
103 unsigned int unmatchness;
104 unsigned int randomness;
105
106public:
107 // Extra data
108 min_max prefixRange;
109 min_max suffixRange;
110
111private:
112 // Behaviours
113 unsigned int cycleMin;
114 unsigned int cycleMax;
115
116public:
117 // FIXME: Limit the number of corpus files generated to the first 'limit'
118 // number of paths - note that this means the corpus will not be a complete
119 // representation of the pattern.
120 unsigned int corpusLimit;
121
122 unsigned int editDistance;
123 unsigned int alphabetSize;
124
125private:
126 // PRNG.
127 boost::random::mt19937 randomGen;
128 unsigned int rngSeed;
129};
130
131#endif
132