1 | /* |
2 | * Copyright (c) 2015-2019, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Hamster Wheel Literal Matcher: literal representation at build time. |
31 | */ |
32 | |
33 | #ifndef HWLM_LITERAL_H |
34 | #define HWLM_LITERAL_H |
35 | |
36 | #include "hwlm.h" |
37 | #include "ue2common.h" |
38 | |
39 | #include <string> |
40 | #include <tuple> |
41 | #include <vector> |
42 | |
43 | namespace ue2 { |
44 | |
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8

/** \brief Sentinel literal id with every bit of an unsigned int set.
 *
 * Used as the default value of hwlmLiteral::included_id. The replacement list
 * is parenthesised so that the macro always expands as a single operand,
 * whatever expression surrounds it. */
#define INVALID_LIT_ID (~0U)
49 | |
50 | /** \brief Class representing a literal, fed to \ref hwlmBuild. */ |
51 | struct hwlmLiteral { |
52 | std::string s; //!< \brief The literal itself. |
53 | |
54 | /** \brief The ID to pass to the callback if this literal matches. |
55 | * |
56 | * Note that the special value 0xFFFFFFFF is reserved for internal use and |
57 | * should not be used. */ |
58 | u32 id; |
59 | |
60 | bool nocase; //!< \brief True if literal is case-insensitive. |
61 | |
62 | /** \brief Matches for runs of this literal can be quashed. |
63 | * |
64 | * Advisory flag meaning that there is no value in returning runs of |
65 | * additional matches for a literal after the first one, so such matches |
66 | * can be quashed by the literal matcher. */ |
67 | bool noruns; |
68 | |
69 | /** \brief included literal id. */ |
70 | u32 included_id = INVALID_LIT_ID; |
71 | |
72 | /** \brief Squash mask for FDR's confirm mask for included literals. |
73 | * |
74 | * In FDR confirm, if we have included literal in another bucket, |
75 | * we can use this mask to squash the bit for the bucket in FDR confirm |
76 | * mask and then run programs of included literal directly and avoid |
77 | * confirm work. |
78 | * |
79 | * This value is calculated in FDR compile code once bucket assignment is |
80 | * completed |
81 | */ |
82 | u8 squash = 0; |
83 | |
84 | /** \brief Set of groups that literal belongs to. |
85 | * |
86 | * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of |
87 | * the groups that are switched on. */ |
88 | hwlm_group_t groups; |
89 | |
90 | /** \brief Supplementary comparison mask. |
91 | * |
92 | * These two values add a supplementary comparison that is done over the |
93 | * final 8 bytes of the string -- if v is those bytes, then the string must |
94 | * match as well as (v & msk) == cmp. |
95 | * |
96 | * An empty msk is the safe way of not adding any comparison to the string |
97 | * unnecessarily filling in msk may turn off optimizations. |
98 | * |
99 | * The msk/cmp mechanism must NOT place a value into the literal that |
100 | * conflicts with the contents of the string, but can be allowed to add |
101 | * additional power within the string -- for example, to allow some case |
102 | * sensitivity within a case-insensitive string. |
103 | |
104 | * Values are stored in memory order -- i.e. the last byte of the mask |
105 | * corresponds to the last byte of the string. Both vectors must be the |
106 | * same size, and must not exceed \ref HWLM_MASKLEN in length. |
107 | */ |
108 | std::vector<u8> msk; |
109 | |
110 | /** \brief Supplementary comparison value. |
111 | * |
112 | * See documentation for \ref msk. |
113 | */ |
114 | std::vector<u8> cmp; |
115 | |
116 | bool pure; //!< \brief The pass-on of pure flag from LitFragment. |
117 | |
118 | /** \brief Complete constructor, takes group information and msk/cmp. |
119 | * |
120 | * This constructor takes a msk/cmp pair. Both must be vectors of length <= |
121 | * \ref HWLM_MASKLEN. */ |
122 | hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, |
123 | u32 id_in, hwlm_group_t groups_in, |
124 | const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in, |
125 | bool pure_in = false); |
126 | |
127 | /** \brief Simple constructor: no group information, no msk/cmp. |
128 | * |
129 | * This constructor is only used in internal unit test. */ |
130 | hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in) |
131 | : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} |
132 | }; |
133 | |
134 | inline |
135 | bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) { |
136 | return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) < |
137 | std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp); |
138 | } |
139 | |
140 | inline |
141 | bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) { |
142 | return a.id == b.id && a.s == b.s && a.nocase == b.nocase && |
143 | a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk && |
144 | a.cmp == b.cmp; |
145 | } |
146 | |
147 | /** |
148 | * Consistency test; returns false if the given msk/cmp test can never match |
149 | * the literal string s. |
150 | */ |
151 | bool maskIsConsistent(const std::string &s, bool nocase, |
152 | const std::vector<u8> &msk, const std::vector<u8> &cmp); |
153 | |
154 | } // namespace ue2 |
155 | |
156 | #endif // HWLM_LITERAL_H |
157 | |