1/*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
31 */
32#include "logical_combination.h"
33#include "parser/parse_error.h"
34#include "util/container.h"
35#include "hs_compile.h"
36
37#include <vector>
38
39using namespace std;
40
41namespace ue2 {
42
43u32 ParsedLogical::getLogicalKey(u32 a) {
44 auto it = toLogicalKeyMap.find(a);
45 if (it == toLogicalKeyMap.end()) {
46 // get size before assigning to avoid wacky LHS shenanigans
47 u32 size = toLogicalKeyMap.size();
48 bool inserted;
49 tie(it, inserted) = toLogicalKeyMap.emplace(a, size);
50 assert(inserted);
51 }
52 DEBUG_PRINTF("%u -> lkey %u\n", it->first, it->second);
53 return it->second;
54}
55
56u32 ParsedLogical::getCombKey(u32 a) {
57 auto it = toCombKeyMap.find(a);
58 if (it == toCombKeyMap.end()) {
59 u32 size = toCombKeyMap.size();
60 bool inserted;
61 tie(it, inserted) = toCombKeyMap.emplace(a, size);
62 assert(inserted);
63 }
64 DEBUG_PRINTF("%u -> ckey %u\n", it->first, it->second);
65 return it->second;
66}
67
68void ParsedLogical::addRelateCKey(u32 lkey, u32 ckey) {
69 auto it = lkey2ckeys.find(lkey);
70 if (it == lkey2ckeys.end()) {
71 bool inserted;
72 tie(it, inserted) = lkey2ckeys.emplace(lkey, set<u32>());
73 assert(inserted);
74 }
75 it->second.insert(ckey);
76 DEBUG_PRINTF("lkey %u belongs to combination key %u\n",
77 it->first, ckey);
78}
79
80#define TRY_RENUM_OP(ckey) \
81do { \
82 if (ckey & LOGICAL_OP_BIT) { \
83 ckey = (ckey & ~LOGICAL_OP_BIT) + toLogicalKeyMap.size(); \
84 } \
85} while(0)
86
87u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) {
88 LogicalOp lop;
89 assert((LOGICAL_OP_BIT & (u32)logicalTree.size()) == 0);
90 lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size();
91 lop.op = op;
92 lop.lo = left;
93 lop.ro = right;
94 logicalTree.push_back(lop);
95 return lop.id;
96}
97
98void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey,
99 u32 lkey_start, u32 lkey_result,
100 u64a min_offset, u64a max_offset) {
101 assert(ckey == combInfoMap.size());
102 CombInfo ci;
103 ci.id = id;
104 ci.ekey = ekey;
105 ci.start = lkey_start;
106 ci.result = lkey_result;
107 ci.min_offset = min_offset;
108 ci.max_offset = max_offset;
109 combInfoMap.push_back(ci);
110
111 DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey, ci.id,
112 ci.start, ci.result, ci.ekey);
113}
114
115void ParsedLogical::validateSubIDs(const unsigned *ids,
116 const char *const *expressions,
117 const unsigned *flags,
118 unsigned elements) {
119 for (const auto &it : toLogicalKeyMap) {
120 bool unknown = true;
121 u32 i = 0;
122 for (i = 0; i < elements; i++) {
123 if ((ids ? ids[i] : 0) == it.first) {
124 unknown = false;
125 break;
126 }
127 }
128 if (unknown) {
129 throw CompileError("Unknown sub-expression id.");
130 }
131 if (contains(toCombKeyMap, it.first)) {
132 throw CompileError("Have combination of combination.");
133 }
134 if (flags && (flags[i] & HS_FLAG_SOM_LEFTMOST)) {
135 throw CompileError("Have SOM flag in sub-expression.");
136 }
137 if (flags && (flags[i] & HS_FLAG_PREFILTER)) {
138 throw CompileError("Have PREFILTER flag in sub-expression.");
139 }
140 hs_compile_error_t *compile_err = NULL;
141 hs_expr_info_t *info = NULL;
142 hs_error_t err = hs_expression_info(expressions[i], flags[i], &info,
143 &compile_err);
144 if (err != HS_SUCCESS) {
145 hs_free_compile_error(compile_err);
146 throw CompileError("Run hs_expression_info() failed.");
147 }
148 if (!info) {
149 throw CompileError("Get hs_expr_info_t failed.");
150 } else {
151 if (info->unordered_matches) {
152 throw CompileError("Have unordered match in sub-expressions.");
153 }
154 free(info);
155 }
156 }
157}
158
159void ParsedLogical::logicalKeyRenumber() {
160 // renumber operation lkey in op vector
161 for (auto &op : logicalTree) {
162 TRY_RENUM_OP(op.id);
163 TRY_RENUM_OP(op.lo);
164 TRY_RENUM_OP(op.ro);
165 }
166 // renumber operation lkey in info map
167 for (auto &ci : combInfoMap) {
168 TRY_RENUM_OP(ci.start);
169 TRY_RENUM_OP(ci.result);
170 }
171}
172
173struct LogicalOperator {
174 LogicalOperator(u32 op_in, u32 paren_in)
175 : op(op_in), paren(paren_in) {}
176 u32 op;
177 u32 paren;
178};
179
180static
181u32 toOperator(char c) {
182 u32 op = UNKNOWN_OP;
183 switch (c) {
184 case '!' :
185 op = LOGICAL_OP_NOT;
186 break;
187 case '&' :
188 op = LOGICAL_OP_AND;
189 break;
190 case '|' :
191 op = LOGICAL_OP_OR;
192 break;
193 default:
194 break;
195 };
196 return op;
197}
198
199static
200bool cmpOperator(const LogicalOperator &op1, const LogicalOperator &op2) {
201 if (op1.paren < op2.paren) {
202 return false;
203 }
204 if (op1.paren > op2.paren) {
205 return true;
206 }
207 assert(op1.paren == op2.paren);
208 if (op1.op > op2.op) {
209 return false;
210 }
211 if (op1.op < op2.op) {
212 return true;
213 }
214 return true;
215}
216
217static
218u32 fetchSubID(const char *logical, u32 &digit, u32 end) {
219 if (digit == (u32)-1) { // no digit parsing in progress
220 return (u32)-1;
221 }
222 assert(end > digit);
223 if (end - digit > 9) {
224 throw LocatedParseError("Expression id too large");
225 }
226 u32 mult = 1;
227 u32 sum = 0;
228 for (u32 j = end - 1; (j >= digit) && (j != (u32)-1) ; j--) {
229 assert(isdigit(logical[j]));
230 sum += (logical[j] - '0') * mult;
231 mult *= 10;
232 }
233 digit = (u32)-1;
234 return sum;
235}
236
237static
238void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
239 ParsedLogical &pl) {
240 if (subid_stack.empty()) {
241 throw LocatedParseError("Not enough operand");
242 }
243 u32 right = subid_stack.back();
244 subid_stack.pop_back();
245 u32 left = 0;
246 if (op_stack.back().op != LOGICAL_OP_NOT) {
247 if (subid_stack.empty()) {
248 throw LocatedParseError("Not enough operand");
249 }
250 left = subid_stack.back();
251 subid_stack.pop_back();
252 }
253 subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right));
254 op_stack.pop_back();
255}
256
257static
258char getValue(const vector<char> &lv, u32 ckey) {
259 if (ckey & LOGICAL_OP_BIT) {
260 return lv[ckey & ~LOGICAL_OP_BIT];
261 } else {
262 return 0;
263 }
264}
265
266static
267bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
268 u32 start, u32 result) {
269 vector<char> lv(tree.size());
270 assert(start <= result);
271 for (u32 i = start; i <= result; i++) {
272 assert(i & LOGICAL_OP_BIT);
273 const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT];
274 assert(i == op.id);
275 switch (op.op) {
276 case LOGICAL_OP_NOT:
277 lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro);
278 break;
279 case LOGICAL_OP_AND:
280 lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) &
281 getValue(lv, op.ro);
282 break;
283 case LOGICAL_OP_OR:
284 lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) |
285 getValue(lv, op.ro);
286 break;
287 default:
288 assert(0);
289 break;
290 }
291 }
292 return lv[result & ~LOGICAL_OP_BIT];
293}
294
295void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
296 u32 ekey, u64a min_offset,
297 u64a max_offset) {
298 u32 ckey = getCombKey(id);
299 vector<LogicalOperator> op_stack;
300 vector<u32> subid_stack;
301 u32 lkey_start = INVALID_LKEY; // logical operation's lkey
302 u32 paren = 0; // parentheses
303 u32 digit = (u32)-1; // digit start offset, invalid offset is -1
304 u32 subid = (u32)-1;
305 u32 i;
306 try {
307 for (i = 0; logical[i]; i++) {
308 if (isdigit(logical[i])) {
309 if (digit == (u32)-1) { // new digit start
310 digit = i;
311 }
312 } else {
313 if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
314 subid_stack.push_back(getLogicalKey(subid));
315 addRelateCKey(subid_stack.back(), ckey);
316 }
317 if (logical[i] == ' ') { // skip whitespace
318 continue;
319 }
320 if (logical[i] == '(') {
321 paren += 1;
322 } else if (logical[i] == ')') {
323 if (paren <= 0) {
324 throw LocatedParseError("Not enough left parentheses");
325 }
326 paren -= 1;
327 } else {
328 u32 prio = toOperator(logical[i]);
329 if (prio != UNKNOWN_OP) {
330 LogicalOperator op(prio, paren);
331 while (!op_stack.empty()
332 && cmpOperator(op_stack.back(), op)) {
333 popOperator(op_stack, subid_stack, *this);
334 if (lkey_start == INVALID_LKEY) {
335 lkey_start = subid_stack.back();
336 }
337 }
338 op_stack.push_back(op);
339 } else {
340 throw LocatedParseError("Unknown character");
341 }
342 }
343 }
344 }
345 if (paren != 0) {
346 throw LocatedParseError("Not enough right parentheses");
347 }
348 if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
349 subid_stack.push_back(getLogicalKey(subid));
350 addRelateCKey(subid_stack.back(), ckey);
351 }
352 while (!op_stack.empty()) {
353 popOperator(op_stack, subid_stack, *this);
354 if (lkey_start == INVALID_LKEY) {
355 lkey_start = subid_stack.back();
356 }
357 }
358 if (subid_stack.size() != 1) {
359 throw LocatedParseError("Not enough operator");
360 }
361 } catch (LocatedParseError &error) {
362 error.locate(i);
363 throw;
364 }
365 u32 lkey_result = subid_stack.back(); // logical operation's lkey
366 if (lkey_start == INVALID_LKEY) {
367 throw CompileError("No logical operation.");
368 }
369 if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
370 throw CompileError("Has match from purely negative sub-expressions.");
371 }
372 combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
373 min_offset, max_offset);
374}
375
376} // namespace ue2
377