1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Compiler front-end, including public API calls for compilation.
31 */
32#include "allocator.h"
33#include "ue2common.h"
34#include "grey.h"
35#include "hs_compile.h"
36#include "hs_internal.h"
37#include "database.h"
38#include "compiler/compiler.h"
39#include "compiler/error.h"
40#include "nfagraph/ng.h"
41#include "nfagraph/ng_expr_info.h"
42#include "parser/Parser.h"
43#include "parser/parse_error.h"
44#include "parser/prefilter.h"
45#include "parser/unsupported.h"
46#include "util/compile_error.h"
47#include "util/cpuid_flags.h"
48#include "util/cpuid_inline.h"
49#include "util/depth.h"
50#include "util/popcount.h"
51#include "util/target_info.h"
52
53#include <cassert>
54#include <cstddef>
55#include <cstring>
56#include <limits.h>
57#include <string>
58#include <vector>
59
60using namespace std;
61using namespace ue2;
62
63/** \brief Cheap check that no unexpected mode flags are on. */
64static
65bool validModeFlags(unsigned int mode) {
66 static const unsigned allModeFlags = HS_MODE_BLOCK
67 | HS_MODE_STREAM
68 | HS_MODE_VECTORED
69 | HS_MODE_SOM_HORIZON_LARGE
70 | HS_MODE_SOM_HORIZON_MEDIUM
71 | HS_MODE_SOM_HORIZON_SMALL;
72
73 return !(mode & ~allModeFlags);
74}
75
76/** \brief Validate mode flags. */
77static
78bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
79 // First, check that only bits with meaning are on.
80 if (!validModeFlags(mode)) {
81 *comp_error = generateCompileError("Invalid parameter: "
82 "unrecognised mode flags.", -1);
83 return false;
84 }
85
86 // Our mode must be ONE of (block, streaming, vectored).
87 unsigned checkmode
88 = mode & (HS_MODE_STREAM | HS_MODE_BLOCK | HS_MODE_VECTORED);
89 if (popcount32(checkmode) != 1) {
90 *comp_error = generateCompileError(
91 "Invalid parameter: mode must have one "
92 "(and only one) of HS_MODE_BLOCK, HS_MODE_STREAM or "
93 "HS_MODE_VECTORED set.",
94 -1);
95 return false;
96 }
97
98 // If you specify SOM precision, you must be in streaming mode and you only
99 // get to have one.
100 unsigned somMode = mode & (HS_MODE_SOM_HORIZON_LARGE |
101 HS_MODE_SOM_HORIZON_MEDIUM |
102 HS_MODE_SOM_HORIZON_SMALL);
103 if (somMode) {
104 if (!(mode & HS_MODE_STREAM)) {
105 *comp_error = generateCompileError("Invalid parameter: the "
106 "HS_MODE_SOM_HORIZON_ mode flags may only be set in "
107 "streaming mode.", -1);
108 return false;
109
110 }
111 if ((somMode & (somMode - 1)) != 0) {
112 *comp_error = generateCompileError("Invalid parameter: only one "
113 "HS_MODE_SOM_HORIZON_ mode flag can be set.", -1);
114 return false;
115 }
116 }
117
118 return true;
119}
120
121static
122bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
123 static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_GLM;
124 static constexpr u32 HS_CPU_FEATURES_ALL =
125 HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512;
126
127 if (!p) {
128 return true;
129 }
130
131 if (p->cpu_features & ~HS_CPU_FEATURES_ALL) {
132 *comp_error = generateCompileError("Invalid cpu features specified in "
133 "the platform information.", -1);
134 return false;
135 }
136
137 if (p->tune > HS_TUNE_LAST) {
138 *comp_error = generateCompileError("Invalid tuning value specified in "
139 "the platform information.", -1);
140 return false;
141 }
142
143 return true;
144}
145
146/** \brief Convert from SOM mode to bytes of precision. */
147static
148unsigned getSomPrecision(unsigned mode) {
149 if (mode & HS_MODE_VECTORED) {
150 /* always assume full precision for vectoring */
151 return 8;
152 }
153
154 if (mode & HS_MODE_SOM_HORIZON_LARGE) {
155 return 8;
156 } else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
157 return 4;
158 } else if (mode & HS_MODE_SOM_HORIZON_SMALL) {
159 return 2;
160 }
161 return 0;
162}
163
164namespace ue2 {
165
166hs_error_t
167hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
168 const unsigned *ids, const hs_expr_ext *const *ext,
169 unsigned elements, unsigned mode,
170 const hs_platform_info_t *platform, hs_database_t **db,
171 hs_compile_error_t **comp_error, const Grey &g) {
172 // Check the args: note that it's OK for flags, ids or ext to be null.
173 if (!comp_error) {
174 if (db) {
175 *db = nullptr;
176 }
177 // nowhere to write the string, but we can still report an error code
178 return HS_COMPILER_ERROR;
179 }
180 if (!db) {
181 *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
182 return HS_COMPILER_ERROR;
183 }
184 if (!expressions) {
185 *db = nullptr;
186 *comp_error
187 = generateCompileError("Invalid parameter: expressions is NULL",
188 -1);
189 return HS_COMPILER_ERROR;
190 }
191 if (elements == 0) {
192 *db = nullptr;
193 *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
194 return HS_COMPILER_ERROR;
195 }
196
197#if defined(FAT_RUNTIME)
198 if (!check_ssse3()) {
199 *db = nullptr;
200 *comp_error = generateCompileError("Unsupported architecture", -1);
201 return HS_ARCH_ERROR;
202 }
203#endif
204
205 if (!checkMode(mode, comp_error)) {
206 *db = nullptr;
207 assert(*comp_error); // set by checkMode.
208 return HS_COMPILER_ERROR;
209 }
210
211 if (!checkPlatform(platform, comp_error)) {
212 *db = nullptr;
213 assert(*comp_error); // set by checkPlatform.
214 return HS_COMPILER_ERROR;
215 }
216
217 if (elements > g.limitPatternCount) {
218 *db = nullptr;
219 *comp_error = generateCompileError("Number of patterns too large", -1);
220 return HS_COMPILER_ERROR;
221 }
222
223 // This function is simply a wrapper around both the parser and compiler
224 bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
225 bool isVectored = mode & HS_MODE_VECTORED;
226 unsigned somPrecision = getSomPrecision(mode);
227
228 target_t target_info = platform ? target_t(*platform)
229 : get_current_target();
230
231 try {
232 CompileContext cc(isStreaming, isVectored, target_info, g);
233 NG ng(cc, elements, somPrecision);
234
235 for (unsigned int i = 0; i < elements; i++) {
236 // Add this expression to the compiler
237 try {
238 addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
239 ext ? ext[i] : nullptr, ids ? ids[i] : 0);
240 } catch (CompileError &e) {
241 /* Caught a parse error:
242 * throw it upstream as a CompileError with a specific index */
243 e.setExpressionIndex(i);
244 throw; /* do not slice */
245 }
246 }
247
248 // Check sub-expression ids
249 ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
250 // Renumber and assign lkey to reports
251 ng.rm.logicalKeyRenumber();
252
253 unsigned length = 0;
254 struct hs_database *out = build(ng, &length);
255
256 assert(out); // should have thrown exception on error
257 assert(length);
258
259 *db = out;
260 *comp_error = nullptr;
261
262 return HS_SUCCESS;
263 }
264 catch (const CompileError &e) {
265 // Compiler error occurred
266 *db = nullptr;
267 *comp_error = generateCompileError(e.reason,
268 e.hasIndex ? (int)e.index : -1);
269 return HS_COMPILER_ERROR;
270 }
271 catch (const std::bad_alloc &) {
272 *db = nullptr;
273 *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
274 return HS_COMPILER_ERROR;
275 }
276 catch (...) {
277 assert(!"Internal error, unexpected exception");
278 *db = nullptr;
279 *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
280 return HS_COMPILER_ERROR;
281 }
282}
283
284} // namespace ue2
285
286extern "C" HS_PUBLIC_API
287hs_error_t HS_CDECL hs_compile(const char *expression, unsigned flags,
288 unsigned mode,
289 const hs_platform_info_t *platform,
290 hs_database_t **db, hs_compile_error_t **error) {
291 if (expression == nullptr) {
292 *db = nullptr;
293 *error = generateCompileError("Invalid parameter: expression is NULL",
294 -1);
295 return HS_COMPILER_ERROR;
296 }
297
298 unsigned id = 0; // single expressions get zero as an ID
299 const hs_expr_ext * const *ext = nullptr; // unused for this call.
300
301 return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
302 platform, db, error, Grey());
303}
304
305extern "C" HS_PUBLIC_API
306hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
307 const unsigned *flags, const unsigned *ids,
308 unsigned elements, unsigned mode,
309 const hs_platform_info_t *platform,
310 hs_database_t **db,
311 hs_compile_error_t **error) {
312 const hs_expr_ext * const *ext = nullptr; // unused for this call.
313 return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
314 platform, db, error, Grey());
315}
316
317extern "C" HS_PUBLIC_API
318hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
319 const unsigned *flags, const unsigned *ids,
320 const hs_expr_ext * const *ext,
321 unsigned elements, unsigned mode,
322 const hs_platform_info_t *platform,
323 hs_database_t **db,
324 hs_compile_error_t **error) {
325 return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
326 platform, db, error, Grey());
327}
328
329static
330hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
331 const hs_expr_ext_t *ext, unsigned int mode,
332 hs_expr_info_t **info,
333 hs_compile_error_t **error) {
334 if (!error) {
335 // nowhere to write an error, but we can still return an error code.
336 return HS_COMPILER_ERROR;
337 }
338
339#if defined(FAT_RUNTIME)
340 if (!check_ssse3()) {
341 *error = generateCompileError("Unsupported architecture", -1);
342 return HS_ARCH_ERROR;
343 }
344#endif
345
346 if (!info) {
347 *error = generateCompileError("Invalid parameter: info is NULL", -1);
348 return HS_COMPILER_ERROR;
349 }
350
351 if (!expression) {
352 *error = generateCompileError("Invalid parameter: expression is NULL",
353 -1);
354 return HS_COMPILER_ERROR;
355 }
356
357 *info = nullptr;
358 *error = nullptr;
359
360 hs_expr_info local_info;
361 memset(&local_info, 0, sizeof(local_info));
362
363 try {
364 bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
365 bool isVectored = mode & HS_MODE_VECTORED;
366
367 CompileContext cc(isStreaming, isVectored, get_current_target(),
368 Grey());
369
370 // Ensure that our pattern isn't too long (in characters).
371 if (strlen(expression) > cc.grey.limitPatternLength) {
372 throw ParseError("Pattern length exceeds limit.");
373 }
374
375 ReportManager rm(cc.grey);
376 ParsedExpression pe(0, expression, flags, 0, ext);
377 assert(pe.component);
378
379 // Apply prefiltering transformations if desired.
380 if (pe.expr.prefilter) {
381 prefilterTree(pe.component, ParseMode(flags));
382 }
383
384 // Expressions containing zero-width assertions and other extended pcre
385 // types aren't supported yet. This call will throw a ParseError
386 // exception if the component tree contains such a construct.
387 checkUnsupported(*pe.component);
388
389 pe.component->checkEmbeddedStartAnchor(true);
390 pe.component->checkEmbeddedEndAnchor(true);
391
392 auto built_expr = buildGraph(rm, cc, pe);
393 unique_ptr<NGHolder> &g = built_expr.g;
394 ExpressionInfo &expr = built_expr.expr;
395
396 if (!g) {
397 DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
398 throw ParseError("Internal error.");
399 }
400
401 fillExpressionInfo(rm, cc, *g, expr, &local_info);
402 }
403 catch (const CompileError &e) {
404 // Compiler error occurred
405 *error = generateCompileError(e);
406 return HS_COMPILER_ERROR;
407 }
408 catch (std::bad_alloc &) {
409 *error = const_cast<hs_compile_error_t *>(&hs_enomem);
410 return HS_COMPILER_ERROR;
411 }
412 catch (...) {
413 assert(!"Internal error, unexpected exception");
414 *error = const_cast<hs_compile_error_t *>(&hs_einternal);
415 return HS_COMPILER_ERROR;
416 }
417
418 hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
419 if (!rv) {
420 *error = const_cast<hs_compile_error_t *>(&hs_enomem);
421 return HS_COMPILER_ERROR;
422 }
423
424 *rv = local_info;
425 *info = rv;
426 return HS_SUCCESS;
427}
428
429extern "C" HS_PUBLIC_API
430hs_error_t HS_CDECL hs_expression_info(const char *expression,
431 unsigned int flags,
432 hs_expr_info_t **info,
433 hs_compile_error_t **error) {
434 return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
435 info, error);
436}
437
438extern "C" HS_PUBLIC_API
439hs_error_t HS_CDECL hs_expression_ext_info(const char *expression,
440 unsigned int flags,
441 const hs_expr_ext_t *ext,
442 hs_expr_info_t **info,
443 hs_compile_error_t **error) {
444 return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info,
445 error);
446}
447
448extern "C" HS_PUBLIC_API
449hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) {
450 if (!platform) {
451 return HS_INVALID;
452 }
453
454 memset(platform, 0, sizeof(*platform));
455
456 platform->cpu_features = cpuid_flags();
457 platform->tune = cpuid_tune();
458
459 return HS_SUCCESS;
460}
461
462extern "C" HS_PUBLIC_API
463hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) {
464#if defined(FAT_RUNTIME)
465 if (!check_ssse3()) {
466 return HS_ARCH_ERROR;
467 }
468#endif
469 freeCompileError(error);
470 return HS_SUCCESS;
471}
472