1 | |
2 | #line 1 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
3 | /* |
4 | * Copyright (c) 2015-2018, Intel Corporation |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions are met: |
8 | * |
9 | * * Redistributions of source code must retain the above copyright notice, |
10 | * this list of conditions and the following disclaimer. |
11 | * * Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * * Neither the name of Intel Corporation nor the names of its contributors |
15 | * may be used to endorse or promote products derived from this software |
16 | * without specific prior written permission. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
22 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
23 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
24 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
25 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
26 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
27 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
28 | * POSSIBILITY OF SUCH DAMAGE. |
29 | */ |
30 | |
31 | #include "config.h" |
32 | |
33 | #include "ExpressionParser.h" |
34 | |
35 | #include <cassert> |
36 | #include <cstdio> |
37 | #include <cstdlib> |
38 | #include <cstring> |
39 | #include <string> |
40 | |
41 | #include "ue2common.h" |
42 | #include "hs_compile.h" |
43 | |
44 | |
45 | using std::string; |
46 | |
47 | namespace { // anon |
48 | |
// Which extended parameter the scanner in readExpression() is currently
// filling in. The scanner stores one of these when it recognises a key name
// and consults it when the value that follows has been read in full.
enum ParamKey {
    PARAM_NONE          = 0, // no key recognised yet
    PARAM_MIN_OFFSET    = 1, // "min_offset"
    PARAM_MAX_OFFSET    = 2, // "max_offset"
    PARAM_MIN_LENGTH    = 3, // "min_length"
    PARAM_EDIT_DISTANCE = 4, // "edit_distance"
    PARAM_HAMM_DISTANCE = 5  // "hamming_distance"
};
57 | |
58 | |
59 | #line 60 "ExpressionParser.cpp" |
// Packed action lists for the scanner in readExpression(). The values held in
// _ExpressionParser_trans_actions and _ExpressionParser_eof_actions are
// offsets into this array; at each offset sits a count followed by that many
// action ids, which readExpression() dispatches through its action switch.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_actions[] = {
    0,          // offset  0: empty list
    1, 0,       // offset  1: action 0
    1, 1,       // offset  3: action 1
    1, 2,       // offset  5: action 2
    1, 3,       // offset  7: action 3
    1, 4,       // offset  9: action 4
    1, 5,       // offset 11: action 5
    1, 6,       // offset 13: action 6
    1, 7,       // offset 15: action 7
    1, 9,       // offset 17: action 9
    1, 10,      // offset 19: action 10
    2, 8, 0     // offset 21: action 8, then action 0

};
70 | |
// Per-state start index into _ExpressionParser_trans_keys; indexed by the
// current state `cs` in readExpression().
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_key_offsets[] = {
    0, 0, 4, 8, 9, 10, 11, 12, 13, 14,          // states  0-9
    15, 16, 17, 18, 19, 20, 21, 23, 28, 31,     // states 10-19
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41,     // states 20-29
    42, 43, 44, 45, 46, 48, 49, 50, 51, 52,     // states 30-39
    53, 54, 55, 56, 57, 58, 60, 61, 62, 63,     // states 40-49
    64, 65, 66, 67, 68, 69, 70, 82              // states 50-57
};
85 | |
// Transition keys (character codes), grouped by state. For each state the
// single-character keys come first and are binary-searched by
// readExpression(); they are followed by inclusive [low, high] range pairs.
// State 0 and state 57 own no keys.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_trans_keys[] = {
    32, 101, 104, 109,                          // state 1: ' ' 'e' 'h' 'm'
    32, 101, 104, 109,                          // state 2: ' ' 'e' 'h' 'm'
    100, 105, 116, 95, 100, 105,                // states 3-8:  d i t _ d i
    115, 116, 97, 110, 99, 101,                 // states 9-14: s t a n c e
    61,                                         // state 15: '='
    48, 57,                                     // state 16: range '0'..'9'
    32, 44, 125, 48, 57,                        // state 17: ' ' ',' '}', range '0'..'9'
    32, 44, 125,                                // state 18: ' ' ',' '}'
    97, 109, 109, 105, 110, 103, 95,            // states 19-25: a m m i n g _
    100, 105, 115, 116, 97, 110, 99, 101,       // states 26-33: d i s t a n c e
    97, 105,                                    // state 34: 'a' 'i'
    120, 95, 111, 102, 102, 115, 101, 116,      // states 35-42: x _ o f f s e t
    110, 95,                                    // states 43-44: n _
    108, 111,                                   // state 45: 'l' 'o'
    101, 110, 103, 116, 104,                    // states 46-50: e n g t h
    102, 102, 115, 101, 116,                    // states 51-55: f f s e t
    56, 67, 72, 76, 105, 109, 115, 123,         // state 56: '8' 'C' 'H' 'L' 'i' 'm' 's' '{'
    79, 81, 86, 87,                             // state 56: ranges 'O'..'Q', 'V'..'W'
    0                                           // trailing entry
};
103 | |
// Number of single-character keys each state owns in
// _ExpressionParser_trans_keys; indexed by state.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_single_lengths[] = {
    0, 4, 4, 1, 1, 1, 1, 1, 1, 1,       // states  0-9
    1, 1, 1, 1, 1, 1, 0, 3, 3, 1,       // states 10-19
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,       // states 20-29
    1, 1, 1, 1, 2, 1, 1, 1, 1, 1,       // states 30-39
    1, 1, 1, 1, 1, 2, 1, 1, 1, 1,       // states 40-49
    1, 1, 1, 1, 1, 1, 8, 0              // states 50-57
};
118 | |
// Number of [low, high] key ranges each state owns in
// _ExpressionParser_trans_keys (stored after that state's single keys);
// indexed by state.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_range_lengths[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // states  0-9
    0, 0, 0, 0, 0, 0, 1, 1, 0, 0,       // states 10-19
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // states 20-29
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // states 30-39
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // states 40-49
    0, 0, 0, 0, 0, 0, 2, 0              // states 50-57
};
133 | |
// Per-state base index into _ExpressionParser_trans_targs and
// _ExpressionParser_trans_actions; readExpression() adds the position of the
// matched key to it. Values reach 134.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const unsigned char _ExpressionParser_index_offsets[] = {
    0, 0, 5, 10, 12, 14, 16, 18, 20, 22,            // states  0-9
    24, 26, 28, 30, 32, 34, 36, 38, 43, 47,         // states 10-19
    49, 51, 53, 55, 57, 59, 61, 63, 65, 67,         // states 20-29
    69, 71, 73, 75, 77, 80, 82, 84, 86, 88,         // states 30-39
    90, 92, 94, 96, 98, 100, 103, 105, 107, 109,    // states 40-49
    111, 113, 115, 117, 119, 121, 123, 134          // states 50-57
};
148 | |
// Target state of every transition, grouped by source state. Within a group
// the entries follow the order of that state's keys in
// _ExpressionParser_trans_keys, and the last entry is the target taken when
// no key matches. Target 0 is the error state.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_trans_targs[] = {
    2, 3, 19, 34, 0,                                // state 1
    2, 3, 19, 34, 0,                                // state 2
    4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0,             // states 3-8
    10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 0,       // states 9-14
    16, 0,                                          // state 15
    17, 0,                                          // state 16
    18, 1, 57, 17, 0,                               // state 17
    18, 1, 57, 0,                                   // state 18
    20, 0, 21, 0, 22, 0, 23, 0, 24, 0,              // states 19-23
    25, 0, 26, 0, 27, 0, 28, 0, 29, 0,              // states 24-28
    30, 0, 31, 0, 32, 0, 33, 0, 15, 0,              // states 29-33
    35, 43, 0,                                      // state 34
    36, 0, 37, 0, 38, 0, 39, 0,                     // states 35-38
    40, 0, 41, 0, 42, 0, 15, 0,                     // states 39-42
    44, 0, 45, 0,                                   // states 43-44
    46, 51, 0,                                      // state 45
    47, 0, 48, 0, 49, 0, 50, 0, 15, 0,              // states 46-50
    52, 0, 53, 0, 54, 0, 55, 0, 15, 0,              // states 51-55
    56, 56, 56, 56, 56, 56, 56, 1, 56, 56, 0,       // state 56
    0,                                              // state 57
    0                                               // trailing entry
};
172 | |
// Action list attached to every transition, laid out in parallel with
// _ExpressionParser_trans_targs. Each value is an offset into
// _ExpressionParser_actions; 0 means the transition runs no action.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_trans_actions[] = {
    17, 17, 17, 17, 19,                             // state 1
    0, 0, 0, 0, 19,                                 // state 2
    0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19,       // states 3-8
    0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 13, 19,      // states 9-14
    0, 19,                                          // state 15
    21, 19,                                         // state 16
    0, 5, 5, 1, 19,                                 // state 17
    0, 5, 5, 19,                                    // state 18
    0, 19, 0, 19, 0, 19, 0, 19, 0, 19,              // states 19-23
    0, 19, 0, 19, 0, 19, 0, 19, 0, 19,              // states 24-28
    0, 19, 0, 19, 0, 19, 0, 19, 15, 19,             // states 29-33
    0, 0, 19,                                       // state 34
    0, 19, 0, 19, 0, 19, 0, 19,                     // states 35-38
    0, 19, 0, 19, 0, 19, 9, 19,                     // states 39-42
    0, 19, 0, 19,                                   // states 43-44
    0, 0, 19,                                       // state 45
    0, 19, 0, 19, 0, 19, 0, 19, 11, 19,             // states 46-50
    0, 19, 0, 19, 0, 19, 0, 19, 7, 19,              // states 51-55
    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 19,               // state 56
    19,                                             // state 57
    0                                               // trailing entry
};
196 | |
// Action list run when the input ends while the scanner is in a given state;
// indexed by state. Each value is an offset into _ExpressionParser_actions
// (0 selects the empty list).
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_eof_actions[] = {
    0, 19, 19, 19, 19, 19, 19, 19, 19, 19,      // states  0-9
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19,     // states 10-19
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19,     // states 20-29
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19,     // states 30-39
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19,     // states 40-49
    19, 19, 19, 19, 19, 19, 0, 0                // states 50-57
};
211 | |
// Named state numbers of the ExpressionParser machine.
enum {
    ExpressionParser_start       = 56, // state the scanner is initialised to
    ExpressionParser_first_final = 56,
    ExpressionParser_error       = 0,  // state compared against when deciding success

    ExpressionParser_en_main     = 56
};
217 | |
218 | |
219 | #line 115 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
220 | |
221 | |
222 | } // namespace |
223 | |
224 | static |
225 | void initExt(hs_expr_ext *ext) { |
226 | memset(ext, 0, sizeof(*ext)); |
227 | ext->max_offset = MAX_OFFSET; |
228 | } |
229 | |
230 | bool HS_CDECL readExpression(const std::string &input, std::string &expr, |
231 | unsigned int *flags, hs_expr_ext *ext, |
232 | bool *must_be_ordered) { |
233 | assert(flags); |
234 | assert(ext); |
235 | |
236 | // Init flags and ext params. |
237 | *flags = 0; |
238 | initExt(ext); |
239 | if (must_be_ordered) { |
240 | *must_be_ordered = false; |
241 | } |
242 | |
243 | // Extract expr, which is easier to do in straight C++ than with Ragel. |
244 | if (input.empty() || input[0] != '/') { |
245 | return false; |
246 | } |
247 | size_t end = input.find_last_of('/'); |
248 | if (end == string::npos || end == 0) { |
249 | return false; |
250 | } |
251 | expr = input.substr(1, end - 1); |
252 | |
253 | // Use a Ragel scanner to handle flags and params. |
254 | const char *p = input.c_str() + end + 1; |
255 | const char *pe = input.c_str() + input.size(); |
256 | UNUSED const char *eof = pe; |
257 | UNUSED const char *ts = p, *te = p; |
258 | int cs; |
259 | UNUSED int act; |
260 | |
261 | assert(p); |
262 | assert(pe); |
263 | |
264 | // For storing integers as they're scanned. |
265 | u64a num = 0; |
266 | enum ParamKey key = PARAM_NONE; |
267 | |
268 | |
269 | #line 270 "ExpressionParser.cpp" |
270 | { |
271 | cs = ExpressionParser_start; |
272 | } |
273 | |
274 | #line 275 "ExpressionParser.cpp" |
275 | { |
276 | int _klen; |
277 | unsigned int _trans; |
278 | const char *_acts; |
279 | unsigned int _nacts; |
280 | const char *_keys; |
281 | |
282 | if ( p == pe ) |
283 | goto _test_eof; |
284 | if ( cs == 0 ) |
285 | goto _out; |
286 | _resume: |
287 | _keys = _ExpressionParser_trans_keys + _ExpressionParser_key_offsets[cs]; |
288 | _trans = _ExpressionParser_index_offsets[cs]; |
289 | |
290 | _klen = _ExpressionParser_single_lengths[cs]; |
291 | if ( _klen > 0 ) { |
292 | const char *_lower = _keys; |
293 | const char *_mid; |
294 | const char *_upper = _keys + _klen - 1; |
295 | while (1) { |
296 | if ( _upper < _lower ) |
297 | break; |
298 | |
299 | _mid = _lower + ((_upper-_lower) >> 1); |
300 | if ( (*p) < *_mid ) |
301 | _upper = _mid - 1; |
302 | else if ( (*p) > *_mid ) |
303 | _lower = _mid + 1; |
304 | else { |
305 | _trans += (unsigned int)(_mid - _keys); |
306 | goto _match; |
307 | } |
308 | } |
309 | _keys += _klen; |
310 | _trans += _klen; |
311 | } |
312 | |
313 | _klen = _ExpressionParser_range_lengths[cs]; |
314 | if ( _klen > 0 ) { |
315 | const char *_lower = _keys; |
316 | const char *_mid; |
317 | const char *_upper = _keys + (_klen<<1) - 2; |
318 | while (1) { |
319 | if ( _upper < _lower ) |
320 | break; |
321 | |
322 | _mid = _lower + (((_upper-_lower) >> 1) & ~1); |
323 | if ( (*p) < _mid[0] ) |
324 | _upper = _mid - 2; |
325 | else if ( (*p) > _mid[1] ) |
326 | _lower = _mid + 2; |
327 | else { |
328 | _trans += (unsigned int)((_mid - _keys)>>1); |
329 | goto _match; |
330 | } |
331 | } |
332 | _trans += _klen; |
333 | } |
334 | |
335 | _match: |
336 | cs = _ExpressionParser_trans_targs[_trans]; |
337 | |
338 | if ( _ExpressionParser_trans_actions[_trans] == 0 ) |
339 | goto _again; |
340 | |
341 | _acts = _ExpressionParser_actions + _ExpressionParser_trans_actions[_trans]; |
342 | _nacts = (unsigned int) *_acts++; |
343 | while ( _nacts-- > 0 ) |
344 | { |
345 | switch ( *_acts++ ) |
346 | { |
347 | case 0: |
348 | #line 59 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
349 | { |
350 | num = (num * 10) + ((*p) - '0'); |
351 | } |
352 | break; |
353 | case 1: |
354 | #line 63 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
355 | { |
356 | switch ((*p)) { |
357 | case 'i': *flags |= HS_FLAG_CASELESS; break; |
358 | case 's': *flags |= HS_FLAG_DOTALL; break; |
359 | case 'm': *flags |= HS_FLAG_MULTILINE; break; |
360 | case 'H': *flags |= HS_FLAG_SINGLEMATCH; break; |
361 | case 'O': |
362 | if (must_be_ordered) { |
363 | *must_be_ordered = true; |
364 | } |
365 | break; |
366 | case 'V': *flags |= HS_FLAG_ALLOWEMPTY; break; |
367 | case 'W': *flags |= HS_FLAG_UCP; break; |
368 | case '8': *flags |= HS_FLAG_UTF8; break; |
369 | case 'P': *flags |= HS_FLAG_PREFILTER; break; |
370 | case 'L': *flags |= HS_FLAG_SOM_LEFTMOST; break; |
371 | case 'C': *flags |= HS_FLAG_COMBINATION; break; |
372 | case 'Q': *flags |= HS_FLAG_QUIET; break; |
373 | default: {p++; goto _out; } |
374 | } |
375 | } |
376 | break; |
377 | case 2: |
378 | #line 85 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
379 | { |
380 | switch (key) { |
381 | case PARAM_MIN_OFFSET: |
382 | ext->flags |= HS_EXT_FLAG_MIN_OFFSET; |
383 | ext->min_offset = num; |
384 | break; |
385 | case PARAM_MAX_OFFSET: |
386 | ext->flags |= HS_EXT_FLAG_MAX_OFFSET; |
387 | ext->max_offset = num; |
388 | break; |
389 | case PARAM_MIN_LENGTH: |
390 | ext->flags |= HS_EXT_FLAG_MIN_LENGTH; |
391 | ext->min_length = num; |
392 | break; |
393 | case PARAM_EDIT_DISTANCE: |
394 | ext->flags |= HS_EXT_FLAG_EDIT_DISTANCE; |
395 | ext->edit_distance = num; |
396 | break; |
397 | case PARAM_HAMM_DISTANCE: |
398 | ext->flags |= HS_EXT_FLAG_HAMMING_DISTANCE; |
399 | ext->hamming_distance = num; |
400 | break; |
401 | case PARAM_NONE: |
402 | default: |
403 | // No key specified, syntax invalid. |
404 | return false; |
405 | } |
406 | } |
407 | break; |
408 | case 3: |
409 | #line 165 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
410 | { key = PARAM_MIN_OFFSET; } |
411 | break; |
412 | case 4: |
413 | #line 166 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
414 | { key = PARAM_MAX_OFFSET; } |
415 | break; |
416 | case 5: |
417 | #line 167 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
418 | { key = PARAM_MIN_LENGTH; } |
419 | break; |
420 | case 6: |
421 | #line 168 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
422 | { key = PARAM_EDIT_DISTANCE; } |
423 | break; |
424 | case 7: |
425 | #line 169 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
426 | { key = PARAM_HAMM_DISTANCE; } |
427 | break; |
428 | case 8: |
429 | #line 171 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
430 | {num = 0;} |
431 | break; |
432 | case 9: |
433 | #line 172 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
434 | { key = PARAM_NONE; } |
435 | break; |
436 | case 10: |
437 | #line 177 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
438 | { return false; } |
439 | break; |
440 | #line 441 "ExpressionParser.cpp" |
441 | } |
442 | } |
443 | |
444 | _again: |
445 | if ( cs == 0 ) |
446 | goto _out; |
447 | if ( ++p != pe ) |
448 | goto _resume; |
449 | _test_eof: {} |
450 | if ( p == eof ) |
451 | { |
452 | const char *__acts = _ExpressionParser_actions + _ExpressionParser_eof_actions[cs]; |
453 | unsigned int __nacts = (unsigned int) *__acts++; |
454 | while ( __nacts-- > 0 ) { |
455 | switch ( *__acts++ ) { |
456 | case 10: |
457 | #line 177 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
458 | { return false; } |
459 | break; |
460 | #line 461 "ExpressionParser.cpp" |
461 | } |
462 | } |
463 | } |
464 | |
465 | _out: {} |
466 | } |
467 | |
468 | #line 182 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl" |
469 | |
470 | |
471 | DEBUG_PRINTF("expr='%s', flags=%u\n" , expr.c_str(), *flags); |
472 | |
473 | return (cs != ExpressionParser_error) && (p == pe); |
474 | } |
475 | |