1
2#line 1 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
3/*
4 * Copyright (c) 2015-2018, Intel Corporation
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Intel Corporation nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32
33#include "ExpressionParser.h"
34
35#include <cassert>
36#include <cstdio>
37#include <cstdlib>
38#include <cstring>
39#include <string>
40
41#include "ue2common.h"
42#include "hs_compile.h"
43
44
45using std::string;
46
47namespace { // anon
48
// Identifies which extended parameter the scanner is currently reading a
// value for. The key is selected by the scanner actions when a parameter name
// has been matched and is consumed when the value is stored into hs_expr_ext.
enum ParamKey {
    PARAM_NONE = 0,          // no parameter name has been recognised yet
    PARAM_MIN_OFFSET = 1,    // "min_offset"
    PARAM_MAX_OFFSET = 2,    // "max_offset"
    PARAM_MIN_LENGTH = 3,    // "min_length"
    PARAM_EDIT_DISTANCE = 4, // "edit_distance"
    PARAM_HAMM_DISTANCE = 5  // "hamming_distance"
};
57
58
59#line 60 "ExpressionParser.cpp"
// Flattened action lists used by the scanner in readExpression(). Each list
// is stored as a count followed by that many action ids; the trans_actions
// and eof_actions tables hold offsets into this array (offset 0 is the empty
// list).
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_actions[] = {
    0, 1, 0, 1, 1, 1, 2, 1,
    3, 1, 4, 1, 5, 1, 6, 1,
    7, 1, 9, 1, 10, 2, 8, 0
};
70
// Indexed by scanner state: offset of that state's first key inside
// _ExpressionParser_trans_keys.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_key_offsets[] = {
    0, 0, 4, 8, 9, 10, 11, 12,
    13, 14, 15, 16, 17, 18, 19, 20,
    21, 23, 28, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 48, 49, 50, 51, 52,
    53, 54, 55, 56, 57, 58, 60, 61,
    62, 63, 64, 65, 66, 67, 68, 69,
    70, 82
};
85
// Input characters (as numeric codes) that each state reacts to. For every
// state the single-character keys come first, in ascending order so they can
// be binary-searched, followed by (low, high) pairs describing character
// ranges.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_trans_keys[] = {
    32, 101, 104, 109, 32, 101, 104, 109,
    100, 105, 116, 95, 100, 105, 115, 116,
    97, 110, 99, 101, 61, 48, 57, 32,
    44, 125, 48, 57, 32, 44, 125, 97,
    109, 109, 105, 110, 103, 95, 100, 105,
    115, 116, 97, 110, 99, 101, 97, 105,
    120, 95, 111, 102, 102, 115, 101, 116,
    110, 95, 108, 111, 101, 110, 103, 116,
    104, 102, 102, 115, 101, 116, 56, 67,
    72, 76, 105, 109, 115, 123, 79, 81,
    86, 87, 0
};
103
// Indexed by scanner state: how many single-character keys that state owns
// in _ExpressionParser_trans_keys.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_single_lengths[] = {
    0, 4, 4, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    0, 3, 3, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 2, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    8, 0
};
118
// Indexed by scanner state: how many (low, high) range pairs follow that
// state's single-character keys in _ExpressionParser_trans_keys.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_range_lengths[] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    2, 0
};
133
// Indexed by scanner state: index of that state's first transition in the
// _ExpressionParser_trans_targs / _ExpressionParser_trans_actions tables.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const unsigned char _ExpressionParser_index_offsets[] = {
    0, 0, 5, 10, 12, 14, 16, 18,
    20, 22, 24, 26, 28, 30, 32, 34,
    36, 38, 43, 47, 49, 51, 53, 55,
    57, 59, 61, 63, 65, 67, 69, 71,
    73, 75, 77, 80, 82, 84, 86, 88,
    90, 92, 94, 96, 98, 100, 103, 105,
    107, 109, 111, 113, 115, 117, 119, 121,
    123, 134
};
148
// Indexed by transition: the state the scanner moves to when that transition
// is taken. A target of 0 is the error state.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_trans_targs[] = {
    2, 3, 19, 34, 0, 2, 3, 19,
    34, 0, 4, 0, 5, 0, 6, 0,
    7, 0, 8, 0, 9, 0, 10, 0,
    11, 0, 12, 0, 13, 0, 14, 0,
    15, 0, 16, 0, 17, 0, 18, 1,
    57, 17, 0, 18, 1, 57, 0, 20,
    0, 21, 0, 22, 0, 23, 0, 24,
    0, 25, 0, 26, 0, 27, 0, 28,
    0, 29, 0, 30, 0, 31, 0, 32,
    0, 33, 0, 15, 0, 35, 43, 0,
    36, 0, 37, 0, 38, 0, 39, 0,
    40, 0, 41, 0, 42, 0, 15, 0,
    44, 0, 45, 0, 46, 51, 0, 47,
    0, 48, 0, 49, 0, 50, 0, 15,
    0, 52, 0, 53, 0, 54, 0, 55,
    0, 15, 0, 56, 56, 56, 56, 56,
    56, 56, 1, 56, 56, 0, 0, 0
};
172
// Indexed by transition: offset into _ExpressionParser_actions of the action
// list to run when that transition is taken; 0 means the transition has no
// actions.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_trans_actions[] = {
    17, 17, 17, 17, 19, 0, 0, 0,
    0, 19, 0, 19, 0, 19, 0, 19,
    0, 19, 0, 19, 0, 19, 0, 19,
    0, 19, 0, 19, 0, 19, 0, 19,
    13, 19, 0, 19, 21, 19, 0, 5,
    5, 1, 19, 0, 5, 5, 19, 0,
    19, 0, 19, 0, 19, 0, 19, 0,
    19, 0, 19, 0, 19, 0, 19, 0,
    19, 0, 19, 0, 19, 0, 19, 0,
    19, 0, 19, 15, 19, 0, 0, 19,
    0, 19, 0, 19, 0, 19, 0, 19,
    0, 19, 0, 19, 0, 19, 9, 19,
    0, 19, 0, 19, 0, 0, 19, 0,
    19, 0, 19, 0, 19, 0, 19, 11,
    19, 0, 19, 0, 19, 0, 19, 0,
    19, 7, 19, 3, 3, 3, 3, 3,
    3, 3, 0, 3, 3, 19, 19, 0
};
196
// Indexed by scanner state: offset into _ExpressionParser_actions of the
// action list to run if the input ends while the scanner is in that state;
// 0 means nothing is run.
static
#if defined(__GNUC__)
__attribute__((used))
#endif
const char _ExpressionParser_eof_actions[] = {
    0, 19, 19, 19, 19, 19, 19, 19,
    19, 19, 19, 19, 19, 19, 19, 19,
    19, 19, 19, 19, 19, 19, 19, 19,
    19, 19, 19, 19, 19, 19, 19, 19,
    19, 19, 19, 19, 19, 19, 19, 19,
    19, 19, 19, 19, 19, 19, 19, 19,
    19, 19, 19, 19, 19, 19, 19, 19,
    0, 0
};
211
// Distinguished state numbers of the scanner.
enum {
    ExpressionParser_start = 56,       // state the scanner begins in
    ExpressionParser_first_final = 56, // lowest-numbered accepting state
    ExpressionParser_error = 0,        // dead state entered on invalid input
    ExpressionParser_en_main = 56      // entry point of the "main" machine
};
217
218
219#line 115 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
220
221
222} // namespace
223
224static
225void initExt(hs_expr_ext *ext) {
226 memset(ext, 0, sizeof(*ext));
227 ext->max_offset = MAX_OFFSET;
228}
229
230bool HS_CDECL readExpression(const std::string &input, std::string &expr,
231 unsigned int *flags, hs_expr_ext *ext,
232 bool *must_be_ordered) {
233 assert(flags);
234 assert(ext);
235
236 // Init flags and ext params.
237 *flags = 0;
238 initExt(ext);
239 if (must_be_ordered) {
240 *must_be_ordered = false;
241 }
242
243 // Extract expr, which is easier to do in straight C++ than with Ragel.
244 if (input.empty() || input[0] != '/') {
245 return false;
246 }
247 size_t end = input.find_last_of('/');
248 if (end == string::npos || end == 0) {
249 return false;
250 }
251 expr = input.substr(1, end - 1);
252
253 // Use a Ragel scanner to handle flags and params.
254 const char *p = input.c_str() + end + 1;
255 const char *pe = input.c_str() + input.size();
256 UNUSED const char *eof = pe;
257 UNUSED const char *ts = p, *te = p;
258 int cs;
259 UNUSED int act;
260
261 assert(p);
262 assert(pe);
263
264 // For storing integers as they're scanned.
265 u64a num = 0;
266 enum ParamKey key = PARAM_NONE;
267
268
269#line 270 "ExpressionParser.cpp"
270 {
271 cs = ExpressionParser_start;
272 }
273
274#line 275 "ExpressionParser.cpp"
275 {
276 int _klen;
277 unsigned int _trans;
278 const char *_acts;
279 unsigned int _nacts;
280 const char *_keys;
281
282 if ( p == pe )
283 goto _test_eof;
284 if ( cs == 0 )
285 goto _out;
286_resume:
287 _keys = _ExpressionParser_trans_keys + _ExpressionParser_key_offsets[cs];
288 _trans = _ExpressionParser_index_offsets[cs];
289
290 _klen = _ExpressionParser_single_lengths[cs];
291 if ( _klen > 0 ) {
292 const char *_lower = _keys;
293 const char *_mid;
294 const char *_upper = _keys + _klen - 1;
295 while (1) {
296 if ( _upper < _lower )
297 break;
298
299 _mid = _lower + ((_upper-_lower) >> 1);
300 if ( (*p) < *_mid )
301 _upper = _mid - 1;
302 else if ( (*p) > *_mid )
303 _lower = _mid + 1;
304 else {
305 _trans += (unsigned int)(_mid - _keys);
306 goto _match;
307 }
308 }
309 _keys += _klen;
310 _trans += _klen;
311 }
312
313 _klen = _ExpressionParser_range_lengths[cs];
314 if ( _klen > 0 ) {
315 const char *_lower = _keys;
316 const char *_mid;
317 const char *_upper = _keys + (_klen<<1) - 2;
318 while (1) {
319 if ( _upper < _lower )
320 break;
321
322 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
323 if ( (*p) < _mid[0] )
324 _upper = _mid - 2;
325 else if ( (*p) > _mid[1] )
326 _lower = _mid + 2;
327 else {
328 _trans += (unsigned int)((_mid - _keys)>>1);
329 goto _match;
330 }
331 }
332 _trans += _klen;
333 }
334
335_match:
336 cs = _ExpressionParser_trans_targs[_trans];
337
338 if ( _ExpressionParser_trans_actions[_trans] == 0 )
339 goto _again;
340
341 _acts = _ExpressionParser_actions + _ExpressionParser_trans_actions[_trans];
342 _nacts = (unsigned int) *_acts++;
343 while ( _nacts-- > 0 )
344 {
345 switch ( *_acts++ )
346 {
347 case 0:
348#line 59 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
349 {
350 num = (num * 10) + ((*p) - '0');
351 }
352 break;
353 case 1:
354#line 63 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
355 {
356 switch ((*p)) {
357 case 'i': *flags |= HS_FLAG_CASELESS; break;
358 case 's': *flags |= HS_FLAG_DOTALL; break;
359 case 'm': *flags |= HS_FLAG_MULTILINE; break;
360 case 'H': *flags |= HS_FLAG_SINGLEMATCH; break;
361 case 'O':
362 if (must_be_ordered) {
363 *must_be_ordered = true;
364 }
365 break;
366 case 'V': *flags |= HS_FLAG_ALLOWEMPTY; break;
367 case 'W': *flags |= HS_FLAG_UCP; break;
368 case '8': *flags |= HS_FLAG_UTF8; break;
369 case 'P': *flags |= HS_FLAG_PREFILTER; break;
370 case 'L': *flags |= HS_FLAG_SOM_LEFTMOST; break;
371 case 'C': *flags |= HS_FLAG_COMBINATION; break;
372 case 'Q': *flags |= HS_FLAG_QUIET; break;
373 default: {p++; goto _out; }
374 }
375 }
376 break;
377 case 2:
378#line 85 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
379 {
380 switch (key) {
381 case PARAM_MIN_OFFSET:
382 ext->flags |= HS_EXT_FLAG_MIN_OFFSET;
383 ext->min_offset = num;
384 break;
385 case PARAM_MAX_OFFSET:
386 ext->flags |= HS_EXT_FLAG_MAX_OFFSET;
387 ext->max_offset = num;
388 break;
389 case PARAM_MIN_LENGTH:
390 ext->flags |= HS_EXT_FLAG_MIN_LENGTH;
391 ext->min_length = num;
392 break;
393 case PARAM_EDIT_DISTANCE:
394 ext->flags |= HS_EXT_FLAG_EDIT_DISTANCE;
395 ext->edit_distance = num;
396 break;
397 case PARAM_HAMM_DISTANCE:
398 ext->flags |= HS_EXT_FLAG_HAMMING_DISTANCE;
399 ext->hamming_distance = num;
400 break;
401 case PARAM_NONE:
402 default:
403 // No key specified, syntax invalid.
404 return false;
405 }
406 }
407 break;
408 case 3:
409#line 165 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
410 { key = PARAM_MIN_OFFSET; }
411 break;
412 case 4:
413#line 166 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
414 { key = PARAM_MAX_OFFSET; }
415 break;
416 case 5:
417#line 167 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
418 { key = PARAM_MIN_LENGTH; }
419 break;
420 case 6:
421#line 168 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
422 { key = PARAM_EDIT_DISTANCE; }
423 break;
424 case 7:
425#line 169 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
426 { key = PARAM_HAMM_DISTANCE; }
427 break;
428 case 8:
429#line 171 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
430 {num = 0;}
431 break;
432 case 9:
433#line 172 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
434 { key = PARAM_NONE; }
435 break;
436 case 10:
437#line 177 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
438 { return false; }
439 break;
440#line 441 "ExpressionParser.cpp"
441 }
442 }
443
444_again:
445 if ( cs == 0 )
446 goto _out;
447 if ( ++p != pe )
448 goto _resume;
449 _test_eof: {}
450 if ( p == eof )
451 {
452 const char *__acts = _ExpressionParser_actions + _ExpressionParser_eof_actions[cs];
453 unsigned int __nacts = (unsigned int) *__acts++;
454 while ( __nacts-- > 0 ) {
455 switch ( *__acts++ ) {
456 case 10:
457#line 177 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
458 { return false; }
459 break;
460#line 461 "ExpressionParser.cpp"
461 }
462 }
463 }
464
465 _out: {}
466 }
467
468#line 182 "ClickHouse/contrib/hyperscan/util/ExpressionParser.rl"
469
470
471 DEBUG_PRINTF("expr='%s', flags=%u\n", expr.c_str(), *flags);
472
473 return (cs != ExpressionParser_error) && (p == pe);
474}
475