1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Copyright (c) 1997-2013 University of Cambridge |
10 | |
11 | The machine code generator part (this module) was written by Zoltan Herczeg |
12 | Copyright (c) 2010-2013 |
13 | |
14 | ----------------------------------------------------------------------------- |
15 | Redistribution and use in source and binary forms, with or without |
16 | modification, are permitted provided that the following conditions are met: |
17 | |
18 | * Redistributions of source code must retain the above copyright notice, |
19 | this list of conditions and the following disclaimer. |
20 | |
21 | * Redistributions in binary form must reproduce the above copyright |
22 | notice, this list of conditions and the following disclaimer in the |
23 | documentation and/or other materials provided with the distribution. |
24 | |
25 | * Neither the name of the University of Cambridge nor the names of its |
26 | contributors may be used to endorse or promote products derived from |
27 | this software without specific prior written permission. |
28 | |
29 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
30 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
31 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
32 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
33 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
34 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
35 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
36 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
37 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
38 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
39 | POSSIBILITY OF SUCH DAMAGE. |
40 | ----------------------------------------------------------------------------- |
41 | */ |
42 | |
43 | #include "pcre_config.h" |
44 | #include "pcre_internal.h" |
45 | |
46 | #if defined SUPPORT_JIT |
47 | |
48 | /* All-in-one: Since we use the JIT compiler only from here, |
49 | we just include it. This way we don't need to touch the build |
50 | system files. */ |
51 | |
52 | #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size) |
53 | #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr) |
54 | #define SLJIT_CONFIG_AUTO 1 |
55 | #define SLJIT_CONFIG_STATIC 1 |
56 | #define SLJIT_VERBOSE 0 |
57 | #define SLJIT_DEBUG 0 |
58 | |
59 | #include "sljit/sljitLir.c" |
60 | |
61 | #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED |
62 | #error Unsupported architecture |
63 | #endif |
64 | |
65 | /* Defines for debugging purposes. */ |
66 | |
67 | /* 1 - Use unoptimized capturing brackets. |
68 | 2 - Enable capture_last_ptr (includes option 1). */ |
69 | /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ |
70 | |
71 | /* 1 - Always have a control head. */ |
72 | /* #define DEBUG_FORCE_CONTROL_HEAD 1 */ |
73 | |
/* Size of the regex stack that is allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate of the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* When sljit debugging is on, enable the check that an allocation
   could destroy temporaries. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
86 | |
87 | /* |
Short summary of the backtracking mechanism employed by the JIT code generator:
89 | |
The code generator follows the recursive nature of the Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
that execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) or
vertical (sub-expression) (see struct backtrack_common for more details).
95 | |
96 | 'ab' - 'a' and 'b' regexps are concatenated |
97 | 'a+' - 'a' is the sub-expression of the '+' operator |
98 | |
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
branches on the matching path.
104 | |
105 | Greedy star operator (*) : |
106 | Matching path: match happens. |
107 | Backtrack path: match failed. |
108 | Non-greedy star operator (*?) : |
109 | Matching path: no need to perform a match. |
110 | Backtrack path: match is required. |
111 | |
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
consider the following regular expression:
115 | |
116 | A(B|C)D |
117 | |
118 | The generated code will be the following: |
119 | |
120 | A matching path |
121 | '(' matching path (pushing arguments to the stack) |
122 | B matching path |
123 | ')' matching path (pushing arguments to the stack) |
124 | D matching path |
125 | return with successful match |
126 | |
127 | D backtrack path |
128 | ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") |
129 | B backtrack path |
130 | C expected path |
131 | jump to D matching path |
132 | C backtrack path |
133 | A backtrack path |
134 | |
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
141 | */ |
142 | |
143 | /* |
144 | Saved stack frames: |
145 | |
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
150 | |
151 | The stack frames are stored in a chain list, and have the following format: |
152 | ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] |
153 | |
154 | Thus we can restore the private data to a particular point in the stack. |
155 | */ |
156 | |
157 | typedef struct jit_arguments { |
158 | /* Pointers first. */ |
159 | struct sljit_stack *stack; |
160 | const pcre_uchar *str; |
161 | const pcre_uchar *begin; |
162 | const pcre_uchar *end; |
163 | int *offsets; |
164 | pcre_uchar *uchar_ptr; |
165 | pcre_uchar *mark_ptr; |
166 | void *callout_data; |
167 | /* Everything else after. */ |
168 | sljit_u32 limit_match; |
169 | int real_offset_count; |
170 | int offset_count; |
171 | sljit_u8 notbol; |
172 | sljit_u8 noteol; |
173 | sljit_u8 notempty; |
174 | sljit_u8 notempty_atstart; |
175 | } jit_arguments; |
176 | |
177 | typedef struct executable_functions { |
178 | void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; |
179 | void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; |
180 | sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; |
181 | PUBL(jit_callback) callback; |
182 | void *userdata; |
183 | sljit_u32 top_bracket; |
184 | sljit_u32 limit_match; |
185 | } executable_functions; |
186 | |
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
191 | |
/* Node of a singly linked list of stubs: each one pairs a jump (start)
   with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
197 | |
198 | typedef struct label_addr_list { |
199 | struct sljit_label *label; |
200 | sljit_uw *update_addr; |
201 | struct label_addr_list *next; |
202 | } label_addr_list; |
203 | |
/* Special, negative frame type values. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
208 | |
/* Kinds of control entries: a mark or a then-trap. */
enum control_types {
  type_mark,          /* = 0 */
  type_then_trap      /* = 1 */
};
213 | |
214 | typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); |
215 | |
216 | /* The following structure is the key data type for the recursive |
217 | code generator. It is allocated by compile_matchingpath, and contains |
218 | the arguments for compile_backtrackingpath. Must be the first member |
219 | of its descendants. */ |
220 | typedef struct backtrack_common { |
221 | /* Concatenation stack. */ |
222 | struct backtrack_common *prev; |
223 | jump_list *nextbacktracks; |
224 | /* Internal stack (for component operators). */ |
225 | struct backtrack_common *top; |
226 | jump_list *topbacktracks; |
227 | /* Opcode pointer. */ |
228 | pcre_uchar *cc; |
229 | } backtrack_common; |
230 | |
231 | typedef struct assert_backtrack { |
232 | backtrack_common common; |
233 | jump_list *condfailed; |
234 | /* Less than 0 if a frame is not needed. */ |
235 | int framesize; |
236 | /* Points to our private memory word on the stack. */ |
237 | int private_data_ptr; |
238 | /* For iterators. */ |
239 | struct sljit_label *matchingpath; |
240 | } assert_backtrack; |
241 | |
242 | typedef struct bracket_backtrack { |
243 | backtrack_common common; |
244 | /* Where to coninue if an alternative is successfully matched. */ |
245 | struct sljit_label *alternative_matchingpath; |
246 | /* For rmin and rmax iterators. */ |
247 | struct sljit_label *recursive_matchingpath; |
248 | /* For greedy ? operator. */ |
249 | struct sljit_label *zero_matchingpath; |
250 | /* Contains the branches of a failed condition. */ |
251 | union { |
252 | /* Both for OP_COND, OP_SCOND. */ |
253 | jump_list *condfailed; |
254 | assert_backtrack *assert; |
255 | /* For OP_ONCE. Less than 0 if not needed. */ |
256 | int framesize; |
257 | } u; |
258 | /* Points to our private memory word on the stack. */ |
259 | int private_data_ptr; |
260 | } bracket_backtrack; |
261 | |
262 | typedef struct bracketpos_backtrack { |
263 | backtrack_common common; |
264 | /* Points to our private memory word on the stack. */ |
265 | int private_data_ptr; |
266 | /* Reverting stack is needed. */ |
267 | int framesize; |
268 | /* Allocated stack size. */ |
269 | int stacksize; |
270 | } bracketpos_backtrack; |
271 | |
272 | typedef struct braminzero_backtrack { |
273 | backtrack_common common; |
274 | struct sljit_label *matchingpath; |
275 | } braminzero_backtrack; |
276 | |
277 | typedef struct char_iterator_backtrack { |
278 | backtrack_common common; |
279 | /* Next iteration. */ |
280 | struct sljit_label *matchingpath; |
281 | union { |
282 | jump_list *backtracks; |
283 | struct { |
284 | unsigned int othercasebit; |
285 | pcre_uchar chr; |
286 | BOOL enabled; |
287 | } charpos; |
288 | } u; |
289 | } char_iterator_backtrack; |
290 | |
291 | typedef struct ref_iterator_backtrack { |
292 | backtrack_common common; |
293 | /* Next iteration. */ |
294 | struct sljit_label *matchingpath; |
295 | } ref_iterator_backtrack; |
296 | |
297 | typedef struct recurse_entry { |
298 | struct recurse_entry *next; |
299 | /* Contains the function entry. */ |
300 | struct sljit_label *entry; |
301 | /* Collects the calls until the function is not created. */ |
302 | jump_list *calls; |
303 | /* Points to the starting opcode. */ |
304 | sljit_sw start; |
305 | } recurse_entry; |
306 | |
307 | typedef struct recurse_backtrack { |
308 | backtrack_common common; |
309 | BOOL inlined_pattern; |
310 | } recurse_backtrack; |
311 | |
312 | #define OP_THEN_TRAP OP_TABLE_LENGTH |
313 | |
314 | typedef struct then_trap_backtrack { |
315 | backtrack_common common; |
316 | /* If then_trap is not NULL, this structure contains the real |
317 | then_trap for the backtracking path. */ |
318 | struct then_trap_backtrack *then_trap; |
319 | /* Points to the starting opcode. */ |
320 | sljit_sw start; |
321 | /* Exit point for the then opcodes of this alternative. */ |
322 | jump_list *quit; |
323 | /* Frame size of the current alternative. */ |
324 | int framesize; |
325 | } then_trap_backtrack; |
326 | |
/* Upper limit on the size of a range (see the users of this constant). */
#define MAX_RANGE_SIZE 4
328 | |
329 | typedef struct compiler_common { |
330 | /* The sljit ceneric compiler. */ |
331 | struct sljit_compiler *compiler; |
332 | /* First byte code. */ |
333 | pcre_uchar *start; |
334 | /* Maps private data offset to each opcode. */ |
335 | sljit_s32 *private_data_ptrs; |
336 | /* Chain list of read-only data ptrs. */ |
337 | void *read_only_data_head; |
338 | /* Tells whether the capturing bracket is optimized. */ |
339 | sljit_u8 *optimized_cbracket; |
340 | /* Tells whether the starting offset is a target of then. */ |
341 | sljit_u8 *then_offsets; |
342 | /* Current position where a THEN must jump. */ |
343 | then_trap_backtrack *then_trap; |
344 | /* Starting offset of private data for capturing brackets. */ |
345 | sljit_s32 cbra_ptr; |
346 | /* Output vector starting point. Must be divisible by 2. */ |
347 | sljit_s32 ovector_start; |
348 | /* Points to the starting character of the current match. */ |
349 | sljit_s32 start_ptr; |
350 | /* Last known position of the requested byte. */ |
351 | sljit_s32 req_char_ptr; |
352 | /* Head of the last recursion. */ |
353 | sljit_s32 recursive_head_ptr; |
354 | /* First inspected character for partial matching. |
355 | (Needed for avoiding zero length partial matches.) */ |
356 | sljit_s32 start_used_ptr; |
357 | /* Starting pointer for partial soft matches. */ |
358 | sljit_s32 hit_start; |
359 | /* Pointer of the match end position. */ |
360 | sljit_s32 match_end_ptr; |
361 | /* Points to the marked string. */ |
362 | sljit_s32 mark_ptr; |
363 | /* Recursive control verb management chain. */ |
364 | sljit_s32 control_head_ptr; |
365 | /* Points to the last matched capture block index. */ |
366 | sljit_s32 capture_last_ptr; |
367 | /* Fast forward skipping byte code pointer. */ |
368 | pcre_uchar *fast_forward_bc_ptr; |
369 | /* Locals used by fast fail optimization. */ |
370 | sljit_s32 fast_fail_start_ptr; |
371 | sljit_s32 fast_fail_end_ptr; |
372 | |
373 | /* Flipped and lower case tables. */ |
374 | const sljit_u8 *fcc; |
375 | sljit_sw lcc; |
376 | /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ |
377 | int mode; |
378 | /* TRUE, when minlength is greater than 0. */ |
379 | BOOL might_be_empty; |
380 | /* \K is found in the pattern. */ |
381 | BOOL has_set_som; |
382 | /* (*SKIP:arg) is found in the pattern. */ |
383 | BOOL has_skip_arg; |
384 | /* (*THEN) is found in the pattern. */ |
385 | BOOL has_then; |
386 | /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ |
387 | BOOL has_skip_in_assert_back; |
388 | /* Currently in recurse or negative assert. */ |
389 | BOOL local_exit; |
390 | /* Currently in a positive assert. */ |
391 | BOOL positive_assert; |
392 | /* Newline control. */ |
393 | int nltype; |
394 | sljit_u32 nlmax; |
395 | sljit_u32 nlmin; |
396 | int newline; |
397 | int bsr_nltype; |
398 | sljit_u32 bsr_nlmax; |
399 | sljit_u32 bsr_nlmin; |
400 | /* Dollar endonly. */ |
401 | int endonly; |
402 | /* Tables. */ |
403 | sljit_sw ctypes; |
404 | /* Named capturing brackets. */ |
405 | pcre_uchar *name_table; |
406 | sljit_sw name_count; |
407 | sljit_sw name_entry_size; |
408 | |
409 | /* Labels and jump lists. */ |
410 | struct sljit_label *partialmatchlabel; |
411 | struct sljit_label *quit_label; |
412 | struct sljit_label *forced_quit_label; |
413 | struct sljit_label *accept_label; |
414 | struct sljit_label *ff_newline_shortcut; |
415 | stub_list *stubs; |
416 | label_addr_list *label_addrs; |
417 | recurse_entry *entries; |
418 | recurse_entry *currententry; |
419 | jump_list *partialmatch; |
420 | jump_list *quit; |
421 | jump_list *positive_assert_quit; |
422 | jump_list *forced_quit; |
423 | jump_list *accept; |
424 | jump_list *calllimit; |
425 | jump_list *stackalloc; |
426 | jump_list *revertframes; |
427 | jump_list *wordboundary; |
428 | jump_list *anynewline; |
429 | jump_list *hspace; |
430 | jump_list *vspace; |
431 | jump_list *casefulcmp; |
432 | jump_list *caselesscmp; |
433 | jump_list *reset_match; |
434 | BOOL jscript_compat; |
435 | #ifdef SUPPORT_UTF |
436 | BOOL utf; |
437 | #ifdef SUPPORT_UCP |
438 | BOOL use_ucp; |
439 | jump_list *getucd; |
440 | #endif |
441 | #ifdef COMPILE_PCRE8 |
442 | jump_list *utfreadchar; |
443 | jump_list *utfreadchar16; |
444 | jump_list *utfreadtype8; |
445 | #endif |
446 | #endif /* SUPPORT_UTF */ |
447 | } compiler_common; |
448 | |
/* Context used by byte_sequence_compare. When sljit allows unaligned
   access, it also carries two identically shaped unions (c and oc) that
   expose the same storage as an int, a short, or an array of code units
   of the compiled character width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined(COMPILE_PCRE8)
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined(COMPILE_PCRE16)
    sljit_u16 asuchars[2];
#elif defined(COMPILE_PCRE32)
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined(COMPILE_PCRE8)
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined(COMPILE_PCRE16)
    sljit_u16 asuchars[2];
#elif defined(COMPILE_PCRE32)
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
482 | |
/* Remove the CMP macro inherited from sljit; this file defines its own
   CMP shortcut further down. */
#undef CMP

/* Byte offset of the i-th stack element: element 0 lives at
   -sizeof(sljit_sw) and every further element is one word lower. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register aliases mapped onto sljit R registers. */
#define TMP1 SLJIT_R0
#define STACK_TOP SLJIT_R1
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define RETURN_ADDR SLJIT_R4

/* Register aliases mapped onto sljit S registers. */
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
499 | |
/* Layout of the local space. */

/* Two locals that are free for use by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))

/* The output vector is kept on the stack and holds character pointers.
   Its data is split into two groups: the first one has the start / end
   character pointers, the second one has the start pointers of capturing
   groups whose end has not been reached yet. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))

/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
517 | |
518 | #if defined COMPILE_PCRE8 |
519 | #define MOV_UCHAR SLJIT_MOV_U8 |
520 | #define MOVU_UCHAR SLJIT_MOVU_U8 |
521 | #elif defined COMPILE_PCRE16 |
522 | #define MOV_UCHAR SLJIT_MOV_U16 |
523 | #define MOVU_UCHAR SLJIT_MOVU_U16 |
524 | #elif defined COMPILE_PCRE32 |
525 | #define MOV_UCHAR SLJIT_MOV_U32 |
526 | #define MOVU_UCHAR SLJIT_MOVU_U32 |
527 | #else |
528 | #error Unsupported compiling mode |
529 | #endif |
530 | |
/* Shortcuts for the sljit emitter calls. All of them except
   DEFINE_COMPILER and SET_LABEL expect a local variable named "compiler"
   in scope, which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))

/* Compare and jump. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Largest value accepted as a read character limit. */
#define READ_CHAR_MAX 0x7fffffff
558 | |
559 | static pcre_uchar *bracketend(pcre_uchar *cc) |
560 | { |
561 | SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); |
562 | do cc += GET(cc, 1); while (*cc == OP_ALT); |
563 | SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); |
564 | cc += 1 + LINK_SIZE; |
565 | return cc; |
566 | } |
567 | |
568 | static int no_alternatives(pcre_uchar *cc) |
569 | { |
570 | int count = 0; |
571 | SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); |
572 | do |
573 | { |
574 | cc += GET(cc, 1); |
575 | count++; |
576 | } |
577 | while (*cc == OP_ALT); |
578 | SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); |
579 | return count; |
580 | } |
581 | |
/* Functions that might need modification for every newly supported opcode:
583 | next_opcode |
584 | check_opcode_types |
585 | set_private_data_ptrs |
586 | get_framesize |
587 | init_frame |
588 | get_private_data_copy_length |
589 | copy_private_data |
590 | compile_matchingpath |
591 | compile_backtrackingpath |
592 | */ |
593 | |
594 | static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) |
595 | { |
596 | SLJIT_UNUSED_ARG(common); |
597 | switch(*cc) |
598 | { |
599 | case OP_SOD: |
600 | case OP_SOM: |
601 | case OP_SET_SOM: |
602 | case OP_NOT_WORD_BOUNDARY: |
603 | case OP_WORD_BOUNDARY: |
604 | case OP_NOT_DIGIT: |
605 | case OP_DIGIT: |
606 | case OP_NOT_WHITESPACE: |
607 | case OP_WHITESPACE: |
608 | case OP_NOT_WORDCHAR: |
609 | case OP_WORDCHAR: |
610 | case OP_ANY: |
611 | case OP_ALLANY: |
612 | case OP_NOTPROP: |
613 | case OP_PROP: |
614 | case OP_ANYNL: |
615 | case OP_NOT_HSPACE: |
616 | case OP_HSPACE: |
617 | case OP_NOT_VSPACE: |
618 | case OP_VSPACE: |
619 | case OP_EXTUNI: |
620 | case OP_EODN: |
621 | case OP_EOD: |
622 | case OP_CIRC: |
623 | case OP_CIRCM: |
624 | case OP_DOLL: |
625 | case OP_DOLLM: |
626 | case OP_CRSTAR: |
627 | case OP_CRMINSTAR: |
628 | case OP_CRPLUS: |
629 | case OP_CRMINPLUS: |
630 | case OP_CRQUERY: |
631 | case OP_CRMINQUERY: |
632 | case OP_CRRANGE: |
633 | case OP_CRMINRANGE: |
634 | case OP_CRPOSSTAR: |
635 | case OP_CRPOSPLUS: |
636 | case OP_CRPOSQUERY: |
637 | case OP_CRPOSRANGE: |
638 | case OP_CLASS: |
639 | case OP_NCLASS: |
640 | case OP_REF: |
641 | case OP_REFI: |
642 | case OP_DNREF: |
643 | case OP_DNREFI: |
644 | case OP_RECURSE: |
645 | case OP_CALLOUT: |
646 | case OP_ALT: |
647 | case OP_KET: |
648 | case OP_KETRMAX: |
649 | case OP_KETRMIN: |
650 | case OP_KETRPOS: |
651 | case OP_REVERSE: |
652 | case OP_ASSERT: |
653 | case OP_ASSERT_NOT: |
654 | case OP_ASSERTBACK: |
655 | case OP_ASSERTBACK_NOT: |
656 | case OP_ONCE: |
657 | case OP_ONCE_NC: |
658 | case OP_BRA: |
659 | case OP_BRAPOS: |
660 | case OP_CBRA: |
661 | case OP_CBRAPOS: |
662 | case OP_COND: |
663 | case OP_SBRA: |
664 | case OP_SBRAPOS: |
665 | case OP_SCBRA: |
666 | case OP_SCBRAPOS: |
667 | case OP_SCOND: |
668 | case OP_CREF: |
669 | case OP_DNCREF: |
670 | case OP_RREF: |
671 | case OP_DNRREF: |
672 | case OP_DEF: |
673 | case OP_BRAZERO: |
674 | case OP_BRAMINZERO: |
675 | case OP_BRAPOSZERO: |
676 | case OP_PRUNE: |
677 | case OP_SKIP: |
678 | case OP_THEN: |
679 | case OP_COMMIT: |
680 | case OP_FAIL: |
681 | case OP_ACCEPT: |
682 | case OP_ASSERT_ACCEPT: |
683 | case OP_CLOSE: |
684 | case OP_SKIPZERO: |
685 | return cc + PRIV(OP_lengths)[*cc]; |
686 | |
687 | case OP_CHAR: |
688 | case OP_CHARI: |
689 | case OP_NOT: |
690 | case OP_NOTI: |
691 | case OP_STAR: |
692 | case OP_MINSTAR: |
693 | case OP_PLUS: |
694 | case OP_MINPLUS: |
695 | case OP_QUERY: |
696 | case OP_MINQUERY: |
697 | case OP_UPTO: |
698 | case OP_MINUPTO: |
699 | case OP_EXACT: |
700 | case OP_POSSTAR: |
701 | case OP_POSPLUS: |
702 | case OP_POSQUERY: |
703 | case OP_POSUPTO: |
704 | case OP_STARI: |
705 | case OP_MINSTARI: |
706 | case OP_PLUSI: |
707 | case OP_MINPLUSI: |
708 | case OP_QUERYI: |
709 | case OP_MINQUERYI: |
710 | case OP_UPTOI: |
711 | case OP_MINUPTOI: |
712 | case OP_EXACTI: |
713 | case OP_POSSTARI: |
714 | case OP_POSPLUSI: |
715 | case OP_POSQUERYI: |
716 | case OP_POSUPTOI: |
717 | case OP_NOTSTAR: |
718 | case OP_NOTMINSTAR: |
719 | case OP_NOTPLUS: |
720 | case OP_NOTMINPLUS: |
721 | case OP_NOTQUERY: |
722 | case OP_NOTMINQUERY: |
723 | case OP_NOTUPTO: |
724 | case OP_NOTMINUPTO: |
725 | case OP_NOTEXACT: |
726 | case OP_NOTPOSSTAR: |
727 | case OP_NOTPOSPLUS: |
728 | case OP_NOTPOSQUERY: |
729 | case OP_NOTPOSUPTO: |
730 | case OP_NOTSTARI: |
731 | case OP_NOTMINSTARI: |
732 | case OP_NOTPLUSI: |
733 | case OP_NOTMINPLUSI: |
734 | case OP_NOTQUERYI: |
735 | case OP_NOTMINQUERYI: |
736 | case OP_NOTUPTOI: |
737 | case OP_NOTMINUPTOI: |
738 | case OP_NOTEXACTI: |
739 | case OP_NOTPOSSTARI: |
740 | case OP_NOTPOSPLUSI: |
741 | case OP_NOTPOSQUERYI: |
742 | case OP_NOTPOSUPTOI: |
743 | cc += PRIV(OP_lengths)[*cc]; |
744 | #ifdef SUPPORT_UTF |
745 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
746 | #endif |
747 | return cc; |
748 | |
749 | /* Special cases. */ |
750 | case OP_TYPESTAR: |
751 | case OP_TYPEMINSTAR: |
752 | case OP_TYPEPLUS: |
753 | case OP_TYPEMINPLUS: |
754 | case OP_TYPEQUERY: |
755 | case OP_TYPEMINQUERY: |
756 | case OP_TYPEUPTO: |
757 | case OP_TYPEMINUPTO: |
758 | case OP_TYPEEXACT: |
759 | case OP_TYPEPOSSTAR: |
760 | case OP_TYPEPOSPLUS: |
761 | case OP_TYPEPOSQUERY: |
762 | case OP_TYPEPOSUPTO: |
763 | return cc + PRIV(OP_lengths)[*cc] - 1; |
764 | |
765 | case OP_ANYBYTE: |
766 | #ifdef SUPPORT_UTF |
767 | if (common->utf) return NULL; |
768 | #endif |
769 | return cc + 1; |
770 | |
771 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
772 | case OP_XCLASS: |
773 | return cc + GET(cc, 1); |
774 | #endif |
775 | |
776 | case OP_MARK: |
777 | case OP_PRUNE_ARG: |
778 | case OP_SKIP_ARG: |
779 | case OP_THEN_ARG: |
780 | return cc + 1 + 2 + cc[1]; |
781 | |
782 | default: |
783 | /* All opcodes are supported now! */ |
784 | SLJIT_ASSERT_STOP(); |
785 | return NULL; |
786 | } |
787 | } |
788 | |
789 | static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) |
790 | { |
791 | int count; |
792 | pcre_uchar *slot; |
793 | pcre_uchar *assert_back_end = cc - 1; |
794 | |
795 | /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ |
796 | while (cc < ccend) |
797 | { |
798 | switch(*cc) |
799 | { |
800 | case OP_SET_SOM: |
801 | common->has_set_som = TRUE; |
802 | common->might_be_empty = TRUE; |
803 | cc += 1; |
804 | break; |
805 | |
806 | case OP_REF: |
807 | case OP_REFI: |
808 | common->optimized_cbracket[GET2(cc, 1)] = 0; |
809 | cc += 1 + IMM2_SIZE; |
810 | break; |
811 | |
812 | case OP_CBRAPOS: |
813 | case OP_SCBRAPOS: |
814 | common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; |
815 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
816 | break; |
817 | |
818 | case OP_COND: |
819 | case OP_SCOND: |
820 | /* Only AUTO_CALLOUT can insert this opcode. We do |
821 | not intend to support this case. */ |
822 | if (cc[1 + LINK_SIZE] == OP_CALLOUT) |
823 | return FALSE; |
824 | cc += 1 + LINK_SIZE; |
825 | break; |
826 | |
827 | case OP_CREF: |
828 | common->optimized_cbracket[GET2(cc, 1)] = 0; |
829 | cc += 1 + IMM2_SIZE; |
830 | break; |
831 | |
832 | case OP_DNREF: |
833 | case OP_DNREFI: |
834 | case OP_DNCREF: |
835 | count = GET2(cc, 1 + IMM2_SIZE); |
836 | slot = common->name_table + GET2(cc, 1) * common->name_entry_size; |
837 | while (count-- > 0) |
838 | { |
839 | common->optimized_cbracket[GET2(slot, 0)] = 0; |
840 | slot += common->name_entry_size; |
841 | } |
842 | cc += 1 + 2 * IMM2_SIZE; |
843 | break; |
844 | |
845 | case OP_RECURSE: |
846 | /* Set its value only once. */ |
847 | if (common->recursive_head_ptr == 0) |
848 | { |
849 | common->recursive_head_ptr = common->ovector_start; |
850 | common->ovector_start += sizeof(sljit_sw); |
851 | } |
852 | cc += 1 + LINK_SIZE; |
853 | break; |
854 | |
855 | case OP_CALLOUT: |
856 | if (common->capture_last_ptr == 0) |
857 | { |
858 | common->capture_last_ptr = common->ovector_start; |
859 | common->ovector_start += sizeof(sljit_sw); |
860 | } |
861 | cc += 2 + 2 * LINK_SIZE; |
862 | break; |
863 | |
864 | case OP_ASSERTBACK: |
865 | slot = bracketend(cc); |
866 | if (slot > assert_back_end) |
867 | assert_back_end = slot; |
868 | cc += 1 + LINK_SIZE; |
869 | break; |
870 | |
871 | case OP_THEN_ARG: |
872 | common->has_then = TRUE; |
873 | common->control_head_ptr = 1; |
874 | /* Fall through. */ |
875 | |
876 | case OP_PRUNE_ARG: |
877 | case OP_MARK: |
878 | if (common->mark_ptr == 0) |
879 | { |
880 | common->mark_ptr = common->ovector_start; |
881 | common->ovector_start += sizeof(sljit_sw); |
882 | } |
883 | cc += 1 + 2 + cc[1]; |
884 | break; |
885 | |
886 | case OP_THEN: |
887 | common->has_then = TRUE; |
888 | common->control_head_ptr = 1; |
889 | cc += 1; |
890 | break; |
891 | |
892 | case OP_SKIP: |
893 | if (cc < assert_back_end) |
894 | common->has_skip_in_assert_back = TRUE; |
895 | cc += 1; |
896 | break; |
897 | |
898 | case OP_SKIP_ARG: |
899 | common->control_head_ptr = 1; |
900 | common->has_skip_arg = TRUE; |
901 | if (cc < assert_back_end) |
902 | common->has_skip_in_assert_back = TRUE; |
903 | cc += 1 + 2 + cc[1]; |
904 | break; |
905 | |
906 | default: |
907 | cc = next_opcode(common, cc); |
908 | if (cc == NULL) |
909 | return FALSE; |
910 | break; |
911 | } |
912 | } |
913 | return TRUE; |
914 | } |
915 | |
916 | static BOOL is_accelerated_repeat(pcre_uchar *cc) |
917 | { |
918 | switch(*cc) |
919 | { |
920 | case OP_TYPESTAR: |
921 | case OP_TYPEMINSTAR: |
922 | case OP_TYPEPLUS: |
923 | case OP_TYPEMINPLUS: |
924 | case OP_TYPEPOSSTAR: |
925 | case OP_TYPEPOSPLUS: |
926 | return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI); |
927 | |
928 | case OP_STAR: |
929 | case OP_MINSTAR: |
930 | case OP_PLUS: |
931 | case OP_MINPLUS: |
932 | case OP_POSSTAR: |
933 | case OP_POSPLUS: |
934 | |
935 | case OP_STARI: |
936 | case OP_MINSTARI: |
937 | case OP_PLUSI: |
938 | case OP_MINPLUSI: |
939 | case OP_POSSTARI: |
940 | case OP_POSPLUSI: |
941 | |
942 | case OP_NOTSTAR: |
943 | case OP_NOTMINSTAR: |
944 | case OP_NOTPLUS: |
945 | case OP_NOTMINPLUS: |
946 | case OP_NOTPOSSTAR: |
947 | case OP_NOTPOSPLUS: |
948 | |
949 | case OP_NOTSTARI: |
950 | case OP_NOTMINSTARI: |
951 | case OP_NOTPLUSI: |
952 | case OP_NOTMINPLUSI: |
953 | case OP_NOTPOSSTARI: |
954 | case OP_NOTPOSPLUSI: |
955 | return TRUE; |
956 | |
957 | case OP_CLASS: |
958 | case OP_NCLASS: |
959 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
960 | case OP_XCLASS: |
961 | cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar))); |
962 | #else |
963 | cc += (1 + (32 / sizeof(pcre_uchar))); |
964 | #endif |
965 | |
966 | switch(*cc) |
967 | { |
968 | case OP_CRSTAR: |
969 | case OP_CRMINSTAR: |
970 | case OP_CRPLUS: |
971 | case OP_CRMINPLUS: |
972 | case OP_CRPOSSTAR: |
973 | case OP_CRPOSPLUS: |
974 | return TRUE; |
975 | } |
976 | break; |
977 | } |
978 | return FALSE; |
979 | } |
980 | |
981 | static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start) |
982 | { |
983 | pcre_uchar *cc = common->start; |
984 | pcre_uchar *end; |
985 | |
986 | /* Skip not repeated brackets. */ |
987 | while (TRUE) |
988 | { |
989 | switch(*cc) |
990 | { |
991 | case OP_SOD: |
992 | case OP_SOM: |
993 | case OP_SET_SOM: |
994 | case OP_NOT_WORD_BOUNDARY: |
995 | case OP_WORD_BOUNDARY: |
996 | case OP_EODN: |
997 | case OP_EOD: |
998 | case OP_CIRC: |
999 | case OP_CIRCM: |
1000 | case OP_DOLL: |
1001 | case OP_DOLLM: |
1002 | /* Zero width assertions. */ |
1003 | cc++; |
1004 | continue; |
1005 | } |
1006 | |
1007 | if (*cc != OP_BRA && *cc != OP_CBRA) |
1008 | break; |
1009 | |
1010 | end = cc + GET(cc, 1); |
1011 | if (*end != OP_KET || PRIVATE_DATA(end) != 0) |
1012 | return FALSE; |
1013 | if (*cc == OP_CBRA) |
1014 | { |
1015 | if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) |
1016 | return FALSE; |
1017 | cc += IMM2_SIZE; |
1018 | } |
1019 | cc += 1 + LINK_SIZE; |
1020 | } |
1021 | |
1022 | if (is_accelerated_repeat(cc)) |
1023 | { |
1024 | common->fast_forward_bc_ptr = cc; |
1025 | common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; |
1026 | *private_data_start += sizeof(sljit_sw); |
1027 | return TRUE; |
1028 | } |
1029 | return FALSE; |
1030 | } |
1031 | |
1032 | static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth) |
1033 | { |
1034 | pcre_uchar *next_alt; |
1035 | |
1036 | SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); |
1037 | |
1038 | if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) |
1039 | return; |
1040 | |
1041 | next_alt = bracketend(cc) - (1 + LINK_SIZE); |
1042 | if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0) |
1043 | return; |
1044 | |
1045 | do |
1046 | { |
1047 | next_alt = cc + GET(cc, 1); |
1048 | |
1049 | cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); |
1050 | |
1051 | while (TRUE) |
1052 | { |
1053 | switch(*cc) |
1054 | { |
1055 | case OP_SOD: |
1056 | case OP_SOM: |
1057 | case OP_SET_SOM: |
1058 | case OP_NOT_WORD_BOUNDARY: |
1059 | case OP_WORD_BOUNDARY: |
1060 | case OP_EODN: |
1061 | case OP_EOD: |
1062 | case OP_CIRC: |
1063 | case OP_CIRCM: |
1064 | case OP_DOLL: |
1065 | case OP_DOLLM: |
1066 | /* Zero width assertions. */ |
1067 | cc++; |
1068 | continue; |
1069 | } |
1070 | break; |
1071 | } |
1072 | |
1073 | if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA)) |
1074 | detect_fast_fail(common, cc, private_data_start, depth - 1); |
1075 | |
1076 | if (is_accelerated_repeat(cc)) |
1077 | { |
1078 | common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; |
1079 | |
1080 | if (common->fast_fail_start_ptr == 0) |
1081 | common->fast_fail_start_ptr = *private_data_start; |
1082 | |
1083 | *private_data_start += sizeof(sljit_sw); |
1084 | common->fast_fail_end_ptr = *private_data_start; |
1085 | |
1086 | if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) |
1087 | return; |
1088 | } |
1089 | |
1090 | cc = next_alt; |
1091 | } |
1092 | while (*cc == OP_ALT); |
1093 | } |
1094 | |
1095 | static int get_class_iterator_size(pcre_uchar *cc) |
1096 | { |
1097 | sljit_u32 min; |
1098 | sljit_u32 max; |
1099 | switch(*cc) |
1100 | { |
1101 | case OP_CRSTAR: |
1102 | case OP_CRPLUS: |
1103 | return 2; |
1104 | |
1105 | case OP_CRMINSTAR: |
1106 | case OP_CRMINPLUS: |
1107 | case OP_CRQUERY: |
1108 | case OP_CRMINQUERY: |
1109 | return 1; |
1110 | |
1111 | case OP_CRRANGE: |
1112 | case OP_CRMINRANGE: |
1113 | min = GET2(cc, 1); |
1114 | max = GET2(cc, 1 + IMM2_SIZE); |
1115 | if (max == 0) |
1116 | return (*cc == OP_CRRANGE) ? 2 : 1; |
1117 | max -= min; |
1118 | if (max > 2) |
1119 | max = 2; |
1120 | return max; |
1121 | |
1122 | default: |
1123 | return 0; |
1124 | } |
1125 | } |
1126 | |
1127 | static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin) |
1128 | { |
1129 | pcre_uchar *end = bracketend(begin); |
1130 | pcre_uchar *next; |
1131 | pcre_uchar *next_end; |
1132 | pcre_uchar *max_end; |
1133 | pcre_uchar type; |
1134 | sljit_sw length = end - begin; |
1135 | int min, max, i; |
1136 | |
1137 | /* Detect fixed iterations first. */ |
1138 | if (end[-(1 + LINK_SIZE)] != OP_KET) |
1139 | return FALSE; |
1140 | |
1141 | /* Already detected repeat. */ |
1142 | if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) |
1143 | return TRUE; |
1144 | |
1145 | next = end; |
1146 | min = 1; |
1147 | while (1) |
1148 | { |
1149 | if (*next != *begin) |
1150 | break; |
1151 | next_end = bracketend(next); |
1152 | if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0) |
1153 | break; |
1154 | next = next_end; |
1155 | min++; |
1156 | } |
1157 | |
1158 | if (min == 2) |
1159 | return FALSE; |
1160 | |
1161 | max = 0; |
1162 | max_end = next; |
1163 | if (*next == OP_BRAZERO || *next == OP_BRAMINZERO) |
1164 | { |
1165 | type = *next; |
1166 | while (1) |
1167 | { |
1168 | if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin) |
1169 | break; |
1170 | next_end = bracketend(next + 2 + LINK_SIZE); |
1171 | if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) |
1172 | break; |
1173 | next = next_end; |
1174 | max++; |
1175 | } |
1176 | |
1177 | if (next[0] == type && next[1] == *begin && max >= 1) |
1178 | { |
1179 | next_end = bracketend(next + 1); |
1180 | if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0) |
1181 | { |
1182 | for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) |
1183 | if (*next_end != OP_KET) |
1184 | break; |
1185 | |
1186 | if (i == max) |
1187 | { |
1188 | common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end; |
1189 | common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO; |
1190 | /* +2 the original and the last. */ |
1191 | common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2; |
1192 | if (min == 1) |
1193 | return TRUE; |
1194 | min--; |
1195 | max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); |
1196 | } |
1197 | } |
1198 | } |
1199 | } |
1200 | |
1201 | if (min >= 3) |
1202 | { |
1203 | common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end; |
1204 | common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT; |
1205 | common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min; |
1206 | return TRUE; |
1207 | } |
1208 | |
1209 | return FALSE; |
1210 | } |
1211 | |
/* Case label groups shared by the switches which assign, count and copy
private data for single character iterators. The numeric suffix is the number
of private machine words the iterator may own (see set_private_data_ptrs and
get_private_data_copy_length). */

/* One word; the opcode is followed by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

/* Two words; the opcode is followed by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

/* Two words; the opcode is followed by an IMM2 count and the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* Character type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY: \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS:

/* Character type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPEPLUS: \
    case OP_TYPESTAR:

/* Character type iterators with an IMM2 count, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEMINUPTO: \
    case OP_TYPEUPTO:
1263 | |
1264 | static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend) |
1265 | { |
1266 | pcre_uchar *cc = common->start; |
1267 | pcre_uchar *alternative; |
1268 | pcre_uchar *end = NULL; |
1269 | int private_data_ptr = *private_data_start; |
1270 | int space, size, bracketlen; |
1271 | BOOL repeat_check = TRUE; |
1272 | |
1273 | while (cc < ccend) |
1274 | { |
1275 | space = 0; |
1276 | size = 0; |
1277 | bracketlen = 0; |
1278 | if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) |
1279 | break; |
1280 | |
1281 | if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) |
1282 | { |
1283 | if (detect_repeat(common, cc)) |
1284 | { |
1285 | /* These brackets are converted to repeats, so no global |
1286 | based single character repeat is allowed. */ |
1287 | if (cc >= end) |
1288 | end = bracketend(cc); |
1289 | } |
1290 | } |
1291 | repeat_check = TRUE; |
1292 | |
1293 | switch(*cc) |
1294 | { |
1295 | case OP_KET: |
1296 | if (common->private_data_ptrs[cc + 1 - common->start] != 0) |
1297 | { |
1298 | common->private_data_ptrs[cc - common->start] = private_data_ptr; |
1299 | private_data_ptr += sizeof(sljit_sw); |
1300 | cc += common->private_data_ptrs[cc + 1 - common->start]; |
1301 | } |
1302 | cc += 1 + LINK_SIZE; |
1303 | break; |
1304 | |
1305 | case OP_ASSERT: |
1306 | case OP_ASSERT_NOT: |
1307 | case OP_ASSERTBACK: |
1308 | case OP_ASSERTBACK_NOT: |
1309 | case OP_ONCE: |
1310 | case OP_ONCE_NC: |
1311 | case OP_BRAPOS: |
1312 | case OP_SBRA: |
1313 | case OP_SBRAPOS: |
1314 | case OP_SCOND: |
1315 | common->private_data_ptrs[cc - common->start] = private_data_ptr; |
1316 | private_data_ptr += sizeof(sljit_sw); |
1317 | bracketlen = 1 + LINK_SIZE; |
1318 | break; |
1319 | |
1320 | case OP_CBRAPOS: |
1321 | case OP_SCBRAPOS: |
1322 | common->private_data_ptrs[cc - common->start] = private_data_ptr; |
1323 | private_data_ptr += sizeof(sljit_sw); |
1324 | bracketlen = 1 + LINK_SIZE + IMM2_SIZE; |
1325 | break; |
1326 | |
1327 | case OP_COND: |
1328 | /* Might be a hidden SCOND. */ |
1329 | alternative = cc + GET(cc, 1); |
1330 | if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) |
1331 | { |
1332 | common->private_data_ptrs[cc - common->start] = private_data_ptr; |
1333 | private_data_ptr += sizeof(sljit_sw); |
1334 | } |
1335 | bracketlen = 1 + LINK_SIZE; |
1336 | break; |
1337 | |
1338 | case OP_BRA: |
1339 | bracketlen = 1 + LINK_SIZE; |
1340 | break; |
1341 | |
1342 | case OP_CBRA: |
1343 | case OP_SCBRA: |
1344 | bracketlen = 1 + LINK_SIZE + IMM2_SIZE; |
1345 | break; |
1346 | |
1347 | case OP_BRAZERO: |
1348 | case OP_BRAMINZERO: |
1349 | case OP_BRAPOSZERO: |
1350 | repeat_check = FALSE; |
1351 | size = 1; |
1352 | break; |
1353 | |
1354 | CASE_ITERATOR_PRIVATE_DATA_1 |
1355 | space = 1; |
1356 | size = -2; |
1357 | break; |
1358 | |
1359 | CASE_ITERATOR_PRIVATE_DATA_2A |
1360 | space = 2; |
1361 | size = -2; |
1362 | break; |
1363 | |
1364 | CASE_ITERATOR_PRIVATE_DATA_2B |
1365 | space = 2; |
1366 | size = -(2 + IMM2_SIZE); |
1367 | break; |
1368 | |
1369 | CASE_ITERATOR_TYPE_PRIVATE_DATA_1 |
1370 | space = 1; |
1371 | size = 1; |
1372 | break; |
1373 | |
1374 | CASE_ITERATOR_TYPE_PRIVATE_DATA_2A |
1375 | if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) |
1376 | space = 2; |
1377 | size = 1; |
1378 | break; |
1379 | |
1380 | case OP_TYPEUPTO: |
1381 | if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) |
1382 | space = 2; |
1383 | size = 1 + IMM2_SIZE; |
1384 | break; |
1385 | |
1386 | case OP_TYPEMINUPTO: |
1387 | space = 2; |
1388 | size = 1 + IMM2_SIZE; |
1389 | break; |
1390 | |
1391 | case OP_CLASS: |
1392 | case OP_NCLASS: |
1393 | space = get_class_iterator_size(cc + size); |
1394 | size = 1 + 32 / sizeof(pcre_uchar); |
1395 | break; |
1396 | |
1397 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
1398 | case OP_XCLASS: |
1399 | space = get_class_iterator_size(cc + size); |
1400 | size = GET(cc, 1); |
1401 | break; |
1402 | #endif |
1403 | |
1404 | default: |
1405 | cc = next_opcode(common, cc); |
1406 | SLJIT_ASSERT(cc != NULL); |
1407 | break; |
1408 | } |
1409 | |
1410 | /* Character iterators, which are not inside a repeated bracket, |
1411 | gets a private slot instead of allocating it on the stack. */ |
1412 | if (space > 0 && cc >= end) |
1413 | { |
1414 | common->private_data_ptrs[cc - common->start] = private_data_ptr; |
1415 | private_data_ptr += sizeof(sljit_sw) * space; |
1416 | } |
1417 | |
1418 | if (size != 0) |
1419 | { |
1420 | if (size < 0) |
1421 | { |
1422 | cc += -size; |
1423 | #ifdef SUPPORT_UTF |
1424 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1425 | #endif |
1426 | } |
1427 | else |
1428 | cc += size; |
1429 | } |
1430 | |
1431 | if (bracketlen > 0) |
1432 | { |
1433 | if (cc >= end) |
1434 | { |
1435 | end = bracketend(cc); |
1436 | if (end[-1 - LINK_SIZE] == OP_KET) |
1437 | end = NULL; |
1438 | } |
1439 | cc += bracketlen; |
1440 | } |
1441 | } |
1442 | *private_data_start = private_data_ptr; |
1443 | } |
1444 | |
1445 | /* Returns with a frame_types (always < 0) if no need for frame. */ |
1446 | static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head) |
1447 | { |
1448 | int length = 0; |
1449 | int possessive = 0; |
1450 | BOOL stack_restore = FALSE; |
1451 | BOOL setsom_found = recursive; |
1452 | BOOL setmark_found = recursive; |
1453 | /* The last capture is a local variable even for recursions. */ |
1454 | BOOL capture_last_found = FALSE; |
1455 | |
1456 | #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD |
1457 | SLJIT_ASSERT(common->control_head_ptr != 0); |
1458 | *needs_control_head = TRUE; |
1459 | #else |
1460 | *needs_control_head = FALSE; |
1461 | #endif |
1462 | |
1463 | if (ccend == NULL) |
1464 | { |
1465 | ccend = bracketend(cc) - (1 + LINK_SIZE); |
1466 | if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) |
1467 | { |
1468 | possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; |
1469 | /* This is correct regardless of common->capture_last_ptr. */ |
1470 | capture_last_found = TRUE; |
1471 | } |
1472 | cc = next_opcode(common, cc); |
1473 | } |
1474 | |
1475 | SLJIT_ASSERT(cc != NULL); |
1476 | while (cc < ccend) |
1477 | switch(*cc) |
1478 | { |
1479 | case OP_SET_SOM: |
1480 | SLJIT_ASSERT(common->has_set_som); |
1481 | stack_restore = TRUE; |
1482 | if (!setsom_found) |
1483 | { |
1484 | length += 2; |
1485 | setsom_found = TRUE; |
1486 | } |
1487 | cc += 1; |
1488 | break; |
1489 | |
1490 | case OP_MARK: |
1491 | case OP_PRUNE_ARG: |
1492 | case OP_THEN_ARG: |
1493 | SLJIT_ASSERT(common->mark_ptr != 0); |
1494 | stack_restore = TRUE; |
1495 | if (!setmark_found) |
1496 | { |
1497 | length += 2; |
1498 | setmark_found = TRUE; |
1499 | } |
1500 | if (common->control_head_ptr != 0) |
1501 | *needs_control_head = TRUE; |
1502 | cc += 1 + 2 + cc[1]; |
1503 | break; |
1504 | |
1505 | case OP_RECURSE: |
1506 | stack_restore = TRUE; |
1507 | if (common->has_set_som && !setsom_found) |
1508 | { |
1509 | length += 2; |
1510 | setsom_found = TRUE; |
1511 | } |
1512 | if (common->mark_ptr != 0 && !setmark_found) |
1513 | { |
1514 | length += 2; |
1515 | setmark_found = TRUE; |
1516 | } |
1517 | if (common->capture_last_ptr != 0 && !capture_last_found) |
1518 | { |
1519 | length += 2; |
1520 | capture_last_found = TRUE; |
1521 | } |
1522 | cc += 1 + LINK_SIZE; |
1523 | break; |
1524 | |
1525 | case OP_CBRA: |
1526 | case OP_CBRAPOS: |
1527 | case OP_SCBRA: |
1528 | case OP_SCBRAPOS: |
1529 | stack_restore = TRUE; |
1530 | if (common->capture_last_ptr != 0 && !capture_last_found) |
1531 | { |
1532 | length += 2; |
1533 | capture_last_found = TRUE; |
1534 | } |
1535 | length += 3; |
1536 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
1537 | break; |
1538 | |
1539 | case OP_THEN: |
1540 | stack_restore = TRUE; |
1541 | if (common->control_head_ptr != 0) |
1542 | *needs_control_head = TRUE; |
1543 | cc ++; |
1544 | break; |
1545 | |
1546 | default: |
1547 | stack_restore = TRUE; |
1548 | /* Fall through. */ |
1549 | |
1550 | case OP_NOT_WORD_BOUNDARY: |
1551 | case OP_WORD_BOUNDARY: |
1552 | case OP_NOT_DIGIT: |
1553 | case OP_DIGIT: |
1554 | case OP_NOT_WHITESPACE: |
1555 | case OP_WHITESPACE: |
1556 | case OP_NOT_WORDCHAR: |
1557 | case OP_WORDCHAR: |
1558 | case OP_ANY: |
1559 | case OP_ALLANY: |
1560 | case OP_ANYBYTE: |
1561 | case OP_NOTPROP: |
1562 | case OP_PROP: |
1563 | case OP_ANYNL: |
1564 | case OP_NOT_HSPACE: |
1565 | case OP_HSPACE: |
1566 | case OP_NOT_VSPACE: |
1567 | case OP_VSPACE: |
1568 | case OP_EXTUNI: |
1569 | case OP_EODN: |
1570 | case OP_EOD: |
1571 | case OP_CIRC: |
1572 | case OP_CIRCM: |
1573 | case OP_DOLL: |
1574 | case OP_DOLLM: |
1575 | case OP_CHAR: |
1576 | case OP_CHARI: |
1577 | case OP_NOT: |
1578 | case OP_NOTI: |
1579 | |
1580 | case OP_EXACT: |
1581 | case OP_POSSTAR: |
1582 | case OP_POSPLUS: |
1583 | case OP_POSQUERY: |
1584 | case OP_POSUPTO: |
1585 | |
1586 | case OP_EXACTI: |
1587 | case OP_POSSTARI: |
1588 | case OP_POSPLUSI: |
1589 | case OP_POSQUERYI: |
1590 | case OP_POSUPTOI: |
1591 | |
1592 | case OP_NOTEXACT: |
1593 | case OP_NOTPOSSTAR: |
1594 | case OP_NOTPOSPLUS: |
1595 | case OP_NOTPOSQUERY: |
1596 | case OP_NOTPOSUPTO: |
1597 | |
1598 | case OP_NOTEXACTI: |
1599 | case OP_NOTPOSSTARI: |
1600 | case OP_NOTPOSPLUSI: |
1601 | case OP_NOTPOSQUERYI: |
1602 | case OP_NOTPOSUPTOI: |
1603 | |
1604 | case OP_TYPEEXACT: |
1605 | case OP_TYPEPOSSTAR: |
1606 | case OP_TYPEPOSPLUS: |
1607 | case OP_TYPEPOSQUERY: |
1608 | case OP_TYPEPOSUPTO: |
1609 | |
1610 | case OP_CLASS: |
1611 | case OP_NCLASS: |
1612 | case OP_XCLASS: |
1613 | case OP_CALLOUT: |
1614 | |
1615 | cc = next_opcode(common, cc); |
1616 | SLJIT_ASSERT(cc != NULL); |
1617 | break; |
1618 | } |
1619 | |
1620 | /* Possessive quantifiers can use a special case. */ |
1621 | if (SLJIT_UNLIKELY(possessive == length)) |
1622 | return stack_restore ? no_frame : no_stack; |
1623 | |
1624 | if (length > 0) |
1625 | return length + 1; |
1626 | return stack_restore ? no_frame : no_stack; |
1627 | } |
1628 | |
1629 | static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive) |
1630 | { |
1631 | DEFINE_COMPILER; |
1632 | BOOL setsom_found = recursive; |
1633 | BOOL setmark_found = recursive; |
1634 | /* The last capture is a local variable even for recursions. */ |
1635 | BOOL capture_last_found = FALSE; |
1636 | int offset; |
1637 | |
1638 | /* >= 1 + shortest item size (2) */ |
1639 | SLJIT_UNUSED_ARG(stacktop); |
1640 | SLJIT_ASSERT(stackpos >= stacktop + 2); |
1641 | |
1642 | stackpos = STACK(stackpos); |
1643 | if (ccend == NULL) |
1644 | { |
1645 | ccend = bracketend(cc) - (1 + LINK_SIZE); |
1646 | if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) |
1647 | cc = next_opcode(common, cc); |
1648 | } |
1649 | |
1650 | SLJIT_ASSERT(cc != NULL); |
1651 | while (cc < ccend) |
1652 | switch(*cc) |
1653 | { |
1654 | case OP_SET_SOM: |
1655 | SLJIT_ASSERT(common->has_set_som); |
1656 | if (!setsom_found) |
1657 | { |
1658 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); |
1659 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); |
1660 | stackpos += (int)sizeof(sljit_sw); |
1661 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1662 | stackpos += (int)sizeof(sljit_sw); |
1663 | setsom_found = TRUE; |
1664 | } |
1665 | cc += 1; |
1666 | break; |
1667 | |
1668 | case OP_MARK: |
1669 | case OP_PRUNE_ARG: |
1670 | case OP_THEN_ARG: |
1671 | SLJIT_ASSERT(common->mark_ptr != 0); |
1672 | if (!setmark_found) |
1673 | { |
1674 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); |
1675 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); |
1676 | stackpos += (int)sizeof(sljit_sw); |
1677 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1678 | stackpos += (int)sizeof(sljit_sw); |
1679 | setmark_found = TRUE; |
1680 | } |
1681 | cc += 1 + 2 + cc[1]; |
1682 | break; |
1683 | |
1684 | case OP_RECURSE: |
1685 | if (common->has_set_som && !setsom_found) |
1686 | { |
1687 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); |
1688 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); |
1689 | stackpos += (int)sizeof(sljit_sw); |
1690 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1691 | stackpos += (int)sizeof(sljit_sw); |
1692 | setsom_found = TRUE; |
1693 | } |
1694 | if (common->mark_ptr != 0 && !setmark_found) |
1695 | { |
1696 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); |
1697 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); |
1698 | stackpos += (int)sizeof(sljit_sw); |
1699 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1700 | stackpos += (int)sizeof(sljit_sw); |
1701 | setmark_found = TRUE; |
1702 | } |
1703 | if (common->capture_last_ptr != 0 && !capture_last_found) |
1704 | { |
1705 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); |
1706 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); |
1707 | stackpos += (int)sizeof(sljit_sw); |
1708 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1709 | stackpos += (int)sizeof(sljit_sw); |
1710 | capture_last_found = TRUE; |
1711 | } |
1712 | cc += 1 + LINK_SIZE; |
1713 | break; |
1714 | |
1715 | case OP_CBRA: |
1716 | case OP_CBRAPOS: |
1717 | case OP_SCBRA: |
1718 | case OP_SCBRAPOS: |
1719 | if (common->capture_last_ptr != 0 && !capture_last_found) |
1720 | { |
1721 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); |
1722 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); |
1723 | stackpos += (int)sizeof(sljit_sw); |
1724 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1725 | stackpos += (int)sizeof(sljit_sw); |
1726 | capture_last_found = TRUE; |
1727 | } |
1728 | offset = (GET2(cc, 1 + LINK_SIZE)) << 1; |
1729 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); |
1730 | stackpos += (int)sizeof(sljit_sw); |
1731 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
1732 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
1733 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); |
1734 | stackpos += (int)sizeof(sljit_sw); |
1735 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); |
1736 | stackpos += (int)sizeof(sljit_sw); |
1737 | |
1738 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
1739 | break; |
1740 | |
1741 | default: |
1742 | cc = next_opcode(common, cc); |
1743 | SLJIT_ASSERT(cc != NULL); |
1744 | break; |
1745 | } |
1746 | |
1747 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); |
1748 | SLJIT_ASSERT(stackpos == STACK(stacktop)); |
1749 | } |
1750 | |
1751 | static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head) |
1752 | { |
1753 | int private_data_length = needs_control_head ? 3 : 2; |
1754 | int size; |
1755 | pcre_uchar *alternative; |
1756 | /* Calculate the sum of the private machine words. */ |
1757 | while (cc < ccend) |
1758 | { |
1759 | size = 0; |
1760 | switch(*cc) |
1761 | { |
1762 | case OP_KET: |
1763 | if (PRIVATE_DATA(cc) != 0) |
1764 | { |
1765 | private_data_length++; |
1766 | SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); |
1767 | cc += PRIVATE_DATA(cc + 1); |
1768 | } |
1769 | cc += 1 + LINK_SIZE; |
1770 | break; |
1771 | |
1772 | case OP_ASSERT: |
1773 | case OP_ASSERT_NOT: |
1774 | case OP_ASSERTBACK: |
1775 | case OP_ASSERTBACK_NOT: |
1776 | case OP_ONCE: |
1777 | case OP_ONCE_NC: |
1778 | case OP_BRAPOS: |
1779 | case OP_SBRA: |
1780 | case OP_SBRAPOS: |
1781 | case OP_SCOND: |
1782 | private_data_length++; |
1783 | SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); |
1784 | cc += 1 + LINK_SIZE; |
1785 | break; |
1786 | |
1787 | case OP_CBRA: |
1788 | case OP_SCBRA: |
1789 | if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) |
1790 | private_data_length++; |
1791 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
1792 | break; |
1793 | |
1794 | case OP_CBRAPOS: |
1795 | case OP_SCBRAPOS: |
1796 | private_data_length += 2; |
1797 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
1798 | break; |
1799 | |
1800 | case OP_COND: |
1801 | /* Might be a hidden SCOND. */ |
1802 | alternative = cc + GET(cc, 1); |
1803 | if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) |
1804 | private_data_length++; |
1805 | cc += 1 + LINK_SIZE; |
1806 | break; |
1807 | |
1808 | CASE_ITERATOR_PRIVATE_DATA_1 |
1809 | if (PRIVATE_DATA(cc)) |
1810 | private_data_length++; |
1811 | cc += 2; |
1812 | #ifdef SUPPORT_UTF |
1813 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1814 | #endif |
1815 | break; |
1816 | |
1817 | CASE_ITERATOR_PRIVATE_DATA_2A |
1818 | if (PRIVATE_DATA(cc)) |
1819 | private_data_length += 2; |
1820 | cc += 2; |
1821 | #ifdef SUPPORT_UTF |
1822 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1823 | #endif |
1824 | break; |
1825 | |
1826 | CASE_ITERATOR_PRIVATE_DATA_2B |
1827 | if (PRIVATE_DATA(cc)) |
1828 | private_data_length += 2; |
1829 | cc += 2 + IMM2_SIZE; |
1830 | #ifdef SUPPORT_UTF |
1831 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1832 | #endif |
1833 | break; |
1834 | |
1835 | CASE_ITERATOR_TYPE_PRIVATE_DATA_1 |
1836 | if (PRIVATE_DATA(cc)) |
1837 | private_data_length++; |
1838 | cc += 1; |
1839 | break; |
1840 | |
1841 | CASE_ITERATOR_TYPE_PRIVATE_DATA_2A |
1842 | if (PRIVATE_DATA(cc)) |
1843 | private_data_length += 2; |
1844 | cc += 1; |
1845 | break; |
1846 | |
1847 | CASE_ITERATOR_TYPE_PRIVATE_DATA_2B |
1848 | if (PRIVATE_DATA(cc)) |
1849 | private_data_length += 2; |
1850 | cc += 1 + IMM2_SIZE; |
1851 | break; |
1852 | |
1853 | case OP_CLASS: |
1854 | case OP_NCLASS: |
1855 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
1856 | case OP_XCLASS: |
1857 | size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); |
1858 | #else |
1859 | size = 1 + 32 / (int)sizeof(pcre_uchar); |
1860 | #endif |
1861 | if (PRIVATE_DATA(cc)) |
1862 | private_data_length += get_class_iterator_size(cc + size); |
1863 | cc += size; |
1864 | break; |
1865 | |
1866 | default: |
1867 | cc = next_opcode(common, cc); |
1868 | SLJIT_ASSERT(cc != NULL); |
1869 | break; |
1870 | } |
1871 | } |
1872 | SLJIT_ASSERT(cc == ccend); |
1873 | return private_data_length; |
1874 | } |
1875 | |
1876 | static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, |
1877 | BOOL save, int stackptr, int stacktop, BOOL needs_control_head) |
1878 | { |
1879 | DEFINE_COMPILER; |
1880 | int srcw[2]; |
1881 | int count, size; |
1882 | BOOL tmp1next = TRUE; |
1883 | BOOL tmp1empty = TRUE; |
1884 | BOOL tmp2empty = TRUE; |
1885 | pcre_uchar *alternative; |
1886 | enum { |
1887 | start, |
1888 | loop, |
1889 | end |
1890 | } status; |
1891 | |
1892 | status = save ? start : loop; |
1893 | stackptr = STACK(stackptr - 2); |
1894 | stacktop = STACK(stacktop - 1); |
1895 | |
1896 | if (!save) |
1897 | { |
1898 | stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw); |
1899 | if (stackptr < stacktop) |
1900 | { |
1901 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); |
1902 | stackptr += sizeof(sljit_sw); |
1903 | tmp1empty = FALSE; |
1904 | } |
1905 | if (stackptr < stacktop) |
1906 | { |
1907 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); |
1908 | stackptr += sizeof(sljit_sw); |
1909 | tmp2empty = FALSE; |
1910 | } |
1911 | /* The tmp1next must be TRUE in either way. */ |
1912 | } |
1913 | |
1914 | do |
1915 | { |
1916 | count = 0; |
1917 | switch(status) |
1918 | { |
1919 | case start: |
1920 | SLJIT_ASSERT(save && common->recursive_head_ptr != 0); |
1921 | count = 1; |
1922 | srcw[0] = common->recursive_head_ptr; |
1923 | if (needs_control_head) |
1924 | { |
1925 | SLJIT_ASSERT(common->control_head_ptr != 0); |
1926 | count = 2; |
1927 | srcw[1] = common->control_head_ptr; |
1928 | } |
1929 | status = loop; |
1930 | break; |
1931 | |
1932 | case loop: |
1933 | if (cc >= ccend) |
1934 | { |
1935 | status = end; |
1936 | break; |
1937 | } |
1938 | |
1939 | switch(*cc) |
1940 | { |
1941 | case OP_KET: |
1942 | if (PRIVATE_DATA(cc) != 0) |
1943 | { |
1944 | count = 1; |
1945 | srcw[0] = PRIVATE_DATA(cc); |
1946 | SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); |
1947 | cc += PRIVATE_DATA(cc + 1); |
1948 | } |
1949 | cc += 1 + LINK_SIZE; |
1950 | break; |
1951 | |
1952 | case OP_ASSERT: |
1953 | case OP_ASSERT_NOT: |
1954 | case OP_ASSERTBACK: |
1955 | case OP_ASSERTBACK_NOT: |
1956 | case OP_ONCE: |
1957 | case OP_ONCE_NC: |
1958 | case OP_BRAPOS: |
1959 | case OP_SBRA: |
1960 | case OP_SBRAPOS: |
1961 | case OP_SCOND: |
1962 | count = 1; |
1963 | srcw[0] = PRIVATE_DATA(cc); |
1964 | SLJIT_ASSERT(srcw[0] != 0); |
1965 | cc += 1 + LINK_SIZE; |
1966 | break; |
1967 | |
1968 | case OP_CBRA: |
1969 | case OP_SCBRA: |
1970 | if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) |
1971 | { |
1972 | count = 1; |
1973 | srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); |
1974 | } |
1975 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
1976 | break; |
1977 | |
1978 | case OP_CBRAPOS: |
1979 | case OP_SCBRAPOS: |
1980 | count = 2; |
1981 | srcw[0] = PRIVATE_DATA(cc); |
1982 | srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); |
1983 | SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0); |
1984 | cc += 1 + LINK_SIZE + IMM2_SIZE; |
1985 | break; |
1986 | |
1987 | case OP_COND: |
1988 | /* Might be a hidden SCOND. */ |
1989 | alternative = cc + GET(cc, 1); |
1990 | if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) |
1991 | { |
1992 | count = 1; |
1993 | srcw[0] = PRIVATE_DATA(cc); |
1994 | SLJIT_ASSERT(srcw[0] != 0); |
1995 | } |
1996 | cc += 1 + LINK_SIZE; |
1997 | break; |
1998 | |
1999 | CASE_ITERATOR_PRIVATE_DATA_1 |
2000 | if (PRIVATE_DATA(cc)) |
2001 | { |
2002 | count = 1; |
2003 | srcw[0] = PRIVATE_DATA(cc); |
2004 | } |
2005 | cc += 2; |
2006 | #ifdef SUPPORT_UTF |
2007 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
2008 | #endif |
2009 | break; |
2010 | |
2011 | CASE_ITERATOR_PRIVATE_DATA_2A |
2012 | if (PRIVATE_DATA(cc)) |
2013 | { |
2014 | count = 2; |
2015 | srcw[0] = PRIVATE_DATA(cc); |
2016 | srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); |
2017 | } |
2018 | cc += 2; |
2019 | #ifdef SUPPORT_UTF |
2020 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
2021 | #endif |
2022 | break; |
2023 | |
2024 | CASE_ITERATOR_PRIVATE_DATA_2B |
2025 | if (PRIVATE_DATA(cc)) |
2026 | { |
2027 | count = 2; |
2028 | srcw[0] = PRIVATE_DATA(cc); |
2029 | srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); |
2030 | } |
2031 | cc += 2 + IMM2_SIZE; |
2032 | #ifdef SUPPORT_UTF |
2033 | if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
2034 | #endif |
2035 | break; |
2036 | |
2037 | CASE_ITERATOR_TYPE_PRIVATE_DATA_1 |
2038 | if (PRIVATE_DATA(cc)) |
2039 | { |
2040 | count = 1; |
2041 | srcw[0] = PRIVATE_DATA(cc); |
2042 | } |
2043 | cc += 1; |
2044 | break; |
2045 | |
2046 | CASE_ITERATOR_TYPE_PRIVATE_DATA_2A |
2047 | if (PRIVATE_DATA(cc)) |
2048 | { |
2049 | count = 2; |
2050 | srcw[0] = PRIVATE_DATA(cc); |
2051 | srcw[1] = srcw[0] + sizeof(sljit_sw); |
2052 | } |
2053 | cc += 1; |
2054 | break; |
2055 | |
2056 | CASE_ITERATOR_TYPE_PRIVATE_DATA_2B |
2057 | if (PRIVATE_DATA(cc)) |
2058 | { |
2059 | count = 2; |
2060 | srcw[0] = PRIVATE_DATA(cc); |
2061 | srcw[1] = srcw[0] + sizeof(sljit_sw); |
2062 | } |
2063 | cc += 1 + IMM2_SIZE; |
2064 | break; |
2065 | |
2066 | case OP_CLASS: |
2067 | case OP_NCLASS: |
2068 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
2069 | case OP_XCLASS: |
2070 | size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); |
2071 | #else |
2072 | size = 1 + 32 / (int)sizeof(pcre_uchar); |
2073 | #endif |
2074 | if (PRIVATE_DATA(cc)) |
2075 | switch(get_class_iterator_size(cc + size)) |
2076 | { |
2077 | case 1: |
2078 | count = 1; |
2079 | srcw[0] = PRIVATE_DATA(cc); |
2080 | break; |
2081 | |
2082 | case 2: |
2083 | count = 2; |
2084 | srcw[0] = PRIVATE_DATA(cc); |
2085 | srcw[1] = srcw[0] + sizeof(sljit_sw); |
2086 | break; |
2087 | |
2088 | default: |
2089 | SLJIT_ASSERT_STOP(); |
2090 | break; |
2091 | } |
2092 | cc += size; |
2093 | break; |
2094 | |
2095 | default: |
2096 | cc = next_opcode(common, cc); |
2097 | SLJIT_ASSERT(cc != NULL); |
2098 | break; |
2099 | } |
2100 | break; |
2101 | |
2102 | case end: |
2103 | SLJIT_ASSERT_STOP(); |
2104 | break; |
2105 | } |
2106 | |
2107 | while (count > 0) |
2108 | { |
2109 | count--; |
2110 | if (save) |
2111 | { |
2112 | if (tmp1next) |
2113 | { |
2114 | if (!tmp1empty) |
2115 | { |
2116 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); |
2117 | stackptr += sizeof(sljit_sw); |
2118 | } |
2119 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); |
2120 | tmp1empty = FALSE; |
2121 | tmp1next = FALSE; |
2122 | } |
2123 | else |
2124 | { |
2125 | if (!tmp2empty) |
2126 | { |
2127 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); |
2128 | stackptr += sizeof(sljit_sw); |
2129 | } |
2130 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); |
2131 | tmp2empty = FALSE; |
2132 | tmp1next = TRUE; |
2133 | } |
2134 | } |
2135 | else |
2136 | { |
2137 | if (tmp1next) |
2138 | { |
2139 | SLJIT_ASSERT(!tmp1empty); |
2140 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0); |
2141 | tmp1empty = stackptr >= stacktop; |
2142 | if (!tmp1empty) |
2143 | { |
2144 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); |
2145 | stackptr += sizeof(sljit_sw); |
2146 | } |
2147 | tmp1next = FALSE; |
2148 | } |
2149 | else |
2150 | { |
2151 | SLJIT_ASSERT(!tmp2empty); |
2152 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0); |
2153 | tmp2empty = stackptr >= stacktop; |
2154 | if (!tmp2empty) |
2155 | { |
2156 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); |
2157 | stackptr += sizeof(sljit_sw); |
2158 | } |
2159 | tmp1next = TRUE; |
2160 | } |
2161 | } |
2162 | } |
2163 | } |
2164 | while (status != end); |
2165 | |
2166 | if (save) |
2167 | { |
2168 | if (tmp1next) |
2169 | { |
2170 | if (!tmp1empty) |
2171 | { |
2172 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); |
2173 | stackptr += sizeof(sljit_sw); |
2174 | } |
2175 | if (!tmp2empty) |
2176 | { |
2177 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); |
2178 | stackptr += sizeof(sljit_sw); |
2179 | } |
2180 | } |
2181 | else |
2182 | { |
2183 | if (!tmp2empty) |
2184 | { |
2185 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); |
2186 | stackptr += sizeof(sljit_sw); |
2187 | } |
2188 | if (!tmp1empty) |
2189 | { |
2190 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); |
2191 | stackptr += sizeof(sljit_sw); |
2192 | } |
2193 | } |
2194 | } |
2195 | SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); |
2196 | } |
2197 | |
2198 | static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset) |
2199 | { |
2200 | pcre_uchar *end = bracketend(cc); |
2201 | BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; |
2202 | |
2203 | /* Assert captures then. */ |
2204 | if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) |
2205 | current_offset = NULL; |
2206 | /* Conditional block does not. */ |
2207 | if (*cc == OP_COND || *cc == OP_SCOND) |
2208 | has_alternatives = FALSE; |
2209 | |
2210 | cc = next_opcode(common, cc); |
2211 | if (has_alternatives) |
2212 | current_offset = common->then_offsets + (cc - common->start); |
2213 | |
2214 | while (cc < end) |
2215 | { |
2216 | if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) |
2217 | cc = set_then_offsets(common, cc, current_offset); |
2218 | else |
2219 | { |
2220 | if (*cc == OP_ALT && has_alternatives) |
2221 | current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); |
2222 | if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) |
2223 | *current_offset = 1; |
2224 | cc = next_opcode(common, cc); |
2225 | } |
2226 | } |
2227 | |
2228 | return end; |
2229 | } |
2230 | |
2231 | #undef CASE_ITERATOR_PRIVATE_DATA_1 |
2232 | #undef CASE_ITERATOR_PRIVATE_DATA_2A |
2233 | #undef CASE_ITERATOR_PRIVATE_DATA_2B |
2234 | #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 |
2235 | #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A |
2236 | #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B |
2237 | |
2238 | static SLJIT_INLINE BOOL is_powerof2(unsigned int value) |
2239 | { |
2240 | return (value & (value - 1)) == 0; |
2241 | } |
2242 | |
2243 | static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) |
2244 | { |
2245 | while (list) |
2246 | { |
2247 | /* sljit_set_label is clever enough to do nothing |
2248 | if either the jump or the label is NULL. */ |
2249 | SET_LABEL(list->jump, label); |
2250 | list = list->next; |
2251 | } |
2252 | } |
2253 | |
2254 | static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump) |
2255 | { |
2256 | jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); |
2257 | if (list_item) |
2258 | { |
2259 | list_item->next = *list; |
2260 | list_item->jump = jump; |
2261 | *list = list_item; |
2262 | } |
2263 | } |
2264 | |
2265 | static void add_stub(compiler_common *common, struct sljit_jump *start) |
2266 | { |
2267 | DEFINE_COMPILER; |
2268 | stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); |
2269 | |
2270 | if (list_item) |
2271 | { |
2272 | list_item->start = start; |
2273 | list_item->quit = LABEL(); |
2274 | list_item->next = common->stubs; |
2275 | common->stubs = list_item; |
2276 | } |
2277 | } |
2278 | |
2279 | static void flush_stubs(compiler_common *common) |
2280 | { |
2281 | DEFINE_COMPILER; |
2282 | stub_list *list_item = common->stubs; |
2283 | |
2284 | while (list_item) |
2285 | { |
2286 | JUMPHERE(list_item->start); |
2287 | add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); |
2288 | JUMPTO(SLJIT_JUMP, list_item->quit); |
2289 | list_item = list_item->next; |
2290 | } |
2291 | common->stubs = NULL; |
2292 | } |
2293 | |
2294 | static void add_label_addr(compiler_common *common, sljit_uw *update_addr) |
2295 | { |
2296 | DEFINE_COMPILER; |
2297 | label_addr_list *label_addr; |
2298 | |
2299 | label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list)); |
2300 | if (label_addr == NULL) |
2301 | return; |
2302 | label_addr->label = LABEL(); |
2303 | label_addr->update_addr = update_addr; |
2304 | label_addr->next = common->label_addrs; |
2305 | common->label_addrs = label_addr; |
2306 | } |
2307 | |
2308 | static SLJIT_INLINE void count_match(compiler_common *common) |
2309 | { |
2310 | DEFINE_COMPILER; |
2311 | |
2312 | OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); |
2313 | add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO)); |
2314 | } |
2315 | |
2316 | static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) |
2317 | { |
2318 | /* May destroy all locals and registers except TMP2. */ |
2319 | DEFINE_COMPILER; |
2320 | |
2321 | SLJIT_ASSERT(size > 0); |
2322 | OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); |
2323 | #ifdef DESTROY_REGISTERS |
2324 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); |
2325 | OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
2326 | OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); |
2327 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); |
2328 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); |
2329 | #endif |
2330 | add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); |
2331 | } |
2332 | |
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
/* Emits code that releases size machine words (sljit_sw units) by
subtracting them from STACK_TOP. size must be positive. */
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2340 | |
2341 | static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) |
2342 | { |
2343 | DEFINE_COMPILER; |
2344 | sljit_uw *result; |
2345 | |
2346 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
2347 | return NULL; |
2348 | |
2349 | result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data); |
2350 | if (SLJIT_UNLIKELY(result == NULL)) |
2351 | { |
2352 | sljit_set_compiler_memory_error(compiler); |
2353 | return NULL; |
2354 | } |
2355 | |
2356 | *(void**)result = common->read_only_data_head; |
2357 | common->read_only_data_head = (void *)result; |
2358 | return result + 1; |
2359 | } |
2360 | |
2361 | static void free_read_only_data(void *current, void *allocator_data) |
2362 | { |
2363 | void *next; |
2364 | |
2365 | SLJIT_UNUSED_ARG(allocator_data); |
2366 | |
2367 | while (current != NULL) |
2368 | { |
2369 | next = *(void**)current; |
2370 | SLJIT_FREE(current, allocator_data); |
2371 | current = next; |
2372 | } |
2373 | } |
2374 | |
2375 | static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) |
2376 | { |
2377 | DEFINE_COMPILER; |
2378 | struct sljit_label *loop; |
2379 | int i; |
2380 | |
2381 | /* At this point we can freely use all temporary registers. */ |
2382 | SLJIT_ASSERT(length > 1); |
2383 | /* TMP1 returns with begin - 1. */ |
2384 | OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); |
2385 | if (length < 8) |
2386 | { |
2387 | for (i = 1; i < length; i++) |
2388 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0); |
2389 | } |
2390 | else |
2391 | { |
2392 | GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); |
2393 | OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); |
2394 | loop = LABEL(); |
2395 | OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0); |
2396 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); |
2397 | JUMPTO(SLJIT_NOT_ZERO, loop); |
2398 | } |
2399 | } |
2400 | |
2401 | static SLJIT_INLINE void reset_fast_fail(compiler_common *common) |
2402 | { |
2403 | DEFINE_COMPILER; |
2404 | sljit_s32 i; |
2405 | |
2406 | SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr); |
2407 | |
2408 | OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2409 | for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw)) |
2410 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0); |
2411 | } |
2412 | |
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code that resets the match state: OVECTOR(2) .. OVECTOR(length - 1)
are overwritten with the value stored in OVECTOR(1), the mark and control
head locals are zeroed when they are in use, STACK_TOP is reloaded from
arguments->stack->base and TMP1 is loaded from the start_ptr local.
TMP2 and STACK_TOP are used as scratch registers on the way. */
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Short vector: one store per entry (no store at all if length == 2). */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* Long vector: emitted loop. TMP2 addresses the vector and STACK_TOP is
  borrowed as the counter; its real value is restored below. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  }

/* STACK_TOP temporarily holds the arguments pointer. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
/* Follow arguments->stack, then stack->base, to get the final STACK_TOP. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}
2447 | |
2448 | static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg) |
2449 | { |
2450 | while (current != NULL) |
2451 | { |
2452 | switch (current[-2]) |
2453 | { |
2454 | case type_then_trap: |
2455 | break; |
2456 | |
2457 | case type_mark: |
2458 | if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0) |
2459 | return current[-4]; |
2460 | break; |
2461 | |
2462 | default: |
2463 | SLJIT_ASSERT_STOP(); |
2464 | break; |
2465 | } |
2466 | SLJIT_ASSERT(current > (sljit_sw*)current[-1]); |
2467 | current = (sljit_sw*)current[-1]; |
2468 | } |
2469 | return -1; |
2470 | } |
2471 | |
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits code that copies the internal offset vector to the caller's
offsets array (as 32 bit character offsets relative to the subject begin),
stores the mark pointer into the arguments if marks are used, and leaves
the return value in SLJIT_RETURN_REG. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the old content of OVECTOR(1) (used below to recognize
entries that were never set); OVECTOR(1) itself becomes the match end. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
/* SLJIT_R1: number of entries to copy (arguments->offset_count). */
OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2: one int before the output array, SLJIT_R0: subject begin,
SLJIT_S0: address of the internal vector. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();
/* Convert the stored pointer to a byte offset from the subject begin. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Byte offset to character offset. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
/* NOTE(review): SLJIT_MOVU_S32 presumably also advances SLJIT_R2 by
sizeof(int) - confirm against the sljit documentation. */
OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan the internal vector downwards from the top bracket pair, two
  words per step, decrementing SLJIT_R1 until an entry differs from
  the value saved in SLJIT_S2. */
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2521 | |
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code that returns PCRE_ERROR_PARTIAL. When the caller's
offsets array has room (real_offset_count), the partial match data is
stored into offsets[0], offsets[1] and, if there are at least three
entries, offsets[2]. Every emitted path ends with a jump to quit. */
DEFINE_COMPILER;
struct sljit_jump *jump;

/* SLJIT_S1 is overwritten below, and it is the same register as STR_END. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: nothing can be stored. */
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* offsets[2] is written only when at least three slots are available. */
jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

/* offsets[1] = STR_END - begin (the load into SLJIT_R2 is consumed below). */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

/* offsets[0] = (start_used_ptr or hit_start local) - begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2563 | |
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR
is below it. Nothing is emitted unless a partial matching mode is compiled.
May destroy TMP1. */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  /* The value of -1 must be kept for start_used_ptr! */
  /* Adding 1 turns -1 into 0, so the unsigned comparison below also
  skips the store in that case. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
  {
  /* No -1 special case here: skip the store if start_used_ptr <= STR_PTR. */
  jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
}
2587 | |
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character has an othercase. The character is read from
cc: decoded with GETCHAR in UTF mode, a single code unit otherwise. */
unsigned int c;

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
    /* Above ASCII only the Unicode property data can provide an othercase. */
#ifdef SUPPORT_UCP
    return c != UCD_OTHERCASE(c);
#else
    return FALSE;
#endif
    }
#ifndef COMPILE_PCRE8
  return common->fcc[c] != c;
#endif
  /* In the 8 bit library control falls through to the table lookup at
  the end of the function. */
  }
else
#endif
  /* This statement is the else branch of the UTF test when SUPPORT_UTF
  is defined, and unconditional otherwise. */
  c = *cc;
/* The case flipping table is consulted only when MAX_255(c) holds. */
return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
2614 | |
2615 | static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) |
2616 | { |
2617 | /* Returns with the othercase. */ |
2618 | #ifdef SUPPORT_UTF |
2619 | if (common->utf && c > 127) |
2620 | { |
2621 | #ifdef SUPPORT_UCP |
2622 | return UCD_OTHERCASE(c); |
2623 | #else |
2624 | return c; |
2625 | #endif |
2626 | } |
2627 | #endif |
2628 | return TABLE_GET(c, common->fcc, c); |
2629 | } |
2630 | |
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 if they differ in more than one bit. Otherwise the low 8 bits of
the result hold the differing bit as a mask and the bits above them hold an
index that locates the mask inside the encoded character. The character at
cc must have an othercase (asserted below). */
unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
  /* Start from the last byte of the sequence and step back one byte for
  every 6 code point bits that have to be skipped to reach the set bit. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is moved down by 10 bits and handled
  by the common return below (indexes 0 and 1); a difference in the
  low 10 bits is reported with indexes 2 and 3. */
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UTF */
/* Index 0 when the bit is in the low 8 bits, index 1 otherwise. */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2706 | |
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial matching is occurred. Does not modify registers.
Nothing is emitted in JIT_COMPILE mode. When force is FALSE the partial
match action is skipped if start_used_ptr >= STR_PTR; when force is TRUE
it is skipped only in soft partial mode and only if start_used_ptr is -1. */
DEFINE_COMPILER;
struct sljit_jump *jump = NULL;

SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

if (common->mode == JIT_COMPILE)
  return;

if (!force)
  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  /* Soft mode: record the hit by zeroing the hit_start local and go on. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else
  {
  /* Hard mode: leave through the partial match exit, either directly or
  through the pending partialmatch list if the label does not exist yet. */
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

/* jump stays NULL when force is TRUE in hard partial mode. */
if (jump != NULL)
  JUMPHERE(jump);
}
2736 | |
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits an end of subject check. Jumps taken when STR_PTR >= STR_END are
added to end_reached, except in hard partial mode where a real partial
match leaves through the partial match exit instead.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching modes: the code below runs only at the subject end. */
jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  /* start_used_ptr >= STR_PTR: plain end reached. Otherwise the hit_start
  local is zeroed first, then end_reached is taken as well. */
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  /* Use the label if it already exists, queue the jump otherwise. */
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }
JUMPHERE(jump);
}
2766 | |
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end of subject check whose failure path is a backtrack. In
JIT_COMPILE mode this is a single compare and jump. In the partial modes,
reaching the subject end either backtracks (start_used_ptr >= STR_PTR, and
always in soft mode after zeroing the hit_start local) or, in hard mode,
leaves through the partial match exit. */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else
  {
  /* Use the label if it already exists, queue the jump otherwise. */
  if (common->partialmatchlabel != NULL)
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  else
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }
/* Landing point when there is still input left. */
JUMPHERE(jump);
}
2795 | |
2796 | static void peek_char(compiler_common *common, sljit_u32 max) |
2797 | { |
2798 | /* Reads the character into TMP1, keeps STR_PTR. |
2799 | Does not check STR_END. TMP2 Destroyed. */ |
2800 | DEFINE_COMPILER; |
2801 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
2802 | struct sljit_jump *jump; |
2803 | #endif |
2804 | |
2805 | SLJIT_UNUSED_ARG(max); |
2806 | |
2807 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
2808 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2809 | if (common->utf) |
2810 | { |
2811 | if (max < 128) return; |
2812 | |
2813 | jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
2814 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2815 | add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
2816 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2817 | JUMPHERE(jump); |
2818 | } |
2819 | #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ |
2820 | |
2821 | #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
2822 | if (common->utf) |
2823 | { |
2824 | if (max < 0xd800) return; |
2825 | |
2826 | OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
2827 | jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
2828 | /* TMP2 contains the high surrogate. */ |
2829 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2830 | OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); |
2831 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); |
2832 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); |
2833 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2834 | JUMPHERE(jump); |
2835 | } |
2836 | #endif |
2837 | } |
2838 | |
2839 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2840 | |
2841 | static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) |
2842 | { |
2843 | /* Tells whether the character codes below 128 are enough |
2844 | to determine a match. */ |
2845 | const sljit_u8 value = nclass ? 0xff : 0; |
2846 | const sljit_u8 *end = bitset + 32; |
2847 | |
2848 | bitset += 16; |
2849 | do |
2850 | { |
2851 | if (*bitset++ != value) |
2852 | return FALSE; |
2853 | } |
2854 | while (bitset < end); |
2855 | return TRUE; |
2856 | } |
2857 | |
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.
STR_PTR is always advanced by one code unit. When full_read is TRUE and the
first byte is 0xc0 or above, STR_PTR is additionally advanced by the
utf8_table4 entry of that byte, so the whole sequence is skipped. TMP2 is
destroyed. Only valid in UTF mode (asserted). */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Type lookup: the byte in TMP2 indexes the table at common->ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (full_read)
  {
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  /* The table address is biased by 0xc0 because TMP2 holds the raw byte. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
}
2881 | |
2882 | #endif /* SUPPORT_UTF && COMPILE_PCRE8 */ |
2883 | |
2884 | static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr) |
2885 | { |
2886 | /* Reads the precise value of a character into TMP1, if the character is |
2887 | between min and max (c >= min && c <= max). Otherwise it returns with a value |
2888 | outside the range. Does not check STR_END. */ |
2889 | DEFINE_COMPILER; |
2890 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
2891 | struct sljit_jump *jump; |
2892 | #endif |
2893 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2894 | struct sljit_jump *jump2; |
2895 | #endif |
2896 | |
2897 | SLJIT_UNUSED_ARG(update_str_ptr); |
2898 | SLJIT_UNUSED_ARG(min); |
2899 | SLJIT_UNUSED_ARG(max); |
2900 | SLJIT_ASSERT(min <= max); |
2901 | |
2902 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2903 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2904 | |
2905 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2906 | if (common->utf) |
2907 | { |
2908 | if (max < 128 && !update_str_ptr) return; |
2909 | |
2910 | jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
2911 | if (min >= 0x10000) |
2912 | { |
2913 | OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); |
2914 | if (update_str_ptr) |
2915 | OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2916 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2917 | jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7); |
2918 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2919 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2920 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2921 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
2922 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2923 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2924 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2925 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); |
2926 | if (!update_str_ptr) |
2927 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); |
2928 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2929 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2930 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2931 | JUMPHERE(jump2); |
2932 | if (update_str_ptr) |
2933 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); |
2934 | } |
2935 | else if (min >= 0x800 && max <= 0xffff) |
2936 | { |
2937 | OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0); |
2938 | if (update_str_ptr) |
2939 | OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2940 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2941 | jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf); |
2942 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); |
2943 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2944 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2945 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
2946 | if (!update_str_ptr) |
2947 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
2948 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2949 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2950 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2951 | JUMPHERE(jump2); |
2952 | if (update_str_ptr) |
2953 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); |
2954 | } |
2955 | else if (max >= 0x800) |
2956 | add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); |
2957 | else if (max < 128) |
2958 | { |
2959 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2960 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
2961 | } |
2962 | else |
2963 | { |
2964 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2965 | if (!update_str_ptr) |
2966 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2967 | else |
2968 | OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
2969 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); |
2970 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); |
2971 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); |
2972 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2973 | if (update_str_ptr) |
2974 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); |
2975 | } |
2976 | JUMPHERE(jump); |
2977 | } |
2978 | #endif |
2979 | |
2980 | #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
2981 | if (common->utf) |
2982 | { |
2983 | if (max >= 0x10000) |
2984 | { |
2985 | OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
2986 | jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
2987 | /* TMP2 contains the high surrogate. */ |
2988 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
2989 | OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); |
2990 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); |
2991 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
2992 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); |
2993 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
2994 | JUMPHERE(jump); |
2995 | return; |
2996 | } |
2997 | |
2998 | if (max < 0xd800 && !update_str_ptr) return; |
2999 | |
3000 | /* Skip low surrogate if necessary. */ |
3001 | OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
3002 | jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); |
3003 | if (update_str_ptr) |
3004 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
3005 | if (max >= 0xd800) |
3006 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); |
3007 | JUMPHERE(jump); |
3008 | } |
3009 | #endif |
3010 | } |
3011 | |
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Convenience wrapper around read_char_range(): requests a read with the
lower bound 0, the upper bound READ_CHAR_MAX and update_str_ptr set to
TRUE. Everything else is decided by read_char_range(). */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
3016 | |
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Emits code that reads the character type (the common->ctypes entry)
of the next character into TMP1 and updates STR_PTR. Does not check
STR_END. TMP2 is used as scratch (it receives the code unit).

common:         compiler state; common->utf selects the UTF paths.
update_str_ptr: when TRUE, the UTF paths step STR_PTR over the whole
                character; see the individual branches below for what
                happens when it is FALSE. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

/* The argument is only referenced inside conditionally compiled code. */
SLJIT_UNUSED_ARG(update_str_ptr);

/* Load the first code unit and step over it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  /* Units below 0xc0 are complete: the type loaded above is the result. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Inline path: read exactly one more unit, step over it, and combine
    the low six bits of both units as for a two unit sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    /* Type 0 is returned for values above 255; the table lookup is
    only performed for smaller values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* Out of line helper (registered on the utfreadtype8 jump list). */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. Larger code units get
type 0 and skip the lookup. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary: when the unit lies in the range
  0xd800 .. 0xdbff (tested with one compare after subtracting 0xd800),
  STR_PTR is advanced by one more unit. TMP2 is modified here. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
3081 | |
static void skip_char_back(compiler_common *common)
{
/* Emits code that goes one character back. Affects STR_PTR and TMP1.
Does not check the beginning of the subject. Without UTF (or in 32 bit
mode) this is a single subtraction of one code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one unit at a time and repeat while the unit just stepped
  over has the form 10xxxxxx ((unit & 0xc0) == 0x80), i.e. it is a UTF-8
  continuation byte. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. The unit is a low surrogate when
  (unit & 0xfc00) == 0xdc00; the comparison result is materialised in
  TMP1, shifted left by one (presumably converting one 16 bit unit to
  its size in bytes) and subtracted from STR_PTR, so no branch is
  needed. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one character is one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3116 | |
3117 | static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch) |
3118 | { |
3119 | /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ |
3120 | DEFINE_COMPILER; |
3121 | struct sljit_jump *jump; |
3122 | |
3123 | if (nltype == NLTYPE_ANY) |
3124 | { |
3125 | add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); |
3126 | add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO)); |
3127 | } |
3128 | else if (nltype == NLTYPE_ANYCRLF) |
3129 | { |
3130 | if (jumpifmatch) |
3131 | { |
3132 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR)); |
3133 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); |
3134 | } |
3135 | else |
3136 | { |
3137 | jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); |
3138 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); |
3139 | JUMPHERE(jump); |
3140 | } |
3141 | } |
3142 | else |
3143 | { |
3144 | SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); |
3145 | add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); |
3146 | } |
3147 | } |
3148 | |
3149 | #ifdef SUPPORT_UTF |
3150 | |
3151 | #if defined COMPILE_PCRE8 |
static void do_utfreadchar(compiler_common *common)
{
/* Emits the out of line helper for fast decoding of a UTF-8 character.
On entry TMP1 contains the first byte of the character (>= 0xc0) and
STR_PTR addresses the byte following it. On return TMP1 holds the
character value, TMP2 the length of the sequence (IN_UCHARS(2), (3) or
(4)), and STR_PTR has been moved past the character. The return address
is kept in RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first byte with the six payload bits
of the second byte. The first byte's bit 5 (and possibly bit 4) are
length marker bits which are still present in the result. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x800 is bit 5 of the first byte
(shifted by six): it is clear only for a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* At least three bytes: remove the marker bit just tested and append
the payload of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 is bit 4 of the first byte (shifted by twelve): it is
clear for a three byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence: remove the second marker bit and append the
payload of the fourth byte. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3199 | |
static void do_utfreadchar16(compiler_common *common)
{
/* Emits the out of line helper for fast decoding of a UTF-8 character
when only values below 0x10000 need to be exact. On entry TMP1 contains
the first byte of the character (>= 0xc0) and STR_PTR addresses the byte
following it. The value is returned in TMP1 and STR_PTR is moved past
the character; TMP2 is used as scratch. Two and three byte sequences
are decoded exactly. For a four byte sequence the result is not the
real character value, but it always has bit 0x10000 set. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first byte with the payload of the
second byte; length marker bits of the first byte are still present. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x800 is bit 5 of the first byte:
it is clear only for a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 0x400 is bit 4 of the first byte, which is set for a four byte
sequence. The test result is materialised in TMP2 and added to
STR_PTR, which skips one extra byte in that case without a branch. The
bit itself is left in TMP1, so after the shift below the result of a
four byte sequence has bit 0x10000 set. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Remove the marker bit 0x800 and append the payload of the next byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence (or the remainder of a four byte one). */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3235 | |
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the out of line helper for fast decoding of a UTF-8 character
type. On entry TMP2 contains the first byte of the character (>= 0xc0)
and STR_PTR addresses the byte following it. The type (the ctypes
entry, or 0 for characters above 255) is returned in TMP1 and STR_PTR
is moved past the character. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is clear only for a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. If they exceed 3, the
character is above 255 whatever the second byte is. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte sequence encoding a character above 255: type 0. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. Longer sequences
get type 0; STR_PTR is advanced by the utf8_table4 entry of the first
byte (the table is addressed relative to 0xc0). */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3271 | |
3272 | #endif /* COMPILE_PCRE8 */ |
3273 | |
3274 | #endif /* SUPPORT_UTF */ |
3275 | |
3276 | #ifdef SUPPORT_UCP |
3277 | |
/* UCD_BLOCK_SIZE must be 128 (see the assert below). The mask and the
shift split a character into its block number (upper bits) and its
offset inside the block (lower seven bits). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

static void do_getucd(compiler_common *common)
{
/* Emits the out of line helper which searches the UCD record for the
character that comes in TMP1. Returns chartype in TMP1 and UCD offset
in TMP2. The return address is kept in RETURN_ADDR. */
DEFINE_COMPILER;

/* The shifts used below (7 for the block, 3 for the record) depend on
these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: one byte per block, indexed by the block number. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: (stage 1 value << 7) + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Stage 2 entries are read as 16 bit values (index shifted by 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Read the chartype field of the selected record; the index is shifted
by 3, which matches the record size of 8 asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3302 | #endif |
3303 | |
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
/* Emits the code which moves STR_PTR to the next character, and returns
the label placed in front of that code. The first pass jumps over the
advancing code (see the start jump), so control only reaches it through
the returned label.

When common->match_end_ptr is not zero, code is emitted first which
searches for the end of the first line and stores it in the stack slot
addressed by common->match_end_ptr.

When newlinecheck is enabled (see below), a two unit newline sequence
is stepped over as a whole. TMP1, TMP2 and TMP3 may be used by the
emitted code. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The newline check is only emitted when neither hascrorlf nor the
first line search is active, and the newline type is ANY, ANYCRLF or a
fixed newline whose value is above 255 (two units packed in one value). */
if (!(hascrorlf || (common->match_end_ptr != 0)) &&
    (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

if (common->match_end_ptr != 0)
  {
  /* Search for the end of the first line. STR_PTR is saved in TMP3 and
  restored afterwards. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: look at the previous and the current unit
    after every step, and loop until they match the high and the low
    byte of common->newline or the end is reached. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* The stored position is one unit before STR_PTR. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores do not cause a data dependency. The position is
    stored before each character is read, so when a newline is found
    the slot holds the position of that newline. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline was found: the final STR_PTR is stored. Newline matches
    jump behind this store. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }

/* Jumps over the advancing code below; it is resolved at the end of
this function. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached when the current unit equals the high byte of
  common->newline. Step over it, and if the next unit equals the low
  byte, step over that one as well: the comparison result is
  materialised in TMP1, scaled to the unit size and added to STR_PTR. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

/* This is the label returned to the caller. */
mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UTF
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

/* The current unit is only loaded when one of the checks below uses it. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* Units of 0xc0 and above start a multi unit sequence: add the
  utf8_table4 entry of the unit (table addressed relative to 0xc0). */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* When (unit & 0xfc00) == 0xd800 (a high surrogate), one more unit is
  skipped: the comparison result is shifted left by one and added to
  STR_PTR. */
  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The first pass continues here, behind the advancing code. */
JUMPHERE(start);

if (newlinecheck)
  {
  /* Both exits of the newline handling code join here as well. */
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
3421 | |
3422 | #define MAX_N_CHARS 16 |
3423 | #define MAX_DIFF_CHARS 6 |
3424 | |
3425 | static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars) |
3426 | { |
3427 | pcre_uchar i, len; |
3428 | |
3429 | len = chars[0]; |
3430 | if (len == 255) |
3431 | return; |
3432 | |
3433 | if (len == 0) |
3434 | { |
3435 | chars[0] = 1; |
3436 | chars[1] = chr; |
3437 | return; |
3438 | } |
3439 | |
3440 | for (i = len; i > 0; i--) |
3441 | if (chars[i] == chr) |
3442 | return; |
3443 | |
3444 | if (len >= MAX_DIFF_CHARS - 1) |
3445 | { |
3446 | chars[0] = 255; |
3447 | return; |
3448 | } |
3449 | |
3450 | len++; |
3451 | chars[len] = chr; |
3452 | chars[0] = len; |
3453 | } |
3454 | |
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

common:    compiler state (UTF flag, character tables).
cc:        the byte code position where the scan starts.
chars:     the character sets of the prefix positions. Every position
           owns MAX_DIFF_CHARS entries maintained by add_prefix_char();
           chars[0] == 255 marks a position for which no usable set
           is available.
max_chars: the number of positions which may still be filled. Must not
           be zero on entry, because it is decremented before it is
           compared to zero.
rec_count: shared budget, decremented once for every opcode processed
           (including those processed by recursive calls).

Returns the number of positions processed (consumed), or 0 when the
budget in *rec_count is exhausted. Optional items and alternatives are
scanned by recursive calls which merge their characters into the same
positions; the value returned by such a call becomes the new max_chars
limit, and a returned 0 ends the scan. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case of a character, as code units. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Classification of the current opcode:
  last:     the scan ends after this item (variable repeat follows).
  any:      the item is recorded as "no usable set" (255).
  class:    the item is a 32 byte character class bitmap.
  caseless: the other case of a literal must be added as well. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* The whole assertion is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded, then the scan ends (last stays TRUE). */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan the path on which it is absent
    (starting behind the character), then record the character itself. */
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Jump along the link stored in the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every further alternative is scanned by a recursive call; the
    first alternative is processed by this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an additional IMM2_SIZE operand. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    /* In UTF-8 mode the scan stops here unless is_char7_bitset()
    accepts the bitmap. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Step over the opcode; the shared code below steps over the
    following unit. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UTF
    case OP_NOTPROP:
    case OP_PROP:
#ifndef COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the repeat count is taken here; the type opcode which
    follows is processed by the next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Everything else ends the prefix. */
    return consumed;
    }

  if (any)
    {
    /* Mark the next repeat positions as not usable. */
    do
      {
      chars[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The bitmap follows the opcode, the optional repeat opcode
    follows the bitmap. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(pcre_uchar);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may be absent: scan the path behind the repeat
      opcode first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The first operand is used as the repeat count; nothing can
      be recorded when it is zero. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class with the highest bit of the bitmap (character 255) set
      is recorded as not usable. */
      if (bytes[31] & 0x80)
        chars[0] = 255;
      else if (chars[0] != 255)
        {
        /* Add every character whose bit is set. chr is the character
        which belongs to the lowest bit of the current bitmap byte. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* The inner loop stops at the highest set bit: round chr
            up to the start of the next byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars[0] != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The scan only continues when both operands are equal, i.e.
      the repeat count is fixed. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* The other case is only usable when its encoding has the same
      length, since both are recorded position by position. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character repeat times; cc and len
  are restored from the saved values before every further repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
      add_prefix_char(*cc, chars);

      if (caseless)
        add_prefix_char(*oc, chars);

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3852 | |
3853 | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
3854 | |
3855 | static sljit_s32 character_to_int32(pcre_uchar chr) |
3856 | { |
3857 | sljit_s32 value = (sljit_s32)chr; |
3858 | #if defined COMPILE_PCRE8 |
3859 | #define SSE2_COMPARE_TYPE_INDEX 0 |
3860 | return (value << 24) | (value << 16) | (value << 8) | value; |
3861 | #elif defined COMPILE_PCRE16 |
3862 | #define SSE2_COMPARE_TYPE_INDEX 1 |
3863 | return (value << 16) | value; |
3864 | #elif defined COMPILE_PCRE32 |
3865 | #define SSE2_COMPARE_TYPE_INDEX 2 |
3866 | return value; |
3867 | #else |
3868 | #error "Unsupported unit width" |
3869 | #endif |
3870 | } |
3871 | |
3872 | static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2) |
3873 | { |
3874 | DEFINE_COMPILER; |
3875 | struct sljit_label *start; |
3876 | struct sljit_jump *quit[3]; |
3877 | struct sljit_jump *nomatch; |
3878 | sljit_u8 instruction[8]; |
3879 | sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); |
3880 | sljit_s32 tmp2_ind = sljit_get_register_index(TMP2); |
3881 | sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); |
3882 | BOOL load_twice = FALSE; |
3883 | pcre_uchar bit; |
3884 | |
3885 | bit = char1 ^ char2; |
3886 | if (!is_powerof2(bit)) |
3887 | bit = 0; |
3888 | |
3889 | if ((char1 != char2) && bit == 0) |
3890 | load_twice = TRUE; |
3891 | |
3892 | quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
3893 | |
3894 | /* First part (unaligned start) */ |
3895 | |
3896 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); |
3897 | |
3898 | SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1); |
3899 | |
3900 | /* MOVD xmm, r/m32 */ |
3901 | instruction[0] = 0x66; |
3902 | instruction[1] = 0x0f; |
3903 | instruction[2] = 0x6e; |
3904 | instruction[3] = 0xc0 | (2 << 3) | tmp1_ind; |
3905 | sljit_emit_op_custom(compiler, instruction, 4); |
3906 | |
3907 | if (char1 != char2) |
3908 | { |
3909 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); |
3910 | |
3911 | /* MOVD xmm, r/m32 */ |
3912 | instruction[3] = 0xc0 | (3 << 3) | tmp1_ind; |
3913 | sljit_emit_op_custom(compiler, instruction, 4); |
3914 | } |
3915 | |
3916 | /* PSHUFD xmm1, xmm2/m128, imm8 */ |
3917 | instruction[2] = 0x70; |
3918 | instruction[3] = 0xc0 | (2 << 3) | 2; |
3919 | instruction[4] = 0; |
3920 | sljit_emit_op_custom(compiler, instruction, 5); |
3921 | |
3922 | if (char1 != char2) |
3923 | { |
3924 | /* PSHUFD xmm1, xmm2/m128, imm8 */ |
3925 | instruction[3] = 0xc0 | (3 << 3) | 3; |
3926 | instruction[4] = 0; |
3927 | sljit_emit_op_custom(compiler, instruction, 5); |
3928 | } |
3929 | |
3930 | OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf); |
3931 | OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); |
3932 | |
3933 | /* MOVDQA xmm1, xmm2/m128 */ |
3934 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3935 | |
3936 | if (str_ptr_ind < 8) |
3937 | { |
3938 | instruction[2] = 0x6f; |
3939 | instruction[3] = (0 << 3) | str_ptr_ind; |
3940 | sljit_emit_op_custom(compiler, instruction, 4); |
3941 | |
3942 | if (load_twice) |
3943 | { |
3944 | instruction[3] = (1 << 3) | str_ptr_ind; |
3945 | sljit_emit_op_custom(compiler, instruction, 4); |
3946 | } |
3947 | } |
3948 | else |
3949 | { |
3950 | instruction[1] = 0x41; |
3951 | instruction[2] = 0x0f; |
3952 | instruction[3] = 0x6f; |
3953 | instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); |
3954 | sljit_emit_op_custom(compiler, instruction, 5); |
3955 | |
3956 | if (load_twice) |
3957 | { |
3958 | instruction[4] = (1 << 3) | str_ptr_ind; |
3959 | sljit_emit_op_custom(compiler, instruction, 5); |
3960 | } |
3961 | instruction[1] = 0x0f; |
3962 | } |
3963 | |
3964 | #else |
3965 | |
3966 | instruction[2] = 0x6f; |
3967 | instruction[3] = (0 << 3) | str_ptr_ind; |
3968 | sljit_emit_op_custom(compiler, instruction, 4); |
3969 | |
3970 | if (load_twice) |
3971 | { |
3972 | instruction[3] = (1 << 3) | str_ptr_ind; |
3973 | sljit_emit_op_custom(compiler, instruction, 4); |
3974 | } |
3975 | |
3976 | #endif |
3977 | |
3978 | if (bit != 0) |
3979 | { |
3980 | /* POR xmm1, xmm2/m128 */ |
3981 | instruction[2] = 0xeb; |
3982 | instruction[3] = 0xc0 | (0 << 3) | 3; |
3983 | sljit_emit_op_custom(compiler, instruction, 4); |
3984 | } |
3985 | |
3986 | /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
3987 | instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
3988 | instruction[3] = 0xc0 | (0 << 3) | 2; |
3989 | sljit_emit_op_custom(compiler, instruction, 4); |
3990 | |
3991 | if (load_twice) |
3992 | { |
3993 | instruction[3] = 0xc0 | (1 << 3) | 3; |
3994 | sljit_emit_op_custom(compiler, instruction, 4); |
3995 | } |
3996 | |
3997 | /* PMOVMSKB reg, xmm */ |
3998 | instruction[2] = 0xd7; |
3999 | instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; |
4000 | sljit_emit_op_custom(compiler, instruction, 4); |
4001 | |
4002 | if (load_twice) |
4003 | { |
4004 | OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); |
4005 | instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; |
4006 | sljit_emit_op_custom(compiler, instruction, 4); |
4007 | |
4008 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
4009 | OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); |
4010 | } |
4011 | |
4012 | OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0); |
4013 | |
4014 | /* BSF r32, r/m32 */ |
4015 | instruction[0] = 0x0f; |
4016 | instruction[1] = 0xbc; |
4017 | instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; |
4018 | sljit_emit_op_custom(compiler, instruction, 3); |
4019 | |
4020 | nomatch = JUMP(SLJIT_ZERO); |
4021 | |
4022 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
4023 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4024 | quit[1] = JUMP(SLJIT_JUMP); |
4025 | |
4026 | JUMPHERE(nomatch); |
4027 | |
4028 | start = LABEL(); |
4029 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
4030 | quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4031 | |
4032 | /* Second part (aligned) */ |
4033 | |
4034 | instruction[0] = 0x66; |
4035 | instruction[1] = 0x0f; |
4036 | |
4037 | /* MOVDQA xmm1, xmm2/m128 */ |
4038 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4039 | |
4040 | if (str_ptr_ind < 8) |
4041 | { |
4042 | instruction[2] = 0x6f; |
4043 | instruction[3] = (0 << 3) | str_ptr_ind; |
4044 | sljit_emit_op_custom(compiler, instruction, 4); |
4045 | |
4046 | if (load_twice) |
4047 | { |
4048 | instruction[3] = (1 << 3) | str_ptr_ind; |
4049 | sljit_emit_op_custom(compiler, instruction, 4); |
4050 | } |
4051 | } |
4052 | else |
4053 | { |
4054 | instruction[1] = 0x41; |
4055 | instruction[2] = 0x0f; |
4056 | instruction[3] = 0x6f; |
4057 | instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); |
4058 | sljit_emit_op_custom(compiler, instruction, 5); |
4059 | |
4060 | if (load_twice) |
4061 | { |
4062 | instruction[4] = (1 << 3) | str_ptr_ind; |
4063 | sljit_emit_op_custom(compiler, instruction, 5); |
4064 | } |
4065 | instruction[1] = 0x0f; |
4066 | } |
4067 | |
4068 | #else |
4069 | |
4070 | instruction[2] = 0x6f; |
4071 | instruction[3] = (0 << 3) | str_ptr_ind; |
4072 | sljit_emit_op_custom(compiler, instruction, 4); |
4073 | |
4074 | if (load_twice) |
4075 | { |
4076 | instruction[3] = (1 << 3) | str_ptr_ind; |
4077 | sljit_emit_op_custom(compiler, instruction, 4); |
4078 | } |
4079 | |
4080 | #endif |
4081 | |
4082 | if (bit != 0) |
4083 | { |
4084 | /* POR xmm1, xmm2/m128 */ |
4085 | instruction[2] = 0xeb; |
4086 | instruction[3] = 0xc0 | (0 << 3) | 3; |
4087 | sljit_emit_op_custom(compiler, instruction, 4); |
4088 | } |
4089 | |
4090 | /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
4091 | instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
4092 | instruction[3] = 0xc0 | (0 << 3) | 2; |
4093 | sljit_emit_op_custom(compiler, instruction, 4); |
4094 | |
4095 | if (load_twice) |
4096 | { |
4097 | instruction[3] = 0xc0 | (1 << 3) | 3; |
4098 | sljit_emit_op_custom(compiler, instruction, 4); |
4099 | } |
4100 | |
4101 | /* PMOVMSKB reg, xmm */ |
4102 | instruction[2] = 0xd7; |
4103 | instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; |
4104 | sljit_emit_op_custom(compiler, instruction, 4); |
4105 | |
4106 | if (load_twice) |
4107 | { |
4108 | instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; |
4109 | sljit_emit_op_custom(compiler, instruction, 4); |
4110 | |
4111 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
4112 | } |
4113 | |
4114 | /* BSF r32, r/m32 */ |
4115 | instruction[0] = 0x0f; |
4116 | instruction[1] = 0xbc; |
4117 | instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; |
4118 | sljit_emit_op_custom(compiler, instruction, 3); |
4119 | |
4120 | JUMPTO(SLJIT_ZERO, start); |
4121 | |
4122 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4123 | |
4124 | start = LABEL(); |
4125 | SET_LABEL(quit[0], start); |
4126 | SET_LABEL(quit[1], start); |
4127 | SET_LABEL(quit[2], start); |
4128 | } |
4129 | |
4130 | #undef SSE2_COMPARE_TYPE_INDEX |
4131 | |
4132 | #endif |
4133 | |
4134 | static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset) |
4135 | { |
4136 | DEFINE_COMPILER; |
4137 | struct sljit_label *start; |
4138 | struct sljit_jump *quit; |
4139 | struct sljit_jump *found; |
4140 | pcre_uchar mask; |
4141 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
4142 | struct sljit_label *utf_start = NULL; |
4143 | struct sljit_jump *utf_quit = NULL; |
4144 | #endif |
4145 | BOOL has_match_end = (common->match_end_ptr != 0); |
4146 | |
4147 | if (offset > 0) |
4148 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); |
4149 | |
4150 | if (has_match_end) |
4151 | { |
4152 | OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
4153 | |
4154 | OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1)); |
4155 | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
4156 | if (sljit_x86_is_cmov_available()) |
4157 | { |
4158 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0); |
4159 | sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0); |
4160 | } |
4161 | #endif |
4162 | { |
4163 | quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0); |
4164 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
4165 | JUMPHERE(quit); |
4166 | } |
4167 | } |
4168 | |
4169 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
4170 | if (common->utf && offset > 0) |
4171 | utf_start = LABEL(); |
4172 | #endif |
4173 | |
4174 | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
4175 | |
4176 | /* SSE2 accelerated first character search. */ |
4177 | |
4178 | if (sljit_x86_is_sse2_available()) |
4179 | { |
4180 | fast_forward_first_char2_sse2(common, char1, char2); |
4181 | |
4182 | SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0); |
4183 | if (common->mode == JIT_COMPILE) |
4184 | { |
4185 | /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */ |
4186 | SLJIT_ASSERT(common->forced_quit_label == NULL); |
4187 | OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); |
4188 | add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
4189 | |
4190 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
4191 | if (common->utf && offset > 0) |
4192 | { |
4193 | SLJIT_ASSERT(common->mode == JIT_COMPILE); |
4194 | |
4195 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); |
4196 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4197 | #if defined COMPILE_PCRE8 |
4198 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); |
4199 | CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); |
4200 | #elif defined COMPILE_PCRE16 |
4201 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
4202 | CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); |
4203 | #else |
4204 | #error "Unknown code width" |
4205 | #endif |
4206 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4207 | } |
4208 | #endif |
4209 | |
4210 | if (offset > 0) |
4211 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); |
4212 | } |
4213 | else if (sljit_x86_is_cmov_available()) |
4214 | { |
4215 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); |
4216 | sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0); |
4217 | } |
4218 | else |
4219 | { |
4220 | quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); |
4221 | OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0); |
4222 | JUMPHERE(quit); |
4223 | } |
4224 | |
4225 | if (has_match_end) |
4226 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
4227 | return; |
4228 | } |
4229 | |
4230 | #endif |
4231 | |
4232 | quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4233 | |
4234 | start = LABEL(); |
4235 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
4236 | |
4237 | if (char1 == char2) |
4238 | found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1); |
4239 | else |
4240 | { |
4241 | mask = char1 ^ char2; |
4242 | if (is_powerof2(mask)) |
4243 | { |
4244 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); |
4245 | found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask); |
4246 | } |
4247 | else |
4248 | { |
4249 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1); |
4250 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
4251 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2); |
4252 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
4253 | found = JUMP(SLJIT_NOT_ZERO); |
4254 | } |
4255 | } |
4256 | |
4257 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4258 | CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start); |
4259 | |
4260 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
4261 | if (common->utf && offset > 0) |
4262 | utf_quit = JUMP(SLJIT_JUMP); |
4263 | #endif |
4264 | |
4265 | JUMPHERE(found); |
4266 | |
4267 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
4268 | if (common->utf && offset > 0) |
4269 | { |
4270 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); |
4271 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4272 | #if defined COMPILE_PCRE8 |
4273 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); |
4274 | CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); |
4275 | #elif defined COMPILE_PCRE16 |
4276 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
4277 | CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); |
4278 | #else |
4279 | #error "Unknown code width" |
4280 | #endif |
4281 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4282 | JUMPHERE(utf_quit); |
4283 | } |
4284 | #endif |
4285 | |
4286 | JUMPHERE(quit); |
4287 | |
4288 | if (has_match_end) |
4289 | { |
4290 | quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); |
4291 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
4292 | if (offset > 0) |
4293 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); |
4294 | JUMPHERE(quit); |
4295 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
4296 | } |
4297 | |
4298 | if (offset > 0) |
4299 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); |
4300 | } |
4301 | |
4302 | static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common) |
4303 | { |
4304 | DEFINE_COMPILER; |
4305 | struct sljit_label *start; |
4306 | struct sljit_jump *quit; |
4307 | struct sljit_jump *match; |
4308 | /* bytes[0] represent the number of characters between 0 |
4309 | and MAX_N_BYTES - 1, 255 represents any character. */ |
4310 | pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS]; |
4311 | sljit_s32 offset; |
4312 | pcre_uchar mask; |
4313 | pcre_uchar *char_set, *char_set_end; |
4314 | int i, max, from; |
4315 | int range_right = -1, range_len; |
4316 | sljit_u8 *update_table = NULL; |
4317 | BOOL in_range; |
4318 | sljit_u32 rec_count; |
4319 | |
4320 | for (i = 0; i < MAX_N_CHARS; i++) |
4321 | chars[i * MAX_DIFF_CHARS] = 0; |
4322 | |
4323 | rec_count = 10000; |
4324 | max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); |
4325 | |
4326 | if (max < 1) |
4327 | return FALSE; |
4328 | |
4329 | in_range = FALSE; |
4330 | /* Prevent compiler "uninitialized" warning */ |
4331 | from = 0; |
4332 | range_len = 4 /* minimum length */ - 1; |
4333 | for (i = 0; i <= max; i++) |
4334 | { |
4335 | if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255)) |
4336 | { |
4337 | range_len = i - from; |
4338 | range_right = i - 1; |
4339 | } |
4340 | |
4341 | if (i < max && chars[i * MAX_DIFF_CHARS] < 255) |
4342 | { |
4343 | SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0); |
4344 | if (!in_range) |
4345 | { |
4346 | in_range = TRUE; |
4347 | from = i; |
4348 | } |
4349 | } |
4350 | else |
4351 | in_range = FALSE; |
4352 | } |
4353 | |
4354 | if (range_right >= 0) |
4355 | { |
4356 | update_table = (sljit_u8 *)allocate_read_only_data(common, 256); |
4357 | if (update_table == NULL) |
4358 | return TRUE; |
4359 | memset(update_table, IN_UCHARS(range_len), 256); |
4360 | |
4361 | for (i = 0; i < range_len; i++) |
4362 | { |
4363 | char_set = chars + ((range_right - i) * MAX_DIFF_CHARS); |
4364 | SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255); |
4365 | char_set_end = char_set + char_set[0]; |
4366 | char_set++; |
4367 | while (char_set <= char_set_end) |
4368 | { |
4369 | if (update_table[(*char_set) & 0xff] > IN_UCHARS(i)) |
4370 | update_table[(*char_set) & 0xff] = IN_UCHARS(i); |
4371 | char_set++; |
4372 | } |
4373 | } |
4374 | } |
4375 | |
4376 | offset = -1; |
4377 | /* Scan forward. */ |
4378 | for (i = 0; i < max; i++) |
4379 | { |
4380 | if (offset == -1) |
4381 | { |
4382 | if (chars[i * MAX_DIFF_CHARS] <= 2) |
4383 | offset = i; |
4384 | } |
4385 | else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2) |
4386 | { |
4387 | if (chars[i * MAX_DIFF_CHARS] == 1) |
4388 | offset = i; |
4389 | else |
4390 | { |
4391 | mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2]; |
4392 | if (!is_powerof2(mask)) |
4393 | { |
4394 | mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2]; |
4395 | if (is_powerof2(mask)) |
4396 | offset = i; |
4397 | } |
4398 | } |
4399 | } |
4400 | } |
4401 | |
4402 | if (range_right < 0) |
4403 | { |
4404 | if (offset < 0) |
4405 | return FALSE; |
4406 | SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2); |
4407 | /* Works regardless the value is 1 or 2. */ |
4408 | mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]]; |
4409 | fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset); |
4410 | return TRUE; |
4411 | } |
4412 | |
4413 | if (range_right == offset) |
4414 | offset = -1; |
4415 | |
4416 | SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2)); |
4417 | |
4418 | max -= 1; |
4419 | SLJIT_ASSERT(max > 0); |
4420 | if (common->match_end_ptr != 0) |
4421 | { |
4422 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
4423 | OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
4424 | OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); |
4425 | quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0); |
4426 | OP1(SLJIT_MOV, STR_END, 0, TMP1, 0); |
4427 | JUMPHERE(quit); |
4428 | } |
4429 | else |
4430 | OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); |
4431 | |
4432 | SLJIT_ASSERT(range_right >= 0); |
4433 | |
4434 | #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4435 | OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); |
4436 | #endif |
4437 | |
4438 | start = LABEL(); |
4439 | quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4440 | |
4441 | #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) |
4442 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); |
4443 | #else |
4444 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); |
4445 | #endif |
4446 | |
4447 | #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
4448 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); |
4449 | #else |
4450 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); |
4451 | #endif |
4452 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4453 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); |
4454 | |
4455 | if (offset >= 0) |
4456 | { |
4457 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset)); |
4458 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4459 | |
4460 | if (chars[offset * MAX_DIFF_CHARS] == 1) |
4461 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start); |
4462 | else |
4463 | { |
4464 | mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2]; |
4465 | if (is_powerof2(mask)) |
4466 | { |
4467 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); |
4468 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start); |
4469 | } |
4470 | else |
4471 | { |
4472 | match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]); |
4473 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start); |
4474 | JUMPHERE(match); |
4475 | } |
4476 | } |
4477 | } |
4478 | |
4479 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
4480 | if (common->utf && offset != 0) |
4481 | { |
4482 | if (offset < 0) |
4483 | { |
4484 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
4485 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4486 | } |
4487 | else |
4488 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); |
4489 | #if defined COMPILE_PCRE8 |
4490 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); |
4491 | CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start); |
4492 | #elif defined COMPILE_PCRE16 |
4493 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
4494 | CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start); |
4495 | #else |
4496 | #error "Unknown code width" |
4497 | #endif |
4498 | if (offset < 0) |
4499 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4500 | } |
4501 | #endif |
4502 | |
4503 | if (offset >= 0) |
4504 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4505 | |
4506 | JUMPHERE(quit); |
4507 | |
4508 | if (common->match_end_ptr != 0) |
4509 | { |
4510 | if (range_right >= 0) |
4511 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
4512 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
4513 | if (range_right >= 0) |
4514 | { |
4515 | quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0); |
4516 | OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); |
4517 | JUMPHERE(quit); |
4518 | } |
4519 | } |
4520 | else |
4521 | OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); |
4522 | return TRUE; |
4523 | } |
4524 | |
4525 | #undef MAX_N_CHARS |
4526 | #undef MAX_DIFF_CHARS |
4527 | |
4528 | static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless) |
4529 | { |
4530 | pcre_uchar oc; |
4531 | |
4532 | oc = first_char; |
4533 | if (caseless) |
4534 | { |
4535 | oc = TABLE_GET(first_char, common->fcc, first_char); |
4536 | #if defined SUPPORT_UCP && !defined COMPILE_PCRE8 |
4537 | if (first_char > 127 && common->utf) |
4538 | oc = UCD_OTHERCASE(first_char); |
4539 | #endif |
4540 | } |
4541 | |
4542 | fast_forward_first_char2(common, first_char, oc, 0); |
4543 | } |
4544 | |
4545 | static SLJIT_INLINE void fast_forward_newline(compiler_common *common) |
4546 | { |
4547 | DEFINE_COMPILER; |
4548 | struct sljit_label *loop; |
4549 | struct sljit_jump *lastchar; |
4550 | struct sljit_jump *firstchar; |
4551 | struct sljit_jump *quit; |
4552 | struct sljit_jump *foundcr = NULL; |
4553 | struct sljit_jump *notfoundnl; |
4554 | jump_list *newline = NULL; |
4555 | |
4556 | if (common->match_end_ptr != 0) |
4557 | { |
4558 | OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
4559 | OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
4560 | } |
4561 | |
4562 | if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
4563 | { |
4564 | lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4565 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
4566 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
4567 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); |
4568 | firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); |
4569 | |
4570 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); |
4571 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); |
4572 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL); |
4573 | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4574 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); |
4575 | #endif |
4576 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
4577 | |
4578 | loop = LABEL(); |
4579 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4580 | quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4581 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); |
4582 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); |
4583 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); |
4584 | CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); |
4585 | |
4586 | JUMPHERE(quit); |
4587 | JUMPHERE(firstchar); |
4588 | JUMPHERE(lastchar); |
4589 | |
4590 | if (common->match_end_ptr != 0) |
4591 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
4592 | return; |
4593 | } |
4594 | |
4595 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
4596 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
4597 | firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); |
4598 | skip_char_back(common); |
4599 | |
4600 | loop = LABEL(); |
4601 | common->ff_newline_shortcut = loop; |
4602 | |
4603 | read_char_range(common, common->nlmin, common->nlmax, TRUE); |
4604 | lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4605 | if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) |
4606 | foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); |
4607 | check_newlinechar(common, common->nltype, &newline, FALSE); |
4608 | set_jumps(newline, loop); |
4609 | |
4610 | if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) |
4611 | { |
4612 | quit = JUMP(SLJIT_JUMP); |
4613 | JUMPHERE(foundcr); |
4614 | notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4615 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
4616 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); |
4617 | OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
4618 | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
4619 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); |
4620 | #endif |
4621 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4622 | JUMPHERE(notfoundnl); |
4623 | JUMPHERE(quit); |
4624 | } |
4625 | JUMPHERE(lastchar); |
4626 | JUMPHERE(firstchar); |
4627 | |
4628 | if (common->match_end_ptr != 0) |
4629 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
4630 | } |
4631 | |
4632 | static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); |
4633 | |
4634 | static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits) |
4635 | { |
4636 | DEFINE_COMPILER; |
4637 | struct sljit_label *start; |
4638 | struct sljit_jump *quit; |
4639 | struct sljit_jump *found = NULL; |
4640 | jump_list *matches = NULL; |
4641 | #ifndef COMPILE_PCRE8 |
4642 | struct sljit_jump *jump; |
4643 | #endif |
4644 | |
4645 | if (common->match_end_ptr != 0) |
4646 | { |
4647 | OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); |
4648 | OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
4649 | } |
4650 | |
4651 | start = LABEL(); |
4652 | quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4653 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
4654 | #ifdef SUPPORT_UTF |
4655 | if (common->utf) |
4656 | OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); |
4657 | #endif |
4658 | |
4659 | if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches)) |
4660 | { |
4661 | #ifndef COMPILE_PCRE8 |
4662 | jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255); |
4663 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); |
4664 | JUMPHERE(jump); |
4665 | #endif |
4666 | OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
4667 | OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
4668 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); |
4669 | OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
4670 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
4671 | found = JUMP(SLJIT_NOT_ZERO); |
4672 | } |
4673 | |
4674 | #ifdef SUPPORT_UTF |
4675 | if (common->utf) |
4676 | OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
4677 | #endif |
4678 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4679 | #ifdef SUPPORT_UTF |
4680 | #if defined COMPILE_PCRE8 |
4681 | if (common->utf) |
4682 | { |
4683 | CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); |
4684 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
4685 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4686 | } |
4687 | #elif defined COMPILE_PCRE16 |
4688 | if (common->utf) |
4689 | { |
4690 | CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); |
4691 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
4692 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
4693 | OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
4694 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
4695 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4696 | } |
4697 | #endif /* COMPILE_PCRE[8|16] */ |
4698 | #endif /* SUPPORT_UTF */ |
4699 | JUMPTO(SLJIT_JUMP, start); |
4700 | if (found != NULL) |
4701 | JUMPHERE(found); |
4702 | if (matches != NULL) |
4703 | set_jumps(matches, LABEL()); |
4704 | JUMPHERE(quit); |
4705 | |
4706 | if (common->match_end_ptr != 0) |
4707 | OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); |
4708 | } |
4709 | |
4710 | static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar) |
4711 | { |
4712 | DEFINE_COMPILER; |
4713 | struct sljit_label *loop; |
4714 | struct sljit_jump *toolong; |
4715 | struct sljit_jump *alreadyfound; |
4716 | struct sljit_jump *found; |
4717 | struct sljit_jump *foundoc = NULL; |
4718 | struct sljit_jump *notfound; |
4719 | sljit_u32 oc, bit; |
4720 | |
4721 | SLJIT_ASSERT(common->req_char_ptr != 0); |
4722 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); |
4723 | OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); |
4724 | toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0); |
4725 | alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); |
4726 | |
4727 | if (has_firstchar) |
4728 | OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
4729 | else |
4730 | OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); |
4731 | |
4732 | loop = LABEL(); |
4733 | notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0); |
4734 | |
4735 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); |
4736 | oc = req_char; |
4737 | if (caseless) |
4738 | { |
4739 | oc = TABLE_GET(req_char, common->fcc, req_char); |
4740 | #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
4741 | if (req_char > 127 && common->utf) |
4742 | oc = UCD_OTHERCASE(req_char); |
4743 | #endif |
4744 | } |
4745 | if (req_char == oc) |
4746 | found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); |
4747 | else |
4748 | { |
4749 | bit = req_char ^ oc; |
4750 | if (is_powerof2(bit)) |
4751 | { |
4752 | OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); |
4753 | found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); |
4754 | } |
4755 | else |
4756 | { |
4757 | found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); |
4758 | foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); |
4759 | } |
4760 | } |
4761 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); |
4762 | JUMPTO(SLJIT_JUMP, loop); |
4763 | |
4764 | JUMPHERE(found); |
4765 | if (foundoc) |
4766 | JUMPHERE(foundoc); |
4767 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); |
4768 | JUMPHERE(alreadyfound); |
4769 | JUMPHERE(toolong); |
4770 | return notfound; |
4771 | } |
4772 | |
/* Emits the frame reverting helper. The generated routine is entered with a
fast call (return address kept in RETURN_ADDR) and walks a list of records
starting at the current STACK_TOP. The first word of each record selects what
is done with it:

  word > 0   the word plus the value placed in TMP3 by GET_LOCAL_BASE is used
             as a destination address; the next two words of the record are
             stored there and the cursor advances by three words
  word == 0  end of the list, the routine returns
  word < 0   the negated word plus TMP3 is used as a destination address; the
             next single word of the record is stored there and the cursor
             advances by two words

TMP1 (cursor), TMP2 and TMP3 are overwritten by the generated code.
NOTE(review): TMP3 is presumably the base address of the local variable area;
confirm against the GET_LOCAL_BASE definition. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 is the read cursor, it starts at STACK_TOP. */
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
/* Load the leading word of the record and set the signed flags from it. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive leading word: copy two words to (TMP3 + word). */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* No instruction has been emitted since the comparison on this path, so its
flags are tested a second time to separate "negative" from "zero". */
jump = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative leading word: copy a single word to (TMP3 - word). */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4807 | |
/* Emits the word boundary test helper. The generated routine is entered with
a fast call; its return address is parked in the LOCALS0 stack slot while it
runs. It classifies the character before STR_PTR (result kept in the LOCALS1
stack slot) and the character at STR_PTR (result kept in TMP2) as word (1) or
non-word (0). A missing character on either side counts as non-word. The last
emitted instruction XORs the two results with SLJIT_SET_E, so the equality
flag tells the caller whether the two classifications are the same. TMP1,
TMP2 and LOCALS1 are overwritten. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table lookups below extract the word bit with a shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* At (or before) the start of the subject there is no previous character:
LOCALS1 keeps the zero stored above. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* Underscore is a word character without consulting the UCD: TMP2 is
  preset to 1 and the lookup is jumped over. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  /* NOTE(review): getucd presumably leaves the character type in TMP1;
  confirm. Two unsigned range checks follow: ucp_Ll..ucp_Lu, then
  ucp_Nd..ucp_No. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Characters above 255 are not looked up in the ctypes table; for them
  LOCALS1 keeps its zero. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR. TMP2 defaults to zero, which is the result
when check_str_end() takes one of its skipread_list jumps. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Combine both sides: the XOR result is zero when they are classified the
same way. The return address is taken back from LOCALS0. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4916 | |
4917 | static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) |
4918 | { |
4919 | /* May destroy TMP1. */ |
4920 | DEFINE_COMPILER; |
4921 | int ranges[MAX_RANGE_SIZE]; |
4922 | sljit_u8 bit, cbit, all; |
4923 | int i, byte, length = 0; |
4924 | |
4925 | bit = bits[0] & 0x1; |
4926 | /* All bits will be zero or one (since bit is zero or one). */ |
4927 | all = -bit; |
4928 | |
4929 | for (i = 0; i < 256; ) |
4930 | { |
4931 | byte = i >> 3; |
4932 | if ((i & 0x7) == 0 && bits[byte] == all) |
4933 | i += 8; |
4934 | else |
4935 | { |
4936 | cbit = (bits[byte] >> (i & 0x7)) & 0x1; |
4937 | if (cbit != bit) |
4938 | { |
4939 | if (length >= MAX_RANGE_SIZE) |
4940 | return FALSE; |
4941 | ranges[length] = i; |
4942 | length++; |
4943 | bit = cbit; |
4944 | all = -cbit; |
4945 | } |
4946 | i++; |
4947 | } |
4948 | } |
4949 | |
4950 | if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) |
4951 | { |
4952 | if (length >= MAX_RANGE_SIZE) |
4953 | return FALSE; |
4954 | ranges[length] = 256; |
4955 | length++; |
4956 | } |
4957 | |
4958 | if (length < 0 || length > 4) |
4959 | return FALSE; |
4960 | |
4961 | bit = bits[0] & 0x1; |
4962 | if (invert) bit ^= 0x1; |
4963 | |
4964 | /* No character is accepted. */ |
4965 | if (length == 0 && bit == 0) |
4966 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
4967 | |
4968 | switch(length) |
4969 | { |
4970 | case 0: |
4971 | /* When bit != 0, all characters are accepted. */ |
4972 | return TRUE; |
4973 | |
4974 | case 1: |
4975 | add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
4976 | return TRUE; |
4977 | |
4978 | case 2: |
4979 | if (ranges[0] + 1 != ranges[1]) |
4980 | { |
4981 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
4982 | add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
4983 | } |
4984 | else |
4985 | add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
4986 | return TRUE; |
4987 | |
4988 | case 3: |
4989 | if (bit != 0) |
4990 | { |
4991 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
4992 | if (ranges[0] + 1 != ranges[1]) |
4993 | { |
4994 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
4995 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
4996 | } |
4997 | else |
4998 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
4999 | return TRUE; |
5000 | } |
5001 | |
5002 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0])); |
5003 | if (ranges[1] + 1 != ranges[2]) |
5004 | { |
5005 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]); |
5006 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); |
5007 | } |
5008 | else |
5009 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1])); |
5010 | return TRUE; |
5011 | |
5012 | case 4: |
5013 | if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2]) |
5014 | && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2] |
5015 | && (ranges[1] & (ranges[2] - ranges[0])) == 0 |
5016 | && is_powerof2(ranges[2] - ranges[0])) |
5017 | { |
5018 | SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0); |
5019 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]); |
5020 | if (ranges[2] + 1 != ranges[3]) |
5021 | { |
5022 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); |
5023 | add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
5024 | } |
5025 | else |
5026 | add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); |
5027 | return TRUE; |
5028 | } |
5029 | |
5030 | if (bit != 0) |
5031 | { |
5032 | i = 0; |
5033 | if (ranges[0] + 1 != ranges[1]) |
5034 | { |
5035 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
5036 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
5037 | i = ranges[0]; |
5038 | } |
5039 | else |
5040 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); |
5041 | |
5042 | if (ranges[2] + 1 != ranges[3]) |
5043 | { |
5044 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i); |
5045 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); |
5046 | } |
5047 | else |
5048 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i)); |
5049 | return TRUE; |
5050 | } |
5051 | |
5052 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); |
5053 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0])); |
5054 | if (ranges[1] + 1 != ranges[2]) |
5055 | { |
5056 | OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]); |
5057 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); |
5058 | } |
5059 | else |
5060 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); |
5061 | return TRUE; |
5062 | |
5063 | default: |
5064 | SLJIT_ASSERT_STOP(); |
5065 | return FALSE; |
5066 | } |
5067 | } |
5068 | |
/* Emits the "any newline" test helper (fast call, return address kept in
RETURN_ADDR). The generated code tests the character in TMP1 against
0x0a..0x0d and 0x85, and additionally against 0x2028 and 0x2029 when the
wide character part below is compiled in (and, in the 8 bit library, UTF mode
is on). The result (0 or 1) is left in TMP2 and the last OP_FLAGS also sets
the equality flag from it. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 itself is modified as well: it is rebased by 0x0a (and may get its
lowest bit set) by the generated code. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Unsigned range check for 0x0a..0x0d, then an equality test for 0x85. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both rebased by 0x0a),
  so one comparison covers the two code points. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison emitted above. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5095 | |
/* Emits the horizontal space test helper (fast call, return address kept in
RETURN_ADDR). The generated code tests the character in TMP1 against 0x09,
0x20 and 0xa0, and additionally against 0x1680, 0x180e, 0x2000..0x200a,
0x202f, 0x205f and 0x3000 when the wide character part below is compiled in
(and, in the 8 bit library, UTF mode is on). The result (0 or 1) is left in
TMP2 and the last OP_FLAGS also sets the equality flag from it. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
TMP1 is rebased by 0x2000 in the wide character part. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison result is ORed into TMP2 by the OP_FLAGS that follows it;
the result of the last comparison is merged by the final OP_FLAGS. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* The remaining code points are tested relative to 0x2000; the first test
  is an unsigned range check for 0x2000..0x200a. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5134 | |
/* Emits the vertical space test helper (fast call, return address kept in
RETURN_ADDR). The generated code tests the character in TMP1 against
0x0a..0x0d and 0x85, and additionally against 0x2028 and 0x2029 when the
wide character part below is compiled in (and, in the 8 bit library, UTF mode
is on). The result (0 or 1) is left in TMP2 and the last OP_FLAGS also sets
the equality flag from it. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
TMP1 is rebased by 0x0a (and may get its lowest bit set). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Unsigned range check for 0x0a..0x0d, then an equality test for 0x85. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* NOTE(review): check_anynewline emits the same sequence but without
  SLJIT_SET_E on this OP_FLAGS; the flags are recomputed by the SUB two
  instructions below, so the flag request here looks redundant. Confirm
  before changing it. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both rebased by 0x0a),
  so one comparison covers the two code points. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison emitted above. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5162 | |
/* The comparison helpers below borrow two registers that normally have
another role as character holders. The generated code saves their values on
entry and restores them before returning. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP

/* Emits the caseful sequence comparison helper (fast call, return address
kept in RETURN_ADDR). On entry of the generated code TMP1 points to the first
sequence, STR_PTR minus TMP2 is the start of the second one, and TMP2 is the
remaining length; it is decreased by IN_UCHARS(1) per compared character and
the loop runs until it reaches zero or a pair of characters differs.
TMP3 and the LOCALS0 stack slot are used to preserve CHAR1 and CHAR2. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers. */
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character before the loop.
NOTE(review): presumably because MOVU_UCHAR is the updating load form which
advances the base register by the given offset before reading; confirm
against the MOVU_UCHAR definition. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop on the first mismatch. */
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one character bias of STR_PTR and restore the borrowed
registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5192 | |
/* In the caseless helper the STACK_LIMIT register is borrowed to hold the
address of the lower casing table. */
#define LCC_TABLE STACK_LIMIT

/* Emits the caseless sequence comparison helper (fast call, return address
kept in RETURN_ADDR). It has the same register interface as the caseful
helper: TMP1 points to the first sequence, STR_PTR minus TMP2 is the start of
the second one and TMP2 is the remaining length. Before being compared, each
character is replaced by its entry in the common->lcc table; in the 16 and 32
bit libraries characters above 255 skip the table and are compared as they
are. TMP3 and the LOCALS0 / LOCALS1 stack slots are used to preserve
LCC_TABLE, CHAR1 and CHAR2. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Save the borrowed registers, then load the table address. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character before the loop.
NOTE(review): presumably because MOVU_UCHAR is the updating load form which
advances the base register by the given offset before reading; confirm
against the MOVU_UCHAR definition. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookups are jumped over for characters above 255. */
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
/* Leave the loop on the first mismatch. */
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one character bias of STR_PTR and restore the borrowed
registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

/* The register aliases are private to the two comparison helpers. */
#undef LCC_TABLE
#undef CHAR1
#undef CHAR2
5241 | |
5242 | #if defined SUPPORT_UTF && defined SUPPORT_UCP |
5243 | |
5244 | static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) |
5245 | { |
5246 | /* This function would be ineffective to do in JIT level. */ |
5247 | sljit_u32 c1, c2; |
5248 | const pcre_uchar *src2 = args->uchar_ptr; |
5249 | const pcre_uchar *end2 = args->end; |
5250 | const ucd_record *ur; |
5251 | const sljit_u32 *pp; |
5252 | |
5253 | while (src1 < end1) |
5254 | { |
5255 | if (src2 >= end2) |
5256 | return (pcre_uchar*)1; |
5257 | GETCHARINC(c1, src1); |
5258 | GETCHARINC(c2, src2); |
5259 | ur = GET_UCD(c2); |
5260 | if (c1 != c2 && c1 != c2 + ur->other_case) |
5261 | { |
5262 | pp = PRIV(ucd_caseless_sets) + ur->caseset; |
5263 | for (;;) |
5264 | { |
5265 | if (c1 < *pp) return NULL; |
5266 | if (c1 == *pp++) break; |
5267 | } |
5268 | } |
5269 | } |
5270 | return src2; |
5271 | } |
5272 | |
5273 | #endif /* SUPPORT_UTF && SUPPORT_UCP */ |
5274 | |
/* Emits the comparison of one pattern character (all of its code units in
UTF mode) against the subject as part of a fixed character sequence.

The subject is read at negative offsets from STR_PTR (-context->length), and
context->length is decreased by IN_UCHARS(1) for every code unit processed.
NOTE(review): this presumably relies on the caller having already advanced
STR_PTR past the whole sequence; confirm at the call sites.

When unaligned reads are available (8 and 16 bit libraries), code units are
collected in context->c (expected value) and context->oc (case bit mask) and
compared in chunks; otherwise every code unit is compared on its own. TMP1
and TMP2 are used alternately: while one holds the chunk being compared, the
next chunk is already loaded into the other (context->sourcereg is the
register that receives the next load).

Arguments:
  common      the compiler state
  caseless    TRUE if the character is matched caselessly
  cc          points to the character in the compiled pattern
  context     chunking state shared by the calls for one sequence
  backtracks  the list receiving the jumps taken on mismatch

Returns:      cc advanced past the character */
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  /* The returned value packs the position of the code unit that differs
  between the two cases together with the differing bit; it is split below
  into othercasechar (pointer to that code unit) and othercasebit (mask). */
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if defined COMPILE_PCRE8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 selects the upper byte of the 16 bit code unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

/* First call for this sequence: load the first chunk into TMP1 and make
TMP2 the target of the next load. */
if (context->sourcereg == -1)
  {
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
/* In UTF mode all code units of the character are processed by the loop
below; without UTF support the body runs exactly once. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Record the expected code unit (with the case bit forced to one when this
  is the code unit that carries the case difference) and its mask. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Emit a comparison when the collected code units match the size of the
  chunk that was loaded for them, or when the sequence ends. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the following chunk (if any) before comparing the current one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register holding the chunk that was loaded earlier. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are the number of code units in a 4, 2 or 1 byte
    chunk. The case bits are forced to one in the subject data, then the
    chunk is compared with the expected value. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* Load the following code unit (if any), then compare the one that was
  loaded earlier into the other register. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
5427 | |
5428 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
5429 | |
/* Emits the add or subtract that changes the bias applied to typereg from
the current typeoffset to (value), then records (value) as the new
typeoffset. Nothing is emitted when the bias is unchanged. The macro relies
on "typereg", "typeoffset" and the names needed by OP2 being in scope where
it is used.

Caution: the expansion is an if statement followed by an assignment, it is
not wrapped in do { } while (0). It must therefore not be used as the sole
body of an unbraced if / else / loop. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  typeoffset = (value);

/* Same as SET_TYPE_OFFSET, but for the bias "charoffset" applied to TMP1.
The same caution about unbraced use applies. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
  charoffset = (value);

/* Forward declaration: the extended class compiler below calls it before its
definition. */
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5451 | |
5452 | static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) |
5453 | { |
5454 | DEFINE_COMPILER; |
5455 | jump_list *found = NULL; |
5456 | jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; |
5457 | sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; |
5458 | struct sljit_jump *jump = NULL; |
5459 | pcre_uchar *ccbegin; |
5460 | int compares, invertcmp, numberofcmps; |
5461 | #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) |
5462 | BOOL utf = common->utf; |
5463 | #endif |
5464 | |
5465 | #ifdef SUPPORT_UCP |
5466 | BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; |
5467 | BOOL charsaved = FALSE; |
5468 | int typereg = TMP1; |
5469 | const sljit_u32 *other_cases; |
5470 | sljit_uw typeoffset; |
5471 | #endif |
5472 | |
5473 | /* Scanning the necessary info. */ |
5474 | cc++; |
5475 | ccbegin = cc; |
5476 | compares = 0; |
5477 | if (cc[-1] & XCL_MAP) |
5478 | { |
5479 | min = 0; |
5480 | cc += 32 / sizeof(pcre_uchar); |
5481 | } |
5482 | |
5483 | while (*cc != XCL_END) |
5484 | { |
5485 | compares++; |
5486 | if (*cc == XCL_SINGLE) |
5487 | { |
5488 | cc ++; |
5489 | GETCHARINCTEST(c, cc); |
5490 | if (c > max) max = c; |
5491 | if (c < min) min = c; |
5492 | #ifdef SUPPORT_UCP |
5493 | needschar = TRUE; |
5494 | #endif |
5495 | } |
5496 | else if (*cc == XCL_RANGE) |
5497 | { |
5498 | cc ++; |
5499 | GETCHARINCTEST(c, cc); |
5500 | if (c < min) min = c; |
5501 | GETCHARINCTEST(c, cc); |
5502 | if (c > max) max = c; |
5503 | #ifdef SUPPORT_UCP |
5504 | needschar = TRUE; |
5505 | #endif |
5506 | } |
5507 | #ifdef SUPPORT_UCP |
5508 | else |
5509 | { |
5510 | SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); |
5511 | cc++; |
5512 | if (*cc == PT_CLIST) |
5513 | { |
5514 | other_cases = PRIV(ucd_caseless_sets) + cc[1]; |
5515 | while (*other_cases != NOTACHAR) |
5516 | { |
5517 | if (*other_cases > max) max = *other_cases; |
5518 | if (*other_cases < min) min = *other_cases; |
5519 | other_cases++; |
5520 | } |
5521 | } |
5522 | else |
5523 | { |
5524 | max = READ_CHAR_MAX; |
5525 | min = 0; |
5526 | } |
5527 | |
5528 | switch(*cc) |
5529 | { |
5530 | case PT_ANY: |
5531 | /* Any either accepts everything or ignored. */ |
5532 | if (cc[-1] == XCL_PROP) |
5533 | { |
5534 | compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); |
5535 | if (list == backtracks) |
5536 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
5537 | return; |
5538 | } |
5539 | break; |
5540 | |
5541 | case PT_LAMP: |
5542 | case PT_GC: |
5543 | case PT_PC: |
5544 | case PT_ALNUM: |
5545 | needstype = TRUE; |
5546 | break; |
5547 | |
5548 | case PT_SC: |
5549 | needsscript = TRUE; |
5550 | break; |
5551 | |
5552 | case PT_SPACE: |
5553 | case PT_PXSPACE: |
5554 | case PT_WORD: |
5555 | case PT_PXGRAPH: |
5556 | case PT_PXPRINT: |
5557 | case PT_PXPUNCT: |
5558 | needstype = TRUE; |
5559 | needschar = TRUE; |
5560 | break; |
5561 | |
5562 | case PT_CLIST: |
5563 | case PT_UCNC: |
5564 | needschar = TRUE; |
5565 | break; |
5566 | |
5567 | default: |
5568 | SLJIT_ASSERT_STOP(); |
5569 | break; |
5570 | } |
5571 | cc += 2; |
5572 | } |
5573 | #endif |
5574 | } |
5575 | SLJIT_ASSERT(compares > 0); |
5576 | |
5577 | /* We are not necessary in utf mode even in 8 bit mode. */ |
5578 | cc = ccbegin; |
5579 | read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0); |
5580 | |
5581 | if ((cc[-1] & XCL_HASPROP) == 0) |
5582 | { |
5583 | if ((cc[-1] & XCL_MAP) != 0) |
5584 | { |
5585 | jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); |
5586 | if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) |
5587 | { |
5588 | OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
5589 | OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
5590 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
5591 | OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
5592 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
5593 | add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); |
5594 | } |
5595 | |
5596 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
5597 | JUMPHERE(jump); |
5598 | |
5599 | cc += 32 / sizeof(pcre_uchar); |
5600 | } |
5601 | else |
5602 | { |
5603 | OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); |
5604 | add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); |
5605 | } |
5606 | } |
5607 | else if ((cc[-1] & XCL_MAP) != 0) |
5608 | { |
5609 | OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); |
5610 | #ifdef SUPPORT_UCP |
5611 | charsaved = TRUE; |
5612 | #endif |
5613 | if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) |
5614 | { |
5615 | #ifdef COMPILE_PCRE8 |
5616 | jump = NULL; |
5617 | if (common->utf) |
5618 | #endif |
5619 | jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); |
5620 | |
5621 | OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
5622 | OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
5623 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
5624 | OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
5625 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
5626 | add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); |
5627 | |
5628 | #ifdef COMPILE_PCRE8 |
5629 | if (common->utf) |
5630 | #endif |
5631 | JUMPHERE(jump); |
5632 | } |
5633 | |
5634 | OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); |
5635 | cc += 32 / sizeof(pcre_uchar); |
5636 | } |
5637 | |
5638 | #ifdef SUPPORT_UCP |
5639 | if (needstype || needsscript) |
5640 | { |
5641 | if (needschar && !charsaved) |
5642 | OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); |
5643 | |
5644 | OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); |
5645 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); |
5646 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); |
5647 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); |
5648 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); |
5649 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); |
5650 | OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); |
5651 | |
5652 | /* Before anything else, we deal with scripts. */ |
5653 | if (needsscript) |
5654 | { |
5655 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); |
5656 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); |
5657 | |
5658 | ccbegin = cc; |
5659 | |
5660 | while (*cc != XCL_END) |
5661 | { |
5662 | if (*cc == XCL_SINGLE) |
5663 | { |
5664 | cc ++; |
5665 | GETCHARINCTEST(c, cc); |
5666 | } |
5667 | else if (*cc == XCL_RANGE) |
5668 | { |
5669 | cc ++; |
5670 | GETCHARINCTEST(c, cc); |
5671 | GETCHARINCTEST(c, cc); |
5672 | } |
5673 | else |
5674 | { |
5675 | SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); |
5676 | cc++; |
5677 | if (*cc == PT_SC) |
5678 | { |
5679 | compares--; |
5680 | invertcmp = (compares == 0 && list != backtracks); |
5681 | if (cc[-1] == XCL_NOTPROP) |
5682 | invertcmp ^= 0x1; |
5683 | jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); |
5684 | add_jump(compiler, compares > 0 ? list : backtracks, jump); |
5685 | } |
5686 | cc += 2; |
5687 | } |
5688 | } |
5689 | |
5690 | cc = ccbegin; |
5691 | } |
5692 | |
5693 | if (needschar) |
5694 | { |
5695 | OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); |
5696 | } |
5697 | |
5698 | if (needstype) |
5699 | { |
5700 | if (!needschar) |
5701 | { |
5702 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); |
5703 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); |
5704 | } |
5705 | else |
5706 | { |
5707 | OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); |
5708 | OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); |
5709 | typereg = RETURN_ADDR; |
5710 | } |
5711 | } |
5712 | } |
5713 | #endif |
5714 | |
5715 | /* Generating code. */ |
5716 | charoffset = 0; |
5717 | numberofcmps = 0; |
5718 | #ifdef SUPPORT_UCP |
5719 | typeoffset = 0; |
5720 | #endif |
5721 | |
5722 | while (*cc != XCL_END) |
5723 | { |
5724 | compares--; |
5725 | invertcmp = (compares == 0 && list != backtracks); |
5726 | jump = NULL; |
5727 | |
5728 | if (*cc == XCL_SINGLE) |
5729 | { |
5730 | cc ++; |
5731 | GETCHARINCTEST(c, cc); |
5732 | |
5733 | if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) |
5734 | { |
5735 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); |
5736 | OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL); |
5737 | numberofcmps++; |
5738 | } |
5739 | else if (numberofcmps > 0) |
5740 | { |
5741 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); |
5742 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5743 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5744 | numberofcmps = 0; |
5745 | } |
5746 | else |
5747 | { |
5748 | jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); |
5749 | numberofcmps = 0; |
5750 | } |
5751 | } |
5752 | else if (*cc == XCL_RANGE) |
5753 | { |
5754 | cc ++; |
5755 | GETCHARINCTEST(c, cc); |
5756 | SET_CHAR_OFFSET(c); |
5757 | GETCHARINCTEST(c, cc); |
5758 | |
5759 | if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) |
5760 | { |
5761 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); |
5762 | OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL); |
5763 | numberofcmps++; |
5764 | } |
5765 | else if (numberofcmps > 0) |
5766 | { |
5767 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); |
5768 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); |
5769 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5770 | numberofcmps = 0; |
5771 | } |
5772 | else |
5773 | { |
5774 | jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); |
5775 | numberofcmps = 0; |
5776 | } |
5777 | } |
5778 | #ifdef SUPPORT_UCP |
5779 | else |
5780 | { |
5781 | SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); |
5782 | if (*cc == XCL_NOTPROP) |
5783 | invertcmp ^= 0x1; |
5784 | cc++; |
5785 | switch(*cc) |
5786 | { |
5787 | case PT_ANY: |
5788 | if (!invertcmp) |
5789 | jump = JUMP(SLJIT_JUMP); |
5790 | break; |
5791 | |
5792 | case PT_LAMP: |
5793 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); |
5794 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
5795 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); |
5796 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5797 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); |
5798 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5799 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5800 | break; |
5801 | |
5802 | case PT_GC: |
5803 | c = PRIV(ucp_typerange)[(int)cc[1] * 2]; |
5804 | SET_TYPE_OFFSET(c); |
5805 | jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); |
5806 | break; |
5807 | |
5808 | case PT_PC: |
5809 | jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); |
5810 | break; |
5811 | |
5812 | case PT_SC: |
5813 | compares++; |
5814 | /* Do nothing. */ |
5815 | break; |
5816 | |
5817 | case PT_SPACE: |
5818 | case PT_PXSPACE: |
5819 | SET_CHAR_OFFSET(9); |
5820 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); |
5821 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); |
5822 | |
5823 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); |
5824 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5825 | |
5826 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); |
5827 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5828 | |
5829 | SET_TYPE_OFFSET(ucp_Zl); |
5830 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); |
5831 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); |
5832 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5833 | break; |
5834 | |
5835 | case PT_WORD: |
5836 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); |
5837 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
5838 | /* Fall through. */ |
5839 | |
5840 | case PT_ALNUM: |
5841 | SET_TYPE_OFFSET(ucp_Ll); |
5842 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); |
5843 | OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL); |
5844 | SET_TYPE_OFFSET(ucp_Nd); |
5845 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); |
5846 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); |
5847 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5848 | break; |
5849 | |
5850 | case PT_CLIST: |
5851 | other_cases = PRIV(ucd_caseless_sets) + cc[1]; |
5852 | |
5853 | /* At least three characters are required. |
5854 | Otherwise this case would be handled by the normal code path. */ |
5855 | SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); |
5856 | SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); |
5857 | |
5858 | /* Optimizing character pairs, if their difference is power of 2. */ |
5859 | if (is_powerof2(other_cases[1] ^ other_cases[0])) |
5860 | { |
5861 | if (charoffset == 0) |
5862 | OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); |
5863 | else |
5864 | { |
5865 | OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); |
5866 | OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); |
5867 | } |
5868 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); |
5869 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
5870 | other_cases += 2; |
5871 | } |
5872 | else if (is_powerof2(other_cases[2] ^ other_cases[1])) |
5873 | { |
5874 | if (charoffset == 0) |
5875 | OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); |
5876 | else |
5877 | { |
5878 | OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); |
5879 | OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); |
5880 | } |
5881 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); |
5882 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
5883 | |
5884 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); |
5885 | OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5886 | |
5887 | other_cases += 3; |
5888 | } |
5889 | else |
5890 | { |
5891 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); |
5892 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
5893 | } |
5894 | |
5895 | while (*other_cases != NOTACHAR) |
5896 | { |
5897 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); |
5898 | OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5899 | } |
5900 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5901 | break; |
5902 | |
5903 | case PT_UCNC: |
5904 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); |
5905 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
5906 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); |
5907 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5908 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); |
5909 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5910 | |
5911 | SET_CHAR_OFFSET(0xa0); |
5912 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); |
5913 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); |
5914 | SET_CHAR_OFFSET(0); |
5915 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); |
5916 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL); |
5917 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5918 | break; |
5919 | |
5920 | case PT_PXGRAPH: |
5921 | /* C and Z groups are the farthest two groups. */ |
5922 | SET_TYPE_OFFSET(ucp_Ll); |
5923 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); |
5924 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER); |
5925 | |
5926 | jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); |
5927 | |
5928 | /* In case of ucp_Cf, we overwrite the result. */ |
5929 | SET_CHAR_OFFSET(0x2066); |
5930 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); |
5931 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); |
5932 | |
5933 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); |
5934 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5935 | |
5936 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); |
5937 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5938 | |
5939 | JUMPHERE(jump); |
5940 | jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); |
5941 | break; |
5942 | |
5943 | case PT_PXPRINT: |
5944 | /* C and Z groups are the farthest two groups. */ |
5945 | SET_TYPE_OFFSET(ucp_Ll); |
5946 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); |
5947 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER); |
5948 | |
5949 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); |
5950 | OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); |
5951 | |
5952 | jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); |
5953 | |
5954 | /* In case of ucp_Cf, we overwrite the result. */ |
5955 | SET_CHAR_OFFSET(0x2066); |
5956 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); |
5957 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); |
5958 | |
5959 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); |
5960 | OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); |
5961 | |
5962 | JUMPHERE(jump); |
5963 | jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); |
5964 | break; |
5965 | |
5966 | case PT_PXPUNCT: |
5967 | SET_TYPE_OFFSET(ucp_Sc); |
5968 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); |
5969 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); |
5970 | |
5971 | SET_CHAR_OFFSET(0); |
5972 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); |
5973 | OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); |
5974 | |
5975 | SET_TYPE_OFFSET(ucp_Pc); |
5976 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); |
5977 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); |
5978 | jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); |
5979 | break; |
5980 | |
5981 | default: |
5982 | SLJIT_ASSERT_STOP(); |
5983 | break; |
5984 | } |
5985 | cc += 2; |
5986 | } |
5987 | #endif |
5988 | |
5989 | if (jump != NULL) |
5990 | add_jump(compiler, compares > 0 ? list : backtracks, jump); |
5991 | } |
5992 | |
5993 | if (found != NULL) |
5994 | set_jumps(found, LABEL()); |
5995 | } |
5996 | |
5997 | #undef SET_TYPE_OFFSET |
5998 | #undef SET_CHAR_OFFSET |
5999 | |
6000 | #endif |
6001 | |
6002 | static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) |
6003 | { |
6004 | DEFINE_COMPILER; |
6005 | int length; |
6006 | struct sljit_jump *jump[4]; |
6007 | #ifdef SUPPORT_UTF |
6008 | struct sljit_label *label; |
6009 | #endif /* SUPPORT_UTF */ |
6010 | |
6011 | switch(type) |
6012 | { |
6013 | case OP_SOD: |
6014 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
6015 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); |
6016 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); |
6017 | return cc; |
6018 | |
6019 | case OP_SOM: |
6020 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
6021 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
6022 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); |
6023 | return cc; |
6024 | |
6025 | case OP_NOT_WORD_BOUNDARY: |
6026 | case OP_WORD_BOUNDARY: |
6027 | add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); |
6028 | add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); |
6029 | return cc; |
6030 | |
6031 | case OP_EODN: |
6032 | /* Requires rather complex checks. */ |
6033 | jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
6034 | if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
6035 | { |
6036 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
6037 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
6038 | if (common->mode == JIT_COMPILE) |
6039 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); |
6040 | else |
6041 | { |
6042 | jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); |
6043 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); |
6044 | OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS); |
6045 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); |
6046 | OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); |
6047 | add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); |
6048 | check_partial(common, TRUE); |
6049 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
6050 | JUMPHERE(jump[1]); |
6051 | } |
6052 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
6053 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); |
6054 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); |
6055 | } |
6056 | else if (common->nltype == NLTYPE_FIXED) |
6057 | { |
6058 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6059 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
6060 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); |
6061 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); |
6062 | } |
6063 | else |
6064 | { |
6065 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
6066 | jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); |
6067 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
6068 | OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); |
6069 | jump[2] = JUMP(SLJIT_GREATER); |
6070 | add_jump(compiler, backtracks, JUMP(SLJIT_LESS)); |
6071 | /* Equal. */ |
6072 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
6073 | jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); |
6074 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
6075 | |
6076 | JUMPHERE(jump[1]); |
6077 | if (common->nltype == NLTYPE_ANYCRLF) |
6078 | { |
6079 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6080 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); |
6081 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); |
6082 | } |
6083 | else |
6084 | { |
6085 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); |
6086 | read_char_range(common, common->nlmin, common->nlmax, TRUE); |
6087 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); |
6088 | add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); |
6089 | add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); |
6090 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); |
6091 | } |
6092 | JUMPHERE(jump[2]); |
6093 | JUMPHERE(jump[3]); |
6094 | } |
6095 | JUMPHERE(jump[0]); |
6096 | check_partial(common, FALSE); |
6097 | return cc; |
6098 | |
6099 | case OP_EOD: |
6100 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); |
6101 | check_partial(common, FALSE); |
6102 | return cc; |
6103 | |
6104 | case OP_DOLL: |
6105 | OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); |
6106 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); |
6107 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
6108 | |
6109 | if (!common->endonly) |
6110 | compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); |
6111 | else |
6112 | { |
6113 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); |
6114 | check_partial(common, FALSE); |
6115 | } |
6116 | return cc; |
6117 | |
6118 | case OP_DOLLM: |
6119 | jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); |
6120 | OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); |
6121 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); |
6122 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
6123 | check_partial(common, FALSE); |
6124 | jump[0] = JUMP(SLJIT_JUMP); |
6125 | JUMPHERE(jump[1]); |
6126 | |
6127 | if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
6128 | { |
6129 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
6130 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
6131 | if (common->mode == JIT_COMPILE) |
6132 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); |
6133 | else |
6134 | { |
6135 | jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); |
6136 | /* STR_PTR = STR_END - IN_UCHARS(1) */ |
6137 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); |
6138 | check_partial(common, TRUE); |
6139 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
6140 | JUMPHERE(jump[1]); |
6141 | } |
6142 | |
6143 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); |
6144 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); |
6145 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); |
6146 | } |
6147 | else |
6148 | { |
6149 | peek_char(common, common->nlmax); |
6150 | check_newlinechar(common, common->nltype, backtracks, FALSE); |
6151 | } |
6152 | JUMPHERE(jump[0]); |
6153 | return cc; |
6154 | |
6155 | case OP_CIRC: |
6156 | OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); |
6157 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); |
6158 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); |
6159 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); |
6160 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
6161 | return cc; |
6162 | |
6163 | case OP_CIRCM: |
6164 | OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); |
6165 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); |
6166 | jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0); |
6167 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); |
6168 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
6169 | jump[0] = JUMP(SLJIT_JUMP); |
6170 | JUMPHERE(jump[1]); |
6171 | |
6172 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
6173 | if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
6174 | { |
6175 | OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); |
6176 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); |
6177 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); |
6178 | OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); |
6179 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); |
6180 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); |
6181 | } |
6182 | else |
6183 | { |
6184 | skip_char_back(common); |
6185 | read_char_range(common, common->nlmin, common->nlmax, TRUE); |
6186 | check_newlinechar(common, common->nltype, backtracks, FALSE); |
6187 | } |
6188 | JUMPHERE(jump[0]); |
6189 | return cc; |
6190 | |
6191 | case OP_REVERSE: |
6192 | length = GET(cc, 0); |
6193 | if (length == 0) |
6194 | return cc + LINK_SIZE; |
6195 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
6196 | #ifdef SUPPORT_UTF |
6197 | if (common->utf) |
6198 | { |
6199 | OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); |
6200 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); |
6201 | label = LABEL(); |
6202 | add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); |
6203 | skip_char_back(common); |
6204 | OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); |
6205 | JUMPTO(SLJIT_NOT_ZERO, label); |
6206 | } |
6207 | else |
6208 | #endif |
6209 | { |
6210 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); |
6211 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); |
6212 | add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); |
6213 | } |
6214 | check_start_used_ptr(common); |
6215 | return cc + LINK_SIZE; |
6216 | } |
6217 | SLJIT_ASSERT_STOP(); |
6218 | return cc; |
6219 | } |
6220 | |
6221 | static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr) |
6222 | { |
6223 | DEFINE_COMPILER; |
6224 | int length; |
6225 | unsigned int c, oc, bit; |
6226 | compare_context context; |
6227 | struct sljit_jump *jump[3]; |
6228 | jump_list *end_list; |
6229 | #ifdef SUPPORT_UTF |
6230 | struct sljit_label *label; |
6231 | #ifdef SUPPORT_UCP |
6232 | pcre_uchar propdata[5]; |
6233 | #endif |
6234 | #endif /* SUPPORT_UTF */ |
6235 | |
6236 | switch(type) |
6237 | { |
6238 | case OP_NOT_DIGIT: |
6239 | case OP_DIGIT: |
6240 | /* Digits are usually 0-9, so it is worth to optimize them. */ |
6241 | if (check_str_ptr) |
6242 | detect_partial_match(common, backtracks); |
6243 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
6244 | if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) |
6245 | read_char7_type(common, type == OP_NOT_DIGIT); |
6246 | else |
6247 | #endif |
6248 | read_char8_type(common, type == OP_NOT_DIGIT); |
6249 | /* Flip the starting bit in the negative case. */ |
6250 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); |
6251 | add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); |
6252 | return cc; |
6253 | |
6254 | case OP_NOT_WHITESPACE: |
6255 | case OP_WHITESPACE: |
6256 | if (check_str_ptr) |
6257 | detect_partial_match(common, backtracks); |
6258 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
6259 | if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) |
6260 | read_char7_type(common, type == OP_NOT_WHITESPACE); |
6261 | else |
6262 | #endif |
6263 | read_char8_type(common, type == OP_NOT_WHITESPACE); |
6264 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); |
6265 | add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); |
6266 | return cc; |
6267 | |
6268 | case OP_NOT_WORDCHAR: |
6269 | case OP_WORDCHAR: |
6270 | if (check_str_ptr) |
6271 | detect_partial_match(common, backtracks); |
6272 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
6273 | if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) |
6274 | read_char7_type(common, type == OP_NOT_WORDCHAR); |
6275 | else |
6276 | #endif |
6277 | read_char8_type(common, type == OP_NOT_WORDCHAR); |
6278 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); |
6279 | add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); |
6280 | return cc; |
6281 | |
6282 | case OP_ANY: |
6283 | if (check_str_ptr) |
6284 | detect_partial_match(common, backtracks); |
6285 | read_char_range(common, common->nlmin, common->nlmax, TRUE); |
6286 | if (common->nltype == NLTYPE_FIXED && common->newline > 255) |
6287 | { |
6288 | jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); |
6289 | end_list = NULL; |
6290 | if (common->mode != JIT_PARTIAL_HARD_COMPILE) |
6291 | add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
6292 | else |
6293 | check_str_end(common, &end_list); |
6294 | |
6295 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
6296 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); |
6297 | set_jumps(end_list, LABEL()); |
6298 | JUMPHERE(jump[0]); |
6299 | } |
6300 | else |
6301 | check_newlinechar(common, common->nltype, backtracks, TRUE); |
6302 | return cc; |
6303 | |
6304 | case OP_ALLANY: |
6305 | if (check_str_ptr) |
6306 | detect_partial_match(common, backtracks); |
6307 | #ifdef SUPPORT_UTF |
6308 | if (common->utf) |
6309 | { |
6310 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
6311 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6312 | #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 |
6313 | #if defined COMPILE_PCRE8 |
6314 | jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
6315 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
6316 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
6317 | #elif defined COMPILE_PCRE16 |
6318 | jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); |
6319 | OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); |
6320 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); |
6321 | OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); |
6322 | OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
6323 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
6324 | #endif |
6325 | JUMPHERE(jump[0]); |
6326 | #endif /* COMPILE_PCRE[8|16] */ |
6327 | return cc; |
6328 | } |
6329 | #endif |
6330 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6331 | return cc; |
6332 | |
6333 | case OP_ANYBYTE: |
6334 | if (check_str_ptr) |
6335 | detect_partial_match(common, backtracks); |
6336 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6337 | return cc; |
6338 | |
6339 | #ifdef SUPPORT_UTF |
6340 | #ifdef SUPPORT_UCP |
6341 | case OP_NOTPROP: |
6342 | case OP_PROP: |
6343 | propdata[0] = XCL_HASPROP; |
6344 | propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; |
6345 | propdata[2] = cc[0]; |
6346 | propdata[3] = cc[1]; |
6347 | propdata[4] = XCL_END; |
6348 | if (check_str_ptr) |
6349 | detect_partial_match(common, backtracks); |
6350 | compile_xclass_matchingpath(common, propdata, backtracks); |
6351 | return cc + 2; |
6352 | #endif |
6353 | #endif |
6354 | |
6355 | case OP_ANYNL: |
6356 | if (check_str_ptr) |
6357 | detect_partial_match(common, backtracks); |
6358 | read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE); |
6359 | jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); |
6360 | /* We don't need to handle soft partial matching case. */ |
6361 | end_list = NULL; |
6362 | if (common->mode != JIT_PARTIAL_HARD_COMPILE) |
6363 | add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
6364 | else |
6365 | check_str_end(common, &end_list); |
6366 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
6367 | jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); |
6368 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6369 | jump[2] = JUMP(SLJIT_JUMP); |
6370 | JUMPHERE(jump[0]); |
6371 | check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); |
6372 | set_jumps(end_list, LABEL()); |
6373 | JUMPHERE(jump[1]); |
6374 | JUMPHERE(jump[2]); |
6375 | return cc; |
6376 | |
6377 | case OP_NOT_HSPACE: |
6378 | case OP_HSPACE: |
6379 | if (check_str_ptr) |
6380 | detect_partial_match(common, backtracks); |
6381 | read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE); |
6382 | add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); |
6383 | add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); |
6384 | return cc; |
6385 | |
6386 | case OP_NOT_VSPACE: |
6387 | case OP_VSPACE: |
6388 | if (check_str_ptr) |
6389 | detect_partial_match(common, backtracks); |
6390 | read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE); |
6391 | add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); |
6392 | add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); |
6393 | return cc; |
6394 | |
6395 | #ifdef SUPPORT_UCP |
6396 | case OP_EXTUNI: |
6397 | if (check_str_ptr) |
6398 | detect_partial_match(common, backtracks); |
6399 | read_char(common); |
6400 | add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); |
6401 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); |
6402 | /* Optimize register allocation: use a real register. */ |
6403 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); |
6404 | OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); |
6405 | |
6406 | label = LABEL(); |
6407 | jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
6408 | OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); |
6409 | read_char(common); |
6410 | add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); |
6411 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); |
6412 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); |
6413 | |
6414 | OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2); |
6415 | OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); |
6416 | OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0); |
6417 | OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
6418 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
6419 | JUMPTO(SLJIT_NOT_ZERO, label); |
6420 | |
6421 | OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); |
6422 | JUMPHERE(jump[0]); |
6423 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
6424 | |
6425 | if (common->mode == JIT_PARTIAL_HARD_COMPILE) |
6426 | { |
6427 | jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); |
6428 | /* Since we successfully read a char above, partial matching must occur. */ |
6429 | check_partial(common, TRUE); |
6430 | JUMPHERE(jump[0]); |
6431 | } |
6432 | return cc; |
6433 | #endif |
6434 | |
6435 | case OP_CHAR: |
6436 | case OP_CHARI: |
6437 | length = 1; |
6438 | #ifdef SUPPORT_UTF |
6439 | if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); |
6440 | #endif |
6441 | if (common->mode == JIT_COMPILE && check_str_ptr |
6442 | && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) |
6443 | { |
6444 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); |
6445 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); |
6446 | |
6447 | context.length = IN_UCHARS(length); |
6448 | context.sourcereg = -1; |
6449 | #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
6450 | context.ucharptr = 0; |
6451 | #endif |
6452 | return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); |
6453 | } |
6454 | |
6455 | if (check_str_ptr) |
6456 | detect_partial_match(common, backtracks); |
6457 | #ifdef SUPPORT_UTF |
6458 | if (common->utf) |
6459 | { |
6460 | GETCHAR(c, cc); |
6461 | } |
6462 | else |
6463 | #endif |
6464 | c = *cc; |
6465 | |
6466 | if (type == OP_CHAR || !char_has_othercase(common, cc)) |
6467 | { |
6468 | read_char_range(common, c, c, FALSE); |
6469 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); |
6470 | return cc + length; |
6471 | } |
6472 | oc = char_othercase(common, c); |
6473 | read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE); |
6474 | bit = c ^ oc; |
6475 | if (is_powerof2(bit)) |
6476 | { |
6477 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); |
6478 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); |
6479 | return cc + length; |
6480 | } |
6481 | jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); |
6482 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); |
6483 | JUMPHERE(jump[0]); |
6484 | return cc + length; |
6485 | |
6486 | case OP_NOT: |
6487 | case OP_NOTI: |
6488 | if (check_str_ptr) |
6489 | detect_partial_match(common, backtracks); |
6490 | length = 1; |
6491 | #ifdef SUPPORT_UTF |
6492 | if (common->utf) |
6493 | { |
6494 | #ifdef COMPILE_PCRE8 |
6495 | c = *cc; |
6496 | if (c < 128) |
6497 | { |
6498 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); |
6499 | if (type == OP_NOT || !char_has_othercase(common, cc)) |
6500 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); |
6501 | else |
6502 | { |
6503 | /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ |
6504 | OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); |
6505 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); |
6506 | } |
6507 | /* Skip the variable-length character. */ |
6508 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
6509 | jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); |
6510 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); |
6511 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
6512 | JUMPHERE(jump[0]); |
6513 | return cc + 1; |
6514 | } |
6515 | else |
6516 | #endif /* COMPILE_PCRE8 */ |
6517 | { |
6518 | GETCHARLEN(c, cc, length); |
6519 | } |
6520 | } |
6521 | else |
6522 | #endif /* SUPPORT_UTF */ |
6523 | c = *cc; |
6524 | |
6525 | if (type == OP_NOT || !char_has_othercase(common, cc)) |
6526 | { |
6527 | read_char_range(common, c, c, TRUE); |
6528 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); |
6529 | } |
6530 | else |
6531 | { |
6532 | oc = char_othercase(common, c); |
6533 | read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE); |
6534 | bit = c ^ oc; |
6535 | if (is_powerof2(bit)) |
6536 | { |
6537 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); |
6538 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); |
6539 | } |
6540 | else |
6541 | { |
6542 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); |
6543 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); |
6544 | } |
6545 | } |
6546 | return cc + length; |
6547 | |
6548 | case OP_CLASS: |
6549 | case OP_NCLASS: |
6550 | if (check_str_ptr) |
6551 | detect_partial_match(common, backtracks); |
6552 | |
6553 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
6554 | bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; |
6555 | read_char_range(common, 0, bit, type == OP_NCLASS); |
6556 | #else |
6557 | read_char_range(common, 0, 255, type == OP_NCLASS); |
6558 | #endif |
6559 | |
6560 | if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) |
6561 | return cc + 32 / sizeof(pcre_uchar); |
6562 | |
6563 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
6564 | jump[0] = NULL; |
6565 | if (common->utf) |
6566 | { |
6567 | jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); |
6568 | if (type == OP_CLASS) |
6569 | { |
6570 | add_jump(compiler, backtracks, jump[0]); |
6571 | jump[0] = NULL; |
6572 | } |
6573 | } |
6574 | #elif !defined COMPILE_PCRE8 |
6575 | jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); |
6576 | if (type == OP_CLASS) |
6577 | { |
6578 | add_jump(compiler, backtracks, jump[0]); |
6579 | jump[0] = NULL; |
6580 | } |
6581 | #endif /* SUPPORT_UTF && COMPILE_PCRE8 */ |
6582 | |
6583 | OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); |
6584 | OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); |
6585 | OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); |
6586 | OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); |
6587 | OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); |
6588 | add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); |
6589 | |
6590 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
6591 | if (jump[0] != NULL) |
6592 | JUMPHERE(jump[0]); |
6593 | #endif |
6594 | return cc + 32 / sizeof(pcre_uchar); |
6595 | |
6596 | #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
6597 | case OP_XCLASS: |
6598 | if (check_str_ptr) |
6599 | detect_partial_match(common, backtracks); |
6600 | compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); |
6601 | return cc + GET(cc, 0) - 1; |
6602 | #endif |
6603 | } |
6604 | SLJIT_ASSERT_STOP(); |
6605 | return cc; |
6606 | } |
6607 | |
6608 | static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks) |
6609 | { |
6610 | /* This function consumes at least one input character. */ |
6611 | /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ |
6612 | DEFINE_COMPILER; |
6613 | pcre_uchar *ccbegin = cc; |
6614 | compare_context context; |
6615 | int size; |
6616 | |
6617 | context.length = 0; |
6618 | do |
6619 | { |
6620 | if (cc >= ccend) |
6621 | break; |
6622 | |
6623 | if (*cc == OP_CHAR) |
6624 | { |
6625 | size = 1; |
6626 | #ifdef SUPPORT_UTF |
6627 | if (common->utf && HAS_EXTRALEN(cc[1])) |
6628 | size += GET_EXTRALEN(cc[1]); |
6629 | #endif |
6630 | } |
6631 | else if (*cc == OP_CHARI) |
6632 | { |
6633 | size = 1; |
6634 | #ifdef SUPPORT_UTF |
6635 | if (common->utf) |
6636 | { |
6637 | if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) |
6638 | size = 0; |
6639 | else if (HAS_EXTRALEN(cc[1])) |
6640 | size += GET_EXTRALEN(cc[1]); |
6641 | } |
6642 | else |
6643 | #endif |
6644 | if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) |
6645 | size = 0; |
6646 | } |
6647 | else |
6648 | size = 0; |
6649 | |
6650 | cc += 1 + size; |
6651 | context.length += IN_UCHARS(size); |
6652 | } |
6653 | while (size > 0 && context.length <= 128); |
6654 | |
6655 | cc = ccbegin; |
6656 | if (context.length > 0) |
6657 | { |
6658 | /* We have a fixed-length byte sequence. */ |
6659 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); |
6660 | add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); |
6661 | |
6662 | context.sourcereg = -1; |
6663 | #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
6664 | context.ucharptr = 0; |
6665 | #endif |
6666 | do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); |
6667 | return cc; |
6668 | } |
6669 | |
6670 | /* A non-fixed length character will be checked if length == 0. */ |
6671 | return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); |
6672 | } |
6673 | |
/* Forward declarations. */
6675 | static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); |
6676 | static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); |
6677 | |
/* Allocates a zero-filled backtrack record of `size` bytes from the compiler's
memory, records the pattern position `ccstart` in it and makes it the new top
of the parent's backtrack chain. The expansion site must have `compiler`,
`parent` and `backtrack` in scope. When the compiler reports an error after
the allocation, the enclosing function returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
6690 | |
/* Variant of the backtrack-pushing macro for functions returning void:
allocates a zero-filled record of `size` bytes, records `ccstart` in it and
links it on top of the parent's backtrack chain. The expansion site must have
`compiler`, `parent` and `backtrack` in scope. When the compiler reports an
error after the allocation, the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
6703 | |
/* Casts the `backtrack` variable of the expansion site to a pointer to the
more specific backtrack record type `type`. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6705 | |
6706 | static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) |
6707 | { |
6708 | /* The OVECTOR offset goes to TMP2. */ |
6709 | DEFINE_COMPILER; |
6710 | int count = GET2(cc, 1 + IMM2_SIZE); |
6711 | pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size; |
6712 | unsigned int offset; |
6713 | jump_list *found = NULL; |
6714 | |
6715 | SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); |
6716 | |
6717 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); |
6718 | |
6719 | count--; |
6720 | while (count-- > 0) |
6721 | { |
6722 | offset = GET2(slot, 0) << 1; |
6723 | GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); |
6724 | add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); |
6725 | slot += common->name_entry_size; |
6726 | } |
6727 | |
6728 | offset = GET2(slot, 0) << 1; |
6729 | GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); |
6730 | if (backtracks != NULL && !common->jscript_compat) |
6731 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); |
6732 | |
6733 | set_jumps(found, LABEL()); |
6734 | } |
6735 | |
6736 | static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) |
6737 | { |
6738 | DEFINE_COMPILER; |
6739 | BOOL ref = (*cc == OP_REF || *cc == OP_REFI); |
6740 | int offset = 0; |
6741 | struct sljit_jump *jump = NULL; |
6742 | struct sljit_jump *partial; |
6743 | struct sljit_jump *nopartial; |
6744 | |
6745 | if (ref) |
6746 | { |
6747 | offset = GET2(cc, 1) << 1; |
6748 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
6749 | /* OVECTOR(1) contains the "string begin - 1" constant. */ |
6750 | if (withchecks && !common->jscript_compat) |
6751 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); |
6752 | } |
6753 | else |
6754 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
6755 | |
6756 | #if defined SUPPORT_UTF && defined SUPPORT_UCP |
6757 | if (common->utf && *cc == OP_REFI) |
6758 | { |
6759 | SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2); |
6760 | if (ref) |
6761 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
6762 | else |
6763 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
6764 | |
6765 | if (withchecks) |
6766 | jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0); |
6767 | |
6768 | /* Needed to save important temporary registers. */ |
6769 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); |
6770 | OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); |
6771 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); |
6772 | sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); |
6773 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
6774 | if (common->mode == JIT_COMPILE) |
6775 | add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); |
6776 | else |
6777 | { |
6778 | add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); |
6779 | nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); |
6780 | check_partial(common, FALSE); |
6781 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
6782 | JUMPHERE(nopartial); |
6783 | } |
6784 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); |
6785 | } |
6786 | else |
6787 | #endif /* SUPPORT_UTF && SUPPORT_UCP */ |
6788 | { |
6789 | if (ref) |
6790 | OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); |
6791 | else |
6792 | OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); |
6793 | |
6794 | if (withchecks) |
6795 | jump = JUMP(SLJIT_ZERO); |
6796 | |
6797 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
6798 | partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); |
6799 | if (common->mode == JIT_COMPILE) |
6800 | add_jump(compiler, backtracks, partial); |
6801 | |
6802 | add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); |
6803 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
6804 | |
6805 | if (common->mode != JIT_COMPILE) |
6806 | { |
6807 | nopartial = JUMP(SLJIT_JUMP); |
6808 | JUMPHERE(partial); |
6809 | /* TMP2 -= STR_END - STR_PTR */ |
6810 | OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); |
6811 | OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); |
6812 | partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); |
6813 | OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); |
6814 | add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); |
6815 | add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
6816 | JUMPHERE(partial); |
6817 | check_partial(common, FALSE); |
6818 | add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); |
6819 | JUMPHERE(nopartial); |
6820 | } |
6821 | } |
6822 | |
6823 | if (jump != NULL) |
6824 | { |
6825 | if (emptyfail) |
6826 | add_jump(compiler, backtracks, jump); |
6827 | else |
6828 | JUMPHERE(jump); |
6829 | } |
6830 | } |
6831 | |
6832 | static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
6833 | { |
6834 | DEFINE_COMPILER; |
6835 | BOOL ref = (*cc == OP_REF || *cc == OP_REFI); |
6836 | backtrack_common *backtrack; |
6837 | pcre_uchar type; |
6838 | int offset = 0; |
6839 | struct sljit_label *label; |
6840 | struct sljit_jump *zerolength; |
6841 | struct sljit_jump *jump = NULL; |
6842 | pcre_uchar *ccbegin = cc; |
6843 | int min = 0, max = 0; |
6844 | BOOL minimize; |
6845 | |
6846 | PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL); |
6847 | |
6848 | if (ref) |
6849 | offset = GET2(cc, 1) << 1; |
6850 | else |
6851 | cc += IMM2_SIZE; |
6852 | type = cc[1 + IMM2_SIZE]; |
6853 | |
6854 | SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); |
6855 | minimize = (type & 0x1) != 0; |
6856 | switch(type) |
6857 | { |
6858 | case OP_CRSTAR: |
6859 | case OP_CRMINSTAR: |
6860 | min = 0; |
6861 | max = 0; |
6862 | cc += 1 + IMM2_SIZE + 1; |
6863 | break; |
6864 | case OP_CRPLUS: |
6865 | case OP_CRMINPLUS: |
6866 | min = 1; |
6867 | max = 0; |
6868 | cc += 1 + IMM2_SIZE + 1; |
6869 | break; |
6870 | case OP_CRQUERY: |
6871 | case OP_CRMINQUERY: |
6872 | min = 0; |
6873 | max = 1; |
6874 | cc += 1 + IMM2_SIZE + 1; |
6875 | break; |
6876 | case OP_CRRANGE: |
6877 | case OP_CRMINRANGE: |
6878 | min = GET2(cc, 1 + IMM2_SIZE + 1); |
6879 | max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); |
6880 | cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; |
6881 | break; |
6882 | default: |
6883 | SLJIT_ASSERT_STOP(); |
6884 | break; |
6885 | } |
6886 | |
6887 | if (!minimize) |
6888 | { |
6889 | if (min == 0) |
6890 | { |
6891 | allocate_stack(common, 2); |
6892 | if (ref) |
6893 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
6894 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
6895 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
6896 | /* Temporary release of STR_PTR. */ |
6897 | OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
6898 | /* Handles both invalid and empty cases. Since the minimum repeat, |
6899 | is zero the invalid case is basically the same as an empty case. */ |
6900 | if (ref) |
6901 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
6902 | else |
6903 | { |
6904 | compile_dnref_search(common, ccbegin, NULL); |
6905 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
6906 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); |
6907 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
6908 | } |
6909 | /* Restore if not zero length. */ |
6910 | OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
6911 | } |
6912 | else |
6913 | { |
6914 | allocate_stack(common, 1); |
6915 | if (ref) |
6916 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
6917 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
6918 | if (ref) |
6919 | { |
6920 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); |
6921 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
6922 | } |
6923 | else |
6924 | { |
6925 | compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); |
6926 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
6927 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); |
6928 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
6929 | } |
6930 | } |
6931 | |
6932 | if (min > 1 || max > 1) |
6933 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); |
6934 | |
6935 | label = LABEL(); |
6936 | if (!ref) |
6937 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); |
6938 | compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); |
6939 | |
6940 | if (min > 1 || max > 1) |
6941 | { |
6942 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); |
6943 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
6944 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); |
6945 | if (min > 1) |
6946 | CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); |
6947 | if (max > 1) |
6948 | { |
6949 | jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); |
6950 | allocate_stack(common, 1); |
6951 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
6952 | JUMPTO(SLJIT_JUMP, label); |
6953 | JUMPHERE(jump); |
6954 | } |
6955 | } |
6956 | |
6957 | if (max == 0) |
6958 | { |
6959 | /* Includes min > 1 case as well. */ |
6960 | allocate_stack(common, 1); |
6961 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
6962 | JUMPTO(SLJIT_JUMP, label); |
6963 | } |
6964 | |
6965 | JUMPHERE(zerolength); |
6966 | BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); |
6967 | |
6968 | count_match(common); |
6969 | return cc; |
6970 | } |
6971 | |
6972 | allocate_stack(common, ref ? 2 : 3); |
6973 | if (ref) |
6974 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
6975 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
6976 | if (type != OP_CRMINSTAR) |
6977 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
6978 | |
6979 | if (min == 0) |
6980 | { |
6981 | /* Handles both invalid and empty cases. Since the minimum repeat, |
6982 | is zero the invalid case is basically the same as an empty case. */ |
6983 | if (ref) |
6984 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
6985 | else |
6986 | { |
6987 | compile_dnref_search(common, ccbegin, NULL); |
6988 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
6989 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); |
6990 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
6991 | } |
6992 | /* Length is non-zero, we can match real repeats. */ |
6993 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
6994 | jump = JUMP(SLJIT_JUMP); |
6995 | } |
6996 | else |
6997 | { |
6998 | if (ref) |
6999 | { |
7000 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); |
7001 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
7002 | } |
7003 | else |
7004 | { |
7005 | compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); |
7006 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); |
7007 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); |
7008 | zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); |
7009 | } |
7010 | } |
7011 | |
7012 | BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); |
7013 | if (max > 0) |
7014 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); |
7015 | |
7016 | if (!ref) |
7017 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); |
7018 | compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); |
7019 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
7020 | |
7021 | if (min > 1) |
7022 | { |
7023 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
7024 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
7025 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); |
7026 | CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath); |
7027 | } |
7028 | else if (max > 0) |
7029 | OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); |
7030 | |
7031 | if (jump != NULL) |
7032 | JUMPHERE(jump); |
7033 | JUMPHERE(zerolength); |
7034 | |
7035 | count_match(common); |
7036 | return cc; |
7037 | } |
7038 | |
7039 | static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
7040 | { |
7041 | DEFINE_COMPILER; |
7042 | backtrack_common *backtrack; |
7043 | recurse_entry *entry = common->entries; |
7044 | recurse_entry *prev = NULL; |
7045 | sljit_sw start = GET(cc, 1); |
7046 | pcre_uchar *start_cc; |
7047 | BOOL needs_control_head; |
7048 | |
7049 | PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); |
7050 | |
7051 | /* Inlining simple patterns. */ |
7052 | if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack) |
7053 | { |
7054 | start_cc = common->start + start; |
7055 | compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack); |
7056 | BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE; |
7057 | return cc + 1 + LINK_SIZE; |
7058 | } |
7059 | |
7060 | while (entry != NULL) |
7061 | { |
7062 | if (entry->start == start) |
7063 | break; |
7064 | prev = entry; |
7065 | entry = entry->next; |
7066 | } |
7067 | |
7068 | if (entry == NULL) |
7069 | { |
7070 | entry = sljit_alloc_memory(compiler, sizeof(recurse_entry)); |
7071 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
7072 | return NULL; |
7073 | entry->next = NULL; |
7074 | entry->entry = NULL; |
7075 | entry->calls = NULL; |
7076 | entry->start = start; |
7077 | |
7078 | if (prev != NULL) |
7079 | prev->next = entry; |
7080 | else |
7081 | common->entries = entry; |
7082 | } |
7083 | |
7084 | if (common->has_set_som && common->mark_ptr != 0) |
7085 | { |
7086 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); |
7087 | allocate_stack(common, 2); |
7088 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); |
7089 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
7090 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); |
7091 | } |
7092 | else if (common->has_set_som || common->mark_ptr != 0) |
7093 | { |
7094 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); |
7095 | allocate_stack(common, 1); |
7096 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
7097 | } |
7098 | |
7099 | if (entry->entry == NULL) |
7100 | add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL)); |
7101 | else |
7102 | JUMPTO(SLJIT_FAST_CALL, entry->entry); |
7103 | /* Leave if the match is failed. */ |
7104 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); |
7105 | return cc + 1 + LINK_SIZE; |
7106 | } |
7107 | |
7108 | static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector) |
7109 | { |
7110 | const pcre_uchar *begin = arguments->begin; |
7111 | int *offset_vector = arguments->offsets; |
7112 | int offset_count = arguments->offset_count; |
7113 | int i; |
7114 | |
7115 | if (PUBL(callout) == NULL) |
7116 | return 0; |
7117 | |
7118 | callout_block->version = 2; |
7119 | callout_block->callout_data = arguments->callout_data; |
7120 | |
7121 | /* Offsets in subject. */ |
7122 | callout_block->subject_length = arguments->end - arguments->begin; |
7123 | callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin; |
7124 | callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin; |
7125 | #if defined COMPILE_PCRE8 |
7126 | callout_block->subject = (PCRE_SPTR)begin; |
7127 | #elif defined COMPILE_PCRE16 |
7128 | callout_block->subject = (PCRE_SPTR16)begin; |
7129 | #elif defined COMPILE_PCRE32 |
7130 | callout_block->subject = (PCRE_SPTR32)begin; |
7131 | #endif |
7132 | |
7133 | /* Convert and copy the JIT offset vector to the offset_vector array. */ |
7134 | callout_block->capture_top = 0; |
7135 | callout_block->offset_vector = offset_vector; |
7136 | for (i = 2; i < offset_count; i += 2) |
7137 | { |
7138 | offset_vector[i] = jit_ovector[i] - begin; |
7139 | offset_vector[i + 1] = jit_ovector[i + 1] - begin; |
7140 | if (jit_ovector[i] >= begin) |
7141 | callout_block->capture_top = i; |
7142 | } |
7143 | |
7144 | callout_block->capture_top = (callout_block->capture_top >> 1) + 1; |
7145 | if (offset_count > 0) |
7146 | offset_vector[0] = -1; |
7147 | if (offset_count > 1) |
7148 | offset_vector[1] = -1; |
7149 | return (*PUBL(callout))(callout_block); |
7150 | } |
7151 | |
7152 | /* Aligning to 8 byte. */ |
/* Size in bytes of the callout block that is built on the matching stack:
sizeof(PUBL(callout_block)) rounded up to the next multiple of 8. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block field "arg" relative to STACK_TOP. The offset
is negative based: -CALLOUT_ARG_SIZE plus the offset of the field inside the
structure, which matches the block address (STACK_TOP - CALLOUT_ARG_SIZE)
passed to do_callout below. */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))

/* Emits the matching path of a callout item.

The generated code reserves CALLOUT_ARG_SIZE bytes on the matching stack,
fills in the callout block fields known at this point, calls do_callout()
and then dispatches on its 32 bit result:
  > 0  jump to the backtrack list of this item,
  < 0  jump to the forced quit path,
  == 0 fall through and continue matching.

Arguments:
  common   the compiler state
  cc       points to the callout opcode; cc[1] is stored as callout_number,
           GET(cc, 2) as pattern_position and GET(cc, 2 + LINK_SIZE) as
           next_item_length
  parent   the parent backtrack; not referenced directly in this function
           (NOTE(review): presumably used by PUSH_BACKTRACK - confirm)

Returns:   pointer to the item following the callout (cc + 2 + 2 * LINK_SIZE)
           NOTE(review): PUSH_BACKTRACK is defined elsewhere and may return
           early on its own - confirm against the macro definition.
*/
static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Reserve the space of the callout block, expressed in sljit_sw sized slots. */
allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* The capture_last field is read from a local, so that local must exist. */
SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
/* NOTE(review): TMP1 presumably doubles as the first argument register of the
call below; it is also the base for reading mark_ptr - confirm. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarily store internal variables: the
offset_vector field receives STR_PTR and the subject field receives the value
of OVECTOR(0). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

/* The mark field is loaded from the jit_arguments structure when marks are
in use, otherwise an immediate 0 is stored. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers: STACK_TOP is parked in
LOCALS0 across the call and reloaded afterwards. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block. */
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
/* Third argument: address of the local area starting at OVECTOR_START. */
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* The result is a 32 bit value; the S32 move widens it before the compare. */
OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* Check return value: positive means backtrack, negative means forced quit. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
/* Jump directly to the forced quit label when it already exists, otherwise
queue the jump on the forced_quit list. */
if (common->forced_quit_label == NULL)
  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
else
  JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
return cc + 2 + 2 * LINK_SIZE;
}

/* The callout block helpers are private to the function above. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
7206 | |
7207 | static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc) |
7208 | { |
7209 | while (TRUE) |
7210 | { |
7211 | switch (*cc) |
7212 | { |
7213 | case OP_NOT_WORD_BOUNDARY: |
7214 | case OP_WORD_BOUNDARY: |
7215 | case OP_CIRC: |
7216 | case OP_CIRCM: |
7217 | case OP_DOLL: |
7218 | case OP_DOLLM: |
7219 | case OP_CALLOUT: |
7220 | case OP_ALT: |
7221 | cc += PRIV(OP_lengths)[*cc]; |
7222 | break; |
7223 | |
7224 | case OP_KET: |
7225 | return FALSE; |
7226 | |
7227 | default: |
7228 | return TRUE; |
7229 | } |
7230 | } |
7231 | } |
7232 | |
/* Emits the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT, as checked by the SLJIT_ASSERT below),
optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

Every alternative of the assertion is compiled in turn (matching path followed
by its backtracking path). The accept, quit, positive assert and then trap
related fields of "common" are saved on entry, overridden while the
alternatives are compiled, and restored before every return.

Arguments:
  common       the compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO that precedes it
  backtrack    the assert backtrack; its framesize and private_data_ptr are
               set here, and its matchingpath label is set for OP_BRAZERO
               (for OP_BRAMINZERO the label is only jumped to, so it is
               presumably set by the caller - confirm)
  conditional  TRUE when the assertion is the condition of a conditional
               group: failures are then collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks. Must be FALSE when
               a BRAZERO / BRAMINZERO prefix is present.

Returns:       pointer to the item after the closing bracket
               (cc + 1 + LINK_SIZE), or NULL if sljit reported a compiler
               error while an alternative was compiled
*/
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
/* Where a failing assertion jumps to. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* Where an alternative jumps to after it has matched (set below). */
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions a matching alternative goes to the local "tmp"
list (the success code below); for negative ones a match is a failure, so it
goes straight to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

/* Stack set-up. extrasize is the number of extra slots (STR_PTR, optional
control head, and the previous private data value when a frame is needed). */
if (framesize < 0)
  {
  extrasize = 1;
  /* The STR_PTR slot is omitted when the helper reports it is not needed. */
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    /* The control head is cleared for the assertion; the old value is kept
    in the second slot. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  /* TMP1 holds the previous private data value; the private data slot now
  receives STACK_TOP lowered by the size of the whole allocation. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Second and later alternatives restart from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: restore the saved state and give up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Accept jumps recorded inside this alternative land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      /* Leave a single slot holding 0 on the top of the stack. */
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    }
  /* The alternative matched: leave through the "found" list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: restore the saved state and give up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Advance to the next alternative. */
  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The normal fall-through path skips the quit landing code below. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Put back the control head that was saved in the second slot. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    /* TMP1 carries the previous private data value across the stack release. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO a failed assertion still continues on the matching path
  (the jump is bound to matchingpath below); otherwise it goes to the target
  list. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the success path and the pending failure jump continue here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* TMP1 carries the previous private data value across the stack release. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a BRAZERO / BRAMINZERO prefix the failure jumps are resolved here
    and the list is emptied, so nothing is left pending on it. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the state saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7654 | |
7655 | static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) |
7656 | { |
7657 | DEFINE_COMPILER; |
7658 | int stacksize; |
7659 | |
7660 | if (framesize < 0) |
7661 | { |
7662 | if (framesize == no_frame) |
7663 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
7664 | else |
7665 | { |
7666 | stacksize = needs_control_head ? 1 : 0; |
7667 | if (ket != OP_KET || has_alternatives) |
7668 | stacksize++; |
7669 | |
7670 | if (stacksize > 0) |
7671 | free_stack(common, stacksize); |
7672 | } |
7673 | |
7674 | if (needs_control_head) |
7675 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0); |
7676 | |
7677 | /* TMP2 which is set here used by OP_KETRMAX below. */ |
7678 | if (ket == OP_KETRMAX) |
7679 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); |
7680 | else if (ket == OP_KETRMIN) |
7681 | { |
7682 | /* Move the STR_PTR to the private_data_ptr. */ |
7683 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); |
7684 | } |
7685 | } |
7686 | else |
7687 | { |
7688 | stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; |
7689 | OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); |
7690 | if (needs_control_head) |
7691 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0); |
7692 | |
7693 | if (ket == OP_KETRMAX) |
7694 | { |
7695 | /* TMP2 which is set here used by OP_KETRMAX below. */ |
7696 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
7697 | } |
7698 | } |
7699 | if (needs_control_head) |
7700 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); |
7701 | } |
7702 | |
7703 | static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) |
7704 | { |
7705 | DEFINE_COMPILER; |
7706 | |
7707 | if (common->capture_last_ptr != 0) |
7708 | { |
7709 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); |
7710 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); |
7711 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); |
7712 | stacksize++; |
7713 | } |
7714 | if (common->optimized_cbracket[offset >> 1] == 0) |
7715 | { |
7716 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
7717 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
7718 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); |
7719 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
7720 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); |
7721 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); |
7722 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
7723 | stacksize += 2; |
7724 | } |
7725 | return stacksize; |
7726 | } |
7727 | |
7728 | /* |
7729 | Handling bracketed expressions is probably the most complex part. |
7730 | |
7731 | Stack layout naming characters: |
7732 | S - Push the current STR_PTR |
7733 | 0 - Push a 0 (NULL) |
7734 | A - Push the current STR_PTR. Needed for restoring the STR_PTR |
7735 | before the next alternative. Not pushed if there are no alternatives. |
7736 | M - Any values pushed by the current alternative. Can be empty, or anything. |
7737 | C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack. |
7738 | L - Push the previous local (pointed by localptr) to the stack |
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
7741 | |
7742 | The following list shows the regular expression templates, their PCRE byte codes |
7743 | and stack layout supported by pcre-sljit. |
7744 | |
7745 | (?:) OP_BRA | OP_KET A M |
7746 | () OP_CBRA | OP_KET C M |
7747 | (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )* |
7748 | OP_SBRA | OP_KETRMAX 0 L M S ( L M S )* |
7749 | (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )* |
7750 | OP_SBRA | OP_KETRMIN 0 L M S ( L M S )* |
7751 | ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )* |
7752 | OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )* |
7753 | ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )* |
7754 | OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )* |
7755 | (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 ) |
7756 | (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 ) |
7757 | ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 ) |
7758 | ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 ) |
7759 | (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )* |
7760 | OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )* |
7761 | (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )* |
7762 | OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )* |
7763 | ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )* |
7764 | OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )* |
7765 | ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )* |
7766 | OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )* |
7767 | |
7768 | |
7769 | Stack layout naming characters: |
7770 | A - Push the alternative index (starting from 0) on the stack. |
        Not pushed if there are no alternatives.
7772 | M - Any values pushed by the current alternative. Can be empty, or anything. |
7773 | |
7774 | The next list shows the possible content of a bracket: |
7775 | (|) OP_*BRA | OP_ALT ... M A |
7776 | (?()|) OP_*COND | OP_ALT M A |
7777 | (?>|) OP_ONCE | OP_ALT ... [stack trace] M A |
7778 | (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A |
7779 | Or nothing, if trace is unnecessary |
7780 | */ |
7781 | |
7782 | static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
7783 | { |
7784 | DEFINE_COMPILER; |
7785 | backtrack_common *backtrack; |
7786 | pcre_uchar opcode; |
7787 | int private_data_ptr = 0; |
7788 | int offset = 0; |
7789 | int i, stacksize; |
7790 | int repeat_ptr = 0, repeat_length = 0; |
7791 | int repeat_type = 0, repeat_count = 0; |
7792 | pcre_uchar *ccbegin; |
7793 | pcre_uchar *matchingpath; |
7794 | pcre_uchar *slot; |
7795 | pcre_uchar bra = OP_BRA; |
7796 | pcre_uchar ket; |
7797 | assert_backtrack *assert; |
7798 | BOOL has_alternatives; |
7799 | BOOL needs_control_head = FALSE; |
7800 | struct sljit_jump *jump; |
7801 | struct sljit_jump *skip; |
7802 | struct sljit_label *rmax_label = NULL; |
7803 | struct sljit_jump *braminzero = NULL; |
7804 | |
7805 | PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); |
7806 | |
7807 | if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) |
7808 | { |
7809 | bra = *cc; |
7810 | cc++; |
7811 | opcode = *cc; |
7812 | } |
7813 | |
7814 | opcode = *cc; |
7815 | ccbegin = cc; |
7816 | matchingpath = bracketend(cc) - 1 - LINK_SIZE; |
7817 | ket = *matchingpath; |
7818 | if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0) |
7819 | { |
7820 | repeat_ptr = PRIVATE_DATA(matchingpath); |
7821 | repeat_length = PRIVATE_DATA(matchingpath + 1); |
7822 | repeat_type = PRIVATE_DATA(matchingpath + 2); |
7823 | repeat_count = PRIVATE_DATA(matchingpath + 3); |
7824 | SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0); |
7825 | if (repeat_type == OP_UPTO) |
7826 | ket = OP_KETRMAX; |
7827 | if (repeat_type == OP_MINUPTO) |
7828 | ket = OP_KETRMIN; |
7829 | } |
7830 | |
7831 | if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF) |
7832 | { |
7833 | /* Drop this bracket_backtrack. */ |
7834 | parent->top = backtrack->prev; |
7835 | return matchingpath + 1 + LINK_SIZE + repeat_length; |
7836 | } |
7837 | |
7838 | matchingpath = ccbegin + 1 + LINK_SIZE; |
7839 | SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN); |
7840 | SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX))); |
7841 | cc += GET(cc, 1); |
7842 | |
7843 | has_alternatives = *cc == OP_ALT; |
7844 | if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) |
7845 | has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE; |
7846 | |
7847 | if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) |
7848 | opcode = OP_SCOND; |
7849 | if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) |
7850 | opcode = OP_ONCE; |
7851 | |
7852 | if (opcode == OP_CBRA || opcode == OP_SCBRA) |
7853 | { |
7854 | /* Capturing brackets has a pre-allocated space. */ |
7855 | offset = GET2(ccbegin, 1 + LINK_SIZE); |
7856 | if (common->optimized_cbracket[offset] == 0) |
7857 | { |
7858 | private_data_ptr = OVECTOR_PRIV(offset); |
7859 | offset <<= 1; |
7860 | } |
7861 | else |
7862 | { |
7863 | offset <<= 1; |
7864 | private_data_ptr = OVECTOR(offset); |
7865 | } |
7866 | BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; |
7867 | matchingpath += IMM2_SIZE; |
7868 | } |
7869 | else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) |
7870 | { |
7871 | /* Other brackets simply allocate the next entry. */ |
7872 | private_data_ptr = PRIVATE_DATA(ccbegin); |
7873 | SLJIT_ASSERT(private_data_ptr != 0); |
7874 | BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; |
7875 | if (opcode == OP_ONCE) |
7876 | BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head); |
7877 | } |
7878 | |
7879 | /* Instructions before the first alternative. */ |
7880 | stacksize = 0; |
7881 | if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) |
7882 | stacksize++; |
7883 | if (bra == OP_BRAZERO) |
7884 | stacksize++; |
7885 | |
7886 | if (stacksize > 0) |
7887 | allocate_stack(common, stacksize); |
7888 | |
7889 | stacksize = 0; |
7890 | if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) |
7891 | { |
7892 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); |
7893 | stacksize++; |
7894 | } |
7895 | |
7896 | if (bra == OP_BRAZERO) |
7897 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); |
7898 | |
7899 | if (bra == OP_BRAMINZERO) |
7900 | { |
7901 | /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */ |
7902 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
7903 | if (ket != OP_KETRMIN) |
7904 | { |
7905 | free_stack(common, 1); |
7906 | braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); |
7907 | } |
7908 | else |
7909 | { |
7910 | if (opcode == OP_ONCE || opcode >= OP_SBRA) |
7911 | { |
7912 | jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); |
7913 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
7914 | /* Nothing stored during the first run. */ |
7915 | skip = JUMP(SLJIT_JUMP); |
7916 | JUMPHERE(jump); |
7917 | /* Checking zero-length iteration. */ |
7918 | if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) |
7919 | { |
7920 | /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ |
7921 | braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
7922 | } |
7923 | else |
7924 | { |
7925 | /* Except when the whole stack frame must be saved. */ |
7926 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
7927 | braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw)); |
7928 | } |
7929 | JUMPHERE(skip); |
7930 | } |
7931 | else |
7932 | { |
7933 | jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); |
7934 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
7935 | JUMPHERE(jump); |
7936 | } |
7937 | } |
7938 | } |
7939 | |
7940 | if (repeat_type != 0) |
7941 | { |
7942 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count); |
7943 | if (repeat_type == OP_EXACT) |
7944 | rmax_label = LABEL(); |
7945 | } |
7946 | |
7947 | if (ket == OP_KETRMIN) |
7948 | BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); |
7949 | |
7950 | if (ket == OP_KETRMAX) |
7951 | { |
7952 | rmax_label = LABEL(); |
7953 | if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0) |
7954 | BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label; |
7955 | } |
7956 | |
7957 | /* Handling capturing brackets and alternatives. */ |
7958 | if (opcode == OP_ONCE) |
7959 | { |
7960 | stacksize = 0; |
7961 | if (needs_control_head) |
7962 | { |
7963 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
7964 | stacksize++; |
7965 | } |
7966 | |
7967 | if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) |
7968 | { |
7969 | /* Neither capturing brackets nor recursions are found in the block. */ |
7970 | if (ket == OP_KETRMIN) |
7971 | { |
7972 | stacksize += 2; |
7973 | if (!needs_control_head) |
7974 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
7975 | } |
7976 | else |
7977 | { |
7978 | if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) |
7979 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); |
7980 | if (ket == OP_KETRMAX || has_alternatives) |
7981 | stacksize++; |
7982 | } |
7983 | |
7984 | if (stacksize > 0) |
7985 | allocate_stack(common, stacksize); |
7986 | |
7987 | stacksize = 0; |
7988 | if (needs_control_head) |
7989 | { |
7990 | stacksize++; |
7991 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
7992 | } |
7993 | |
7994 | if (ket == OP_KETRMIN) |
7995 | { |
7996 | if (needs_control_head) |
7997 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
7998 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); |
7999 | if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) |
8000 | OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); |
8001 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); |
8002 | } |
8003 | else if (ket == OP_KETRMAX || has_alternatives) |
8004 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); |
8005 | } |
8006 | else |
8007 | { |
8008 | if (ket != OP_KET || has_alternatives) |
8009 | stacksize++; |
8010 | |
8011 | stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1; |
8012 | allocate_stack(common, stacksize); |
8013 | |
8014 | if (needs_control_head) |
8015 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
8016 | |
8017 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8018 | OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); |
8019 | |
8020 | stacksize = needs_control_head ? 1 : 0; |
8021 | if (ket != OP_KET || has_alternatives) |
8022 | { |
8023 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); |
8024 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); |
8025 | stacksize++; |
8026 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); |
8027 | } |
8028 | else |
8029 | { |
8030 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); |
8031 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); |
8032 | } |
8033 | init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE); |
8034 | } |
8035 | } |
8036 | else if (opcode == OP_CBRA || opcode == OP_SCBRA) |
8037 | { |
8038 | /* Saving the previous values. */ |
8039 | if (common->optimized_cbracket[offset >> 1] != 0) |
8040 | { |
8041 | SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); |
8042 | allocate_stack(common, 2); |
8043 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8044 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); |
8045 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); |
8046 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); |
8047 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); |
8048 | } |
8049 | else |
8050 | { |
8051 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8052 | allocate_stack(common, 1); |
8053 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); |
8054 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
8055 | } |
8056 | } |
8057 | else if (opcode == OP_SBRA || opcode == OP_SCOND) |
8058 | { |
8059 | /* Saving the previous value. */ |
8060 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8061 | allocate_stack(common, 1); |
8062 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); |
8063 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
8064 | } |
8065 | else if (has_alternatives) |
8066 | { |
8067 | /* Pushing the starting string pointer. */ |
8068 | allocate_stack(common, 1); |
8069 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
8070 | } |
8071 | |
8072 | /* Generating code for the first alternative. */ |
8073 | if (opcode == OP_COND || opcode == OP_SCOND) |
8074 | { |
8075 | if (*matchingpath == OP_CREF) |
8076 | { |
8077 | SLJIT_ASSERT(has_alternatives); |
8078 | add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), |
8079 | CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); |
8080 | matchingpath += 1 + IMM2_SIZE; |
8081 | } |
8082 | else if (*matchingpath == OP_DNCREF) |
8083 | { |
8084 | SLJIT_ASSERT(has_alternatives); |
8085 | |
8086 | i = GET2(matchingpath, 1 + IMM2_SIZE); |
8087 | slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; |
8088 | OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); |
8089 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); |
8090 | OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); |
8091 | slot += common->name_entry_size; |
8092 | i--; |
8093 | while (i-- > 0) |
8094 | { |
8095 | OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); |
8096 | OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0); |
8097 | slot += common->name_entry_size; |
8098 | } |
8099 | OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); |
8100 | add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); |
8101 | matchingpath += 1 + 2 * IMM2_SIZE; |
8102 | } |
8103 | else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) |
8104 | { |
8105 | /* Never has other case. */ |
8106 | BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; |
8107 | SLJIT_ASSERT(!has_alternatives); |
8108 | |
8109 | if (*matchingpath == OP_FAIL) |
8110 | stacksize = 0; |
8111 | if (*matchingpath == OP_RREF) |
8112 | { |
8113 | stacksize = GET2(matchingpath, 1); |
8114 | if (common->currententry == NULL) |
8115 | stacksize = 0; |
8116 | else if (stacksize == RREF_ANY) |
8117 | stacksize = 1; |
8118 | else if (common->currententry->start == 0) |
8119 | stacksize = stacksize == 0; |
8120 | else |
8121 | stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); |
8122 | |
8123 | if (stacksize != 0) |
8124 | matchingpath += 1 + IMM2_SIZE; |
8125 | } |
8126 | else |
8127 | { |
8128 | if (common->currententry == NULL || common->currententry->start == 0) |
8129 | stacksize = 0; |
8130 | else |
8131 | { |
8132 | stacksize = GET2(matchingpath, 1 + IMM2_SIZE); |
8133 | slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; |
8134 | i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); |
8135 | while (stacksize > 0) |
8136 | { |
8137 | if ((int)GET2(slot, 0) == i) |
8138 | break; |
8139 | slot += common->name_entry_size; |
8140 | stacksize--; |
8141 | } |
8142 | } |
8143 | |
8144 | if (stacksize != 0) |
8145 | matchingpath += 1 + 2 * IMM2_SIZE; |
8146 | } |
8147 | |
8148 | /* The stacksize == 0 is a common "else" case. */ |
8149 | if (stacksize == 0) |
8150 | { |
8151 | if (*cc == OP_ALT) |
8152 | { |
8153 | matchingpath = cc + 1 + LINK_SIZE; |
8154 | cc += GET(cc, 1); |
8155 | } |
8156 | else |
8157 | matchingpath = cc; |
8158 | } |
8159 | } |
8160 | else |
8161 | { |
8162 | SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT); |
8163 | /* Similar code as PUSH_BACKTRACK macro. */ |
8164 | assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); |
8165 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
8166 | return NULL; |
8167 | memset(assert, 0, sizeof(assert_backtrack)); |
8168 | assert->common.cc = matchingpath; |
8169 | BACKTRACK_AS(bracket_backtrack)->u.assert = assert; |
8170 | matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE); |
8171 | } |
8172 | } |
8173 | |
8174 | compile_matchingpath(common, matchingpath, cc, backtrack); |
8175 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
8176 | return NULL; |
8177 | |
8178 | if (opcode == OP_ONCE) |
8179 | match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); |
8180 | |
8181 | stacksize = 0; |
8182 | if (repeat_type == OP_MINUPTO) |
8183 | { |
8184 | /* We need to preserve the counter. TMP2 will be used below. */ |
8185 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); |
8186 | stacksize++; |
8187 | } |
8188 | if (ket != OP_KET || bra != OP_BRA) |
8189 | stacksize++; |
8190 | if (offset != 0) |
8191 | { |
8192 | if (common->capture_last_ptr != 0) |
8193 | stacksize++; |
8194 | if (common->optimized_cbracket[offset >> 1] == 0) |
8195 | stacksize += 2; |
8196 | } |
8197 | if (has_alternatives && opcode != OP_ONCE) |
8198 | stacksize++; |
8199 | |
8200 | if (stacksize > 0) |
8201 | allocate_stack(common, stacksize); |
8202 | |
8203 | stacksize = 0; |
8204 | if (repeat_type == OP_MINUPTO) |
8205 | { |
8206 | /* TMP2 was set above. */ |
8207 | OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); |
8208 | stacksize++; |
8209 | } |
8210 | |
8211 | if (ket != OP_KET || bra != OP_BRA) |
8212 | { |
8213 | if (ket != OP_KET) |
8214 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); |
8215 | else |
8216 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); |
8217 | stacksize++; |
8218 | } |
8219 | |
8220 | if (offset != 0) |
8221 | stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); |
8222 | |
8223 | if (has_alternatives) |
8224 | { |
8225 | if (opcode != OP_ONCE) |
8226 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); |
8227 | if (ket != OP_KETRMAX) |
8228 | BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); |
8229 | } |
8230 | |
8231 | /* Must be after the matchingpath label. */ |
8232 | if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) |
8233 | { |
8234 | SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); |
8235 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); |
8236 | } |
8237 | |
8238 | if (ket == OP_KETRMAX) |
8239 | { |
8240 | if (repeat_type != 0) |
8241 | { |
8242 | if (has_alternatives) |
8243 | BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); |
8244 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); |
8245 | JUMPTO(SLJIT_NOT_ZERO, rmax_label); |
8246 | /* Drop STR_PTR for greedy plus quantifier. */ |
8247 | if (opcode != OP_ONCE) |
8248 | free_stack(common, 1); |
8249 | } |
8250 | else if (opcode == OP_ONCE || opcode >= OP_SBRA) |
8251 | { |
8252 | if (has_alternatives) |
8253 | BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); |
8254 | /* Checking zero-length iteration. */ |
8255 | if (opcode != OP_ONCE) |
8256 | { |
8257 | CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label); |
8258 | /* Drop STR_PTR for greedy plus quantifier. */ |
8259 | if (bra != OP_BRAZERO) |
8260 | free_stack(common, 1); |
8261 | } |
8262 | else |
8263 | /* TMP2 must contain the starting STR_PTR. */ |
8264 | CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); |
8265 | } |
8266 | else |
8267 | JUMPTO(SLJIT_JUMP, rmax_label); |
8268 | BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); |
8269 | } |
8270 | |
8271 | if (repeat_type == OP_EXACT) |
8272 | { |
8273 | count_match(common); |
8274 | OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); |
8275 | JUMPTO(SLJIT_NOT_ZERO, rmax_label); |
8276 | } |
8277 | else if (repeat_type == OP_UPTO) |
8278 | { |
8279 | /* We need to preserve the counter. */ |
8280 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); |
8281 | allocate_stack(common, 1); |
8282 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); |
8283 | } |
8284 | |
8285 | if (bra == OP_BRAZERO) |
8286 | BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL(); |
8287 | |
8288 | if (bra == OP_BRAMINZERO) |
8289 | { |
8290 | /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */ |
8291 | JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath); |
8292 | if (braminzero != NULL) |
8293 | { |
8294 | JUMPHERE(braminzero); |
8295 | /* We need to release the end pointer to perform the |
8296 | backtrack for the zero-length iteration. When |
8297 | framesize is < 0, OP_ONCE will do the release itself. */ |
8298 | if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) |
8299 | { |
8300 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8301 | add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
8302 | } |
8303 | else if (ket == OP_KETRMIN && opcode != OP_ONCE) |
8304 | free_stack(common, 1); |
8305 | } |
8306 | /* Continue to the normal backtrack. */ |
8307 | } |
8308 | |
8309 | if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) |
8310 | count_match(common); |
8311 | |
8312 | /* Skip the other alternatives. */ |
8313 | while (*cc == OP_ALT) |
8314 | cc += GET(cc, 1); |
8315 | cc += 1 + LINK_SIZE; |
8316 | |
8317 | if (opcode == OP_ONCE) |
8318 | { |
8319 | /* We temporarily encode the needs_control_head in the lowest bit. |
8320 | Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns |
8321 | the same value for small signed numbers (including negative numbers). */ |
8322 | BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); |
8323 | } |
8324 | return cc + repeat_length; |
8325 | } |
8326 | |
8327 | static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
8328 | { |
8329 | DEFINE_COMPILER; |
8330 | backtrack_common *backtrack; |
8331 | pcre_uchar opcode; |
8332 | int private_data_ptr; |
8333 | int cbraprivptr = 0; |
8334 | BOOL needs_control_head; |
8335 | int framesize; |
8336 | int stacksize; |
8337 | int offset = 0; |
8338 | BOOL zero = FALSE; |
8339 | pcre_uchar *ccbegin = NULL; |
8340 | int stack; /* Also contains the offset of control head. */ |
8341 | struct sljit_label *loop = NULL; |
8342 | struct jump_list *emptymatch = NULL; |
8343 | |
8344 | PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); |
8345 | if (*cc == OP_BRAPOSZERO) |
8346 | { |
8347 | zero = TRUE; |
8348 | cc++; |
8349 | } |
8350 | |
8351 | opcode = *cc; |
8352 | private_data_ptr = PRIVATE_DATA(cc); |
8353 | SLJIT_ASSERT(private_data_ptr != 0); |
8354 | BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; |
8355 | switch(opcode) |
8356 | { |
8357 | case OP_BRAPOS: |
8358 | case OP_SBRAPOS: |
8359 | ccbegin = cc + 1 + LINK_SIZE; |
8360 | break; |
8361 | |
8362 | case OP_CBRAPOS: |
8363 | case OP_SCBRAPOS: |
8364 | offset = GET2(cc, 1 + LINK_SIZE); |
8365 | /* This case cannot be optimized in the same was as |
8366 | normal capturing brackets. */ |
8367 | SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); |
8368 | cbraprivptr = OVECTOR_PRIV(offset); |
8369 | offset <<= 1; |
8370 | ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; |
8371 | break; |
8372 | |
8373 | default: |
8374 | SLJIT_ASSERT_STOP(); |
8375 | break; |
8376 | } |
8377 | |
8378 | framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); |
8379 | BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; |
8380 | if (framesize < 0) |
8381 | { |
8382 | if (offset != 0) |
8383 | { |
8384 | stacksize = 2; |
8385 | if (common->capture_last_ptr != 0) |
8386 | stacksize++; |
8387 | } |
8388 | else |
8389 | stacksize = 1; |
8390 | |
8391 | if (needs_control_head) |
8392 | stacksize++; |
8393 | if (!zero) |
8394 | stacksize++; |
8395 | |
8396 | BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; |
8397 | allocate_stack(common, stacksize); |
8398 | if (framesize == no_frame) |
8399 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); |
8400 | |
8401 | stack = 0; |
8402 | if (offset != 0) |
8403 | { |
8404 | stack = 2; |
8405 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); |
8406 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); |
8407 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); |
8408 | if (common->capture_last_ptr != 0) |
8409 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); |
8410 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); |
8411 | if (needs_control_head) |
8412 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
8413 | if (common->capture_last_ptr != 0) |
8414 | { |
8415 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); |
8416 | stack = 3; |
8417 | } |
8418 | } |
8419 | else |
8420 | { |
8421 | if (needs_control_head) |
8422 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
8423 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
8424 | stack = 1; |
8425 | } |
8426 | |
8427 | if (needs_control_head) |
8428 | stack++; |
8429 | if (!zero) |
8430 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); |
8431 | if (needs_control_head) |
8432 | { |
8433 | stack--; |
8434 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); |
8435 | } |
8436 | } |
8437 | else |
8438 | { |
8439 | stacksize = framesize + 1; |
8440 | if (!zero) |
8441 | stacksize++; |
8442 | if (needs_control_head) |
8443 | stacksize++; |
8444 | if (offset == 0) |
8445 | stacksize++; |
8446 | BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; |
8447 | |
8448 | allocate_stack(common, stacksize); |
8449 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8450 | if (needs_control_head) |
8451 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
8452 | OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); |
8453 | |
8454 | stack = 0; |
8455 | if (!zero) |
8456 | { |
8457 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); |
8458 | stack = 1; |
8459 | } |
8460 | if (needs_control_head) |
8461 | { |
8462 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); |
8463 | stack++; |
8464 | } |
8465 | if (offset == 0) |
8466 | { |
8467 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); |
8468 | stack++; |
8469 | } |
8470 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); |
8471 | init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE); |
8472 | stack -= 1 + (offset == 0); |
8473 | } |
8474 | |
8475 | if (offset != 0) |
8476 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); |
8477 | |
8478 | loop = LABEL(); |
8479 | while (*cc != OP_KETRPOS) |
8480 | { |
8481 | backtrack->top = NULL; |
8482 | backtrack->topbacktracks = NULL; |
8483 | cc += GET(cc, 1); |
8484 | |
8485 | compile_matchingpath(common, ccbegin, cc, backtrack); |
8486 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
8487 | return NULL; |
8488 | |
8489 | if (framesize < 0) |
8490 | { |
8491 | if (framesize == no_frame) |
8492 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8493 | |
8494 | if (offset != 0) |
8495 | { |
8496 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); |
8497 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); |
8498 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); |
8499 | if (common->capture_last_ptr != 0) |
8500 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); |
8501 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
8502 | } |
8503 | else |
8504 | { |
8505 | if (opcode == OP_SBRAPOS) |
8506 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
8507 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
8508 | } |
8509 | |
8510 | /* Even if the match is empty, we need to reset the control head. */ |
8511 | if (needs_control_head) |
8512 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); |
8513 | |
8514 | if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) |
8515 | add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); |
8516 | |
8517 | if (!zero) |
8518 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); |
8519 | } |
8520 | else |
8521 | { |
8522 | if (offset != 0) |
8523 | { |
8524 | OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); |
8525 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); |
8526 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); |
8527 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); |
8528 | if (common->capture_last_ptr != 0) |
8529 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); |
8530 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
8531 | } |
8532 | else |
8533 | { |
8534 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8535 | OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); |
8536 | if (opcode == OP_SBRAPOS) |
8537 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); |
8538 | OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); |
8539 | } |
8540 | |
8541 | /* Even if the match is empty, we need to reset the control head. */ |
8542 | if (needs_control_head) |
8543 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); |
8544 | |
8545 | if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) |
8546 | add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); |
8547 | |
8548 | if (!zero) |
8549 | { |
8550 | if (framesize < 0) |
8551 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); |
8552 | else |
8553 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
8554 | } |
8555 | } |
8556 | |
8557 | JUMPTO(SLJIT_JUMP, loop); |
8558 | flush_stubs(common); |
8559 | |
8560 | compile_backtrackingpath(common, backtrack->top); |
8561 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
8562 | return NULL; |
8563 | set_jumps(backtrack->topbacktracks, LABEL()); |
8564 | |
8565 | if (framesize < 0) |
8566 | { |
8567 | if (offset != 0) |
8568 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); |
8569 | else |
8570 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
8571 | } |
8572 | else |
8573 | { |
8574 | if (offset != 0) |
8575 | { |
8576 | /* Last alternative. */ |
8577 | if (*cc == OP_KETRPOS) |
8578 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8579 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); |
8580 | } |
8581 | else |
8582 | { |
8583 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
8584 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); |
8585 | } |
8586 | } |
8587 | |
8588 | if (*cc == OP_KETRPOS) |
8589 | break; |
8590 | ccbegin = cc + 1 + LINK_SIZE; |
8591 | } |
8592 | |
8593 | /* We don't have to restore the control head in case of a failed match. */ |
8594 | |
8595 | backtrack->topbacktracks = NULL; |
8596 | if (!zero) |
8597 | { |
8598 | if (framesize < 0) |
8599 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); |
8600 | else /* TMP2 is set to [private_data_ptr] above. */ |
8601 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); |
8602 | } |
8603 | |
8604 | /* None of them matched. */ |
8605 | set_jumps(emptymatch, LABEL()); |
8606 | count_match(common); |
8607 | return cc + 1 + LINK_SIZE; |
8608 | } |
8609 | |
8610 | static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end) |
8611 | { |
8612 | int class_len; |
8613 | |
8614 | *opcode = *cc; |
8615 | *exact = 0; |
8616 | |
8617 | if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) |
8618 | { |
8619 | cc++; |
8620 | *type = OP_CHAR; |
8621 | } |
8622 | else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI) |
8623 | { |
8624 | cc++; |
8625 | *type = OP_CHARI; |
8626 | *opcode -= OP_STARI - OP_STAR; |
8627 | } |
8628 | else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO) |
8629 | { |
8630 | cc++; |
8631 | *type = OP_NOT; |
8632 | *opcode -= OP_NOTSTAR - OP_STAR; |
8633 | } |
8634 | else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI) |
8635 | { |
8636 | cc++; |
8637 | *type = OP_NOTI; |
8638 | *opcode -= OP_NOTSTARI - OP_STAR; |
8639 | } |
8640 | else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) |
8641 | { |
8642 | cc++; |
8643 | *opcode -= OP_TYPESTAR - OP_STAR; |
8644 | *type = OP_END; |
8645 | } |
8646 | else |
8647 | { |
8648 | SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); |
8649 | *type = *opcode; |
8650 | cc++; |
8651 | class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); |
8652 | *opcode = cc[class_len - 1]; |
8653 | |
8654 | if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) |
8655 | { |
8656 | *opcode -= OP_CRSTAR - OP_STAR; |
8657 | *end = cc + class_len; |
8658 | |
8659 | if (*opcode == OP_PLUS || *opcode == OP_MINPLUS) |
8660 | { |
8661 | *exact = 1; |
8662 | *opcode -= OP_PLUS - OP_STAR; |
8663 | } |
8664 | } |
8665 | else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) |
8666 | { |
8667 | *opcode -= OP_CRPOSSTAR - OP_POSSTAR; |
8668 | *end = cc + class_len; |
8669 | |
8670 | if (*opcode == OP_POSPLUS) |
8671 | { |
8672 | *exact = 1; |
8673 | *opcode = OP_POSSTAR; |
8674 | } |
8675 | } |
8676 | else |
8677 | { |
8678 | SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); |
8679 | *max = GET2(cc, (class_len + IMM2_SIZE)); |
8680 | *exact = GET2(cc, class_len); |
8681 | |
8682 | if (*max == 0) |
8683 | { |
8684 | if (*opcode == OP_CRPOSRANGE) |
8685 | *opcode = OP_POSSTAR; |
8686 | else |
8687 | *opcode -= OP_CRRANGE - OP_STAR; |
8688 | } |
8689 | else |
8690 | { |
8691 | *max -= *exact; |
8692 | if (*max == 0) |
8693 | *opcode = OP_EXACT; |
8694 | else if (*max == 1) |
8695 | { |
8696 | if (*opcode == OP_CRPOSRANGE) |
8697 | *opcode = OP_POSQUERY; |
8698 | else |
8699 | *opcode -= OP_CRRANGE - OP_QUERY; |
8700 | } |
8701 | else |
8702 | { |
8703 | if (*opcode == OP_CRPOSRANGE) |
8704 | *opcode = OP_POSUPTO; |
8705 | else |
8706 | *opcode -= OP_CRRANGE - OP_UPTO; |
8707 | } |
8708 | } |
8709 | *end = cc + class_len + 2 * IMM2_SIZE; |
8710 | } |
8711 | return cc; |
8712 | } |
8713 | |
8714 | switch(*opcode) |
8715 | { |
8716 | case OP_EXACT: |
8717 | *exact = GET2(cc, 0); |
8718 | cc += IMM2_SIZE; |
8719 | break; |
8720 | |
8721 | case OP_PLUS: |
8722 | case OP_MINPLUS: |
8723 | *exact = 1; |
8724 | *opcode -= OP_PLUS - OP_STAR; |
8725 | break; |
8726 | |
8727 | case OP_POSPLUS: |
8728 | *exact = 1; |
8729 | *opcode = OP_POSSTAR; |
8730 | break; |
8731 | |
8732 | case OP_UPTO: |
8733 | case OP_MINUPTO: |
8734 | case OP_POSUPTO: |
8735 | *max = GET2(cc, 0); |
8736 | cc += IMM2_SIZE; |
8737 | break; |
8738 | } |
8739 | |
8740 | if (*type == OP_END) |
8741 | { |
8742 | *type = *cc; |
8743 | *end = next_opcode(common, cc); |
8744 | cc++; |
8745 | return cc; |
8746 | } |
8747 | |
8748 | *end = cc + 1; |
8749 | #ifdef SUPPORT_UTF |
8750 | if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); |
8751 | #endif |
8752 | return cc; |
8753 | } |
8754 | |
8755 | static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
8756 | { |
8757 | DEFINE_COMPILER; |
8758 | backtrack_common *backtrack; |
8759 | pcre_uchar opcode; |
8760 | pcre_uchar type; |
8761 | sljit_u32 max = 0, exact; |
8762 | BOOL fast_fail; |
8763 | sljit_s32 fast_str_ptr; |
8764 | BOOL charpos_enabled; |
8765 | pcre_uchar charpos_char; |
8766 | unsigned int charpos_othercasebit; |
8767 | pcre_uchar *end; |
8768 | jump_list *no_match = NULL; |
8769 | jump_list *no_char1_match = NULL; |
8770 | struct sljit_jump *jump = NULL; |
8771 | struct sljit_label *label; |
8772 | int private_data_ptr = PRIVATE_DATA(cc); |
8773 | int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); |
8774 | int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; |
8775 | int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); |
8776 | int tmp_base, tmp_offset; |
8777 | |
8778 | PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); |
8779 | |
8780 | fast_str_ptr = PRIVATE_DATA(cc + 1); |
8781 | fast_fail = TRUE; |
8782 | |
8783 | SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr); |
8784 | |
8785 | if (cc == common->fast_forward_bc_ptr) |
8786 | fast_fail = FALSE; |
8787 | else if (common->fast_fail_start_ptr == 0) |
8788 | fast_str_ptr = 0; |
8789 | |
8790 | SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0 |
8791 | || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr)); |
8792 | |
8793 | cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); |
8794 | |
8795 | if (type != OP_EXTUNI) |
8796 | { |
8797 | tmp_base = TMP3; |
8798 | tmp_offset = 0; |
8799 | } |
8800 | else |
8801 | { |
8802 | tmp_base = SLJIT_MEM1(SLJIT_SP); |
8803 | tmp_offset = POSSESSIVE0; |
8804 | } |
8805 | |
8806 | if (fast_fail && fast_str_ptr != 0) |
8807 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); |
8808 | |
8809 | /* Handle fixed part first. */ |
8810 | if (exact > 1) |
8811 | { |
8812 | SLJIT_ASSERT(fast_str_ptr == 0); |
8813 | if (common->mode == JIT_COMPILE |
8814 | #ifdef SUPPORT_UTF |
8815 | && !common->utf |
8816 | #endif |
8817 | ) |
8818 | { |
8819 | OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); |
8820 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); |
8821 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); |
8822 | label = LABEL(); |
8823 | compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); |
8824 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
8825 | JUMPTO(SLJIT_NOT_ZERO, label); |
8826 | } |
8827 | else |
8828 | { |
8829 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); |
8830 | label = LABEL(); |
8831 | compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); |
8832 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
8833 | JUMPTO(SLJIT_NOT_ZERO, label); |
8834 | } |
8835 | } |
8836 | else if (exact == 1) |
8837 | compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); |
8838 | |
8839 | switch(opcode) |
8840 | { |
8841 | case OP_STAR: |
8842 | case OP_UPTO: |
8843 | SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR); |
8844 | |
8845 | if (type == OP_ANYNL || type == OP_EXTUNI) |
8846 | { |
8847 | SLJIT_ASSERT(private_data_ptr == 0); |
8848 | SLJIT_ASSERT(fast_str_ptr == 0); |
8849 | |
8850 | allocate_stack(common, 2); |
8851 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
8852 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
8853 | |
8854 | if (opcode == OP_UPTO) |
8855 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); |
8856 | |
8857 | label = LABEL(); |
8858 | compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); |
8859 | if (opcode == OP_UPTO) |
8860 | { |
8861 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); |
8862 | OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
8863 | jump = JUMP(SLJIT_ZERO); |
8864 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); |
8865 | } |
8866 | |
8867 | /* We cannot use TMP3 because of this allocate_stack. */ |
8868 | allocate_stack(common, 1); |
8869 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
8870 | JUMPTO(SLJIT_JUMP, label); |
8871 | if (jump != NULL) |
8872 | JUMPHERE(jump); |
8873 | } |
8874 | else |
8875 | { |
8876 | charpos_enabled = FALSE; |
8877 | charpos_char = 0; |
8878 | charpos_othercasebit = 0; |
8879 | |
8880 | if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) |
8881 | { |
8882 | charpos_enabled = TRUE; |
8883 | #ifdef SUPPORT_UTF |
8884 | charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); |
8885 | #endif |
8886 | if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) |
8887 | { |
8888 | charpos_othercasebit = char_get_othercase_bit(common, end + 1); |
8889 | if (charpos_othercasebit == 0) |
8890 | charpos_enabled = FALSE; |
8891 | } |
8892 | |
8893 | if (charpos_enabled) |
8894 | { |
8895 | charpos_char = end[1]; |
        /* Consume the OP_CHAR opcode. */
8897 | end += 2; |
8898 | #if defined COMPILE_PCRE8 |
8899 | SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); |
8900 | #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
8901 | SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); |
8902 | if ((charpos_othercasebit & 0x100) != 0) |
8903 | charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; |
8904 | #endif |
8905 | if (charpos_othercasebit != 0) |
8906 | charpos_char |= charpos_othercasebit; |
8907 | |
8908 | BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; |
8909 | BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; |
8910 | BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; |
8911 | } |
8912 | } |
8913 | |
8914 | if (charpos_enabled) |
8915 | { |
8916 | if (opcode == OP_UPTO) |
8917 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); |
8918 | |
8919 | /* Search the first instance of charpos_char. */ |
8920 | jump = JUMP(SLJIT_JUMP); |
8921 | label = LABEL(); |
8922 | if (opcode == OP_UPTO) |
8923 | { |
8924 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
8925 | add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); |
8926 | } |
8927 | compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); |
8928 | if (fast_str_ptr != 0) |
8929 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
8930 | JUMPHERE(jump); |
8931 | |
8932 | detect_partial_match(common, &backtrack->topbacktracks); |
8933 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
8934 | if (charpos_othercasebit != 0) |
8935 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); |
8936 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); |
8937 | |
8938 | if (private_data_ptr == 0) |
8939 | allocate_stack(common, 2); |
8940 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
8941 | OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); |
8942 | if (opcode == OP_UPTO) |
8943 | { |
8944 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
8945 | add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); |
8946 | } |
8947 | |
8948 | /* Search the last instance of charpos_char. */ |
8949 | label = LABEL(); |
8950 | compile_char1_matchingpath(common, type, cc, &no_match, FALSE); |
8951 | if (fast_str_ptr != 0) |
8952 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
8953 | detect_partial_match(common, &no_match); |
8954 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
8955 | if (charpos_othercasebit != 0) |
8956 | OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); |
8957 | if (opcode == OP_STAR) |
8958 | { |
8959 | CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); |
8960 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
8961 | } |
8962 | else |
8963 | { |
8964 | jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); |
8965 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
8966 | JUMPHERE(jump); |
8967 | } |
8968 | |
8969 | if (opcode == OP_UPTO) |
8970 | { |
8971 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
8972 | JUMPTO(SLJIT_NOT_ZERO, label); |
8973 | } |
8974 | else |
8975 | JUMPTO(SLJIT_JUMP, label); |
8976 | |
8977 | set_jumps(no_match, LABEL()); |
8978 | OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
8979 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
8980 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
8981 | } |
8982 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
8983 | else if (common->utf) |
8984 | { |
8985 | if (private_data_ptr == 0) |
8986 | allocate_stack(common, 2); |
8987 | |
8988 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
8989 | OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); |
8990 | |
8991 | if (opcode == OP_UPTO) |
8992 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
8993 | |
8994 | label = LABEL(); |
8995 | compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
8996 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
8997 | |
8998 | if (opcode == OP_UPTO) |
8999 | { |
9000 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9001 | JUMPTO(SLJIT_NOT_ZERO, label); |
9002 | } |
9003 | else |
9004 | JUMPTO(SLJIT_JUMP, label); |
9005 | |
9006 | set_jumps(no_match, LABEL()); |
9007 | OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
9008 | if (fast_str_ptr != 0) |
9009 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9010 | } |
9011 | #endif |
9012 | else |
9013 | { |
9014 | if (private_data_ptr == 0) |
9015 | allocate_stack(common, 2); |
9016 | |
9017 | OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); |
9018 | if (opcode == OP_UPTO) |
9019 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9020 | |
9021 | label = LABEL(); |
9022 | detect_partial_match(common, &no_match); |
9023 | compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); |
9024 | if (opcode == OP_UPTO) |
9025 | { |
9026 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9027 | JUMPTO(SLJIT_NOT_ZERO, label); |
9028 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9029 | } |
9030 | else |
9031 | JUMPTO(SLJIT_JUMP, label); |
9032 | |
9033 | set_jumps(no_char1_match, LABEL()); |
9034 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9035 | set_jumps(no_match, LABEL()); |
9036 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9037 | if (fast_str_ptr != 0) |
9038 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9039 | } |
9040 | } |
9041 | BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9042 | break; |
9043 | |
9044 | case OP_MINSTAR: |
9045 | if (private_data_ptr == 0) |
9046 | allocate_stack(common, 1); |
9047 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9048 | BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9049 | if (fast_str_ptr != 0) |
9050 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9051 | break; |
9052 | |
9053 | case OP_MINUPTO: |
9054 | SLJIT_ASSERT(fast_str_ptr == 0); |
9055 | if (private_data_ptr == 0) |
9056 | allocate_stack(common, 2); |
9057 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9058 | OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); |
9059 | BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9060 | break; |
9061 | |
9062 | case OP_QUERY: |
9063 | case OP_MINQUERY: |
9064 | SLJIT_ASSERT(fast_str_ptr == 0); |
9065 | if (private_data_ptr == 0) |
9066 | allocate_stack(common, 1); |
9067 | OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9068 | if (opcode == OP_QUERY) |
9069 | compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); |
9070 | BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9071 | break; |
9072 | |
9073 | case OP_EXACT: |
9074 | break; |
9075 | |
9076 | case OP_POSSTAR: |
9077 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
9078 | if (common->utf) |
9079 | { |
9080 | OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9081 | label = LABEL(); |
9082 | compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9083 | OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9084 | JUMPTO(SLJIT_JUMP, label); |
9085 | set_jumps(no_match, LABEL()); |
9086 | OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
9087 | if (fast_str_ptr != 0) |
9088 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9089 | break; |
9090 | } |
9091 | #endif |
9092 | label = LABEL(); |
9093 | detect_partial_match(common, &no_match); |
9094 | compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); |
9095 | JUMPTO(SLJIT_JUMP, label); |
9096 | set_jumps(no_char1_match, LABEL()); |
9097 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9098 | set_jumps(no_match, LABEL()); |
9099 | if (fast_str_ptr != 0) |
9100 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9101 | break; |
9102 | |
9103 | case OP_POSUPTO: |
9104 | SLJIT_ASSERT(fast_str_ptr == 0); |
9105 | #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
9106 | if (common->utf) |
9107 | { |
9108 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); |
9109 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9110 | label = LABEL(); |
9111 | compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9112 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); |
9113 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9114 | JUMPTO(SLJIT_NOT_ZERO, label); |
9115 | set_jumps(no_match, LABEL()); |
9116 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); |
9117 | break; |
9118 | } |
9119 | #endif |
9120 | OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9121 | label = LABEL(); |
9122 | detect_partial_match(common, &no_match); |
9123 | compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); |
9124 | OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9125 | JUMPTO(SLJIT_NOT_ZERO, label); |
9126 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9127 | set_jumps(no_char1_match, LABEL()); |
9128 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9129 | set_jumps(no_match, LABEL()); |
9130 | break; |
9131 | |
9132 | case OP_POSQUERY: |
9133 | SLJIT_ASSERT(fast_str_ptr == 0); |
9134 | OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9135 | compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9136 | OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9137 | set_jumps(no_match, LABEL()); |
9138 | OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
9139 | break; |
9140 | |
9141 | default: |
9142 | SLJIT_ASSERT_STOP(); |
9143 | break; |
9144 | } |
9145 | |
9146 | count_match(common); |
9147 | return end; |
9148 | } |
9149 | |
9150 | static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
9151 | { |
9152 | DEFINE_COMPILER; |
9153 | backtrack_common *backtrack; |
9154 | |
9155 | PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); |
9156 | |
9157 | if (*cc == OP_FAIL) |
9158 | { |
9159 | add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); |
9160 | return cc + 1; |
9161 | } |
9162 | |
9163 | if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) |
9164 | { |
9165 | /* No need to check notempty conditions. */ |
9166 | if (common->accept_label == NULL) |
9167 | add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); |
9168 | else |
9169 | JUMPTO(SLJIT_JUMP, common->accept_label); |
9170 | return cc + 1; |
9171 | } |
9172 | |
9173 | if (common->accept_label == NULL) |
9174 | add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); |
9175 | else |
9176 | CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); |
9177 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
9178 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); |
9179 | add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
9180 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); |
9181 | if (common->accept_label == NULL) |
9182 | add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); |
9183 | else |
9184 | CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label); |
9185 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
9186 | if (common->accept_label == NULL) |
9187 | add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); |
9188 | else |
9189 | CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); |
9190 | add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); |
9191 | return cc + 1; |
9192 | } |
9193 | |
9194 | static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc) |
9195 | { |
9196 | DEFINE_COMPILER; |
9197 | int offset = GET2(cc, 1); |
9198 | BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; |
9199 | |
9200 | /* Data will be discarded anyway... */ |
9201 | if (common->currententry != NULL) |
9202 | return cc + 1 + IMM2_SIZE; |
9203 | |
9204 | if (!optimized_cbracket) |
9205 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); |
9206 | offset <<= 1; |
9207 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); |
9208 | if (!optimized_cbracket) |
9209 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
9210 | return cc + 1 + IMM2_SIZE; |
9211 | } |
9212 | |
9213 | static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
9214 | { |
9215 | DEFINE_COMPILER; |
9216 | backtrack_common *backtrack; |
9217 | pcre_uchar opcode = *cc; |
9218 | pcre_uchar *ccend = cc + 1; |
9219 | |
9220 | if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) |
9221 | ccend += 2 + cc[1]; |
9222 | |
9223 | PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); |
9224 | |
9225 | if (opcode == OP_SKIP) |
9226 | { |
9227 | allocate_stack(common, 1); |
9228 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
9229 | return ccend; |
9230 | } |
9231 | |
9232 | if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) |
9233 | { |
9234 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
9235 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); |
9236 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); |
9237 | OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); |
9238 | } |
9239 | |
9240 | return ccend; |
9241 | } |
9242 | |
9243 | static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP }; |
9244 | |
9245 | static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) |
9246 | { |
9247 | DEFINE_COMPILER; |
9248 | backtrack_common *backtrack; |
9249 | BOOL needs_control_head; |
9250 | int size; |
9251 | |
9252 | PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); |
9253 | common->then_trap = BACKTRACK_AS(then_trap_backtrack); |
9254 | BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; |
9255 | BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start); |
9256 | BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head); |
9257 | |
9258 | size = BACKTRACK_AS(then_trap_backtrack)->framesize; |
9259 | size = 3 + (size < 0 ? 0 : size); |
9260 | |
9261 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
9262 | allocate_stack(common, size); |
9263 | if (size > 3) |
9264 | OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); |
9265 | else |
9266 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); |
9267 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); |
9268 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); |
9269 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); |
9270 | |
9271 | size = BACKTRACK_AS(then_trap_backtrack)->framesize; |
9272 | if (size >= 0) |
9273 | init_frame(common, cc, ccend, size - 1, 0, FALSE); |
9274 | } |
9275 | |
/* Compiles the matching path of the opcode sequence [cc, ccend).

Each opcode is dispatched to its dedicated code generator, which returns the
address of the next opcode to process. ccend must point to OP_END or to an
opcode in the OP_ALT .. OP_KETRPOS range (asserted below).

Simple items (assertions, single characters, back references) receive the
jump list their failures are added to: the nextbacktracks list of the most
recently pushed backtrack record of 'parent', or parent->topbacktracks when
no record has been pushed yet.

The function returns early, before the final assertion, when a generator
returns NULL or when an unexpected opcode is found. */
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

/* A non-zero then_offsets entry for this position requests a then trap
around the whole sequence. The previous trap is saved and restored at the
end of the function. */
if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Zero-width assertions handled by a single generator. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Non-repeated single character items. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* The previous OVECTOR(0) value is saved in a new stack slot, then
    OVECTOR(0) is overwritten with the current STR_PTR. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* Literal characters: the multi-character generator is used only in
    JIT_COMPILE mode, other modes compile one character at a time. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == JIT_COMPILE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* Repeated single character items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* The opcode found after the 32 bytes of class data tells whether the
    class is repeated (OP_CRSTAR .. OP_CRPOSRANGE). */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    /* Same as above; the length of the item is read with GET(cc, 1). */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    /* Back references: a repeat opcode directly after the reference
    selects the iterator variant. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* These references carry two immediates; compile_dnref_search() is
    emitted before the reference match itself. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* The bracket itself is skipped on the matching path; only STR_PTR is
    saved. When the bracket ends with OP_KETRMIN two slots are used, with a
    zero stored in the first one. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO is followed either by a bracket or, when the next opcode
    is not above OP_ASSERTBACK_NOT, by an assertion. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The old mark is saved on the stack and cc + 2 becomes the new mark,
    both in the local mark slot and in the arguments. When has_skip_arg is
    set, four more slots are allocated and linked into the control head
    chain: previous head, type_mark, the mark address and STR_PTR. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    /* Skip the opcode, the length unit and the 1 + cc[1] units after it. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* The bracket that follows is skipped without emitting any code. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_ASSERT_STOP();
    return;
    }
  /* A generator reported failure by returning NULL. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
9586 | |
/* The helper macros of the matching path generators are dropped here; the
code below this point does not use them. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of (current) and leaves the enclosing
function (which must return void) when the sljit compiler reports an error
afterwards. 'common' and 'compiler' must be in scope where it is used. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the local variable 'current' to a pointer to the given backtrack
structure type. */
#define CURRENT_AS(type) ((type *)current)
9601 | |
/* Generates the backtracking path of a repeated single character item whose
backtrack record is 'current'.

The iterator state is addressed through base/offset0/offset1: two slots on
the top of the stack when the item has no private data (private_data_ptr is
zero), otherwise the private data area at private_data_ptr relative to
SLJIT_SP. Stack slots are released once the iterator is exhausted. Every
variant ends by binding current->topbacktracks to the end of the emitted
code. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
pcre_uchar *end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* One saved STR_PTR is popped per backtrack; a non-zero value resumes
    the matching path, a zero value falls through to the end. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  else
    {
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
      /* offset0 holds the current position and offset1 the lower limit.
      Step back one code unit, then keep stepping back while above the
      limit until the unit before STR_PTR (OR-ed with othercasebit when
      that is set) equals charpos.chr; then resume the matching path. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      label = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      skip_char_back(common);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
    else
      {
      /* Give back one character unless the position stored at offset0 has
      already reached the limit stored at offset1. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      skip_char_back(common);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
    /* Exhausted: release the two state slots if they live on the stack. */
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Restore the saved position, match one more character, save the new
  position and resume the matching path. A failed character match lands
  after the jump and releases the slot. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* offset1 holds a counter that is decremented before each extra
  character; reaching zero ends the iteration. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved position is consumed (the slot is zeroed). A non-zero value
  resumes the matching path from that position; zero means the item is
  exhausted. The u.backtracks jumps are bound to the second load/zero
  sequence below, which always resumes the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* The saved position is consumed (the slot is zeroed). If it was
  non-zero, one character is matched from there before resuming the
  matching path; zero or a failed match ends the item. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* No code is emitted here for these variants. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_ASSERT_STOP();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
9729 | |
9730 | static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
9731 | { |
9732 | DEFINE_COMPILER; |
9733 | pcre_uchar *cc = current->cc; |
9734 | BOOL ref = (*cc == OP_REF || *cc == OP_REFI); |
9735 | pcre_uchar type; |
9736 | |
9737 | type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; |
9738 | |
9739 | if ((type & 0x1) == 0) |
9740 | { |
9741 | /* Maximize case. */ |
9742 | set_jumps(current->topbacktracks, LABEL()); |
9743 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
9744 | free_stack(common, 1); |
9745 | CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); |
9746 | return; |
9747 | } |
9748 | |
9749 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
9750 | CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); |
9751 | set_jumps(current->topbacktracks, LABEL()); |
9752 | free_stack(common, ref ? 2 : 3); |
9753 | } |
9754 | |
9755 | static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
9756 | { |
9757 | DEFINE_COMPILER; |
9758 | |
9759 | if (CURRENT_AS(recurse_backtrack)->inlined_pattern) |
9760 | compile_backtrackingpath(common, current->top); |
9761 | set_jumps(current->topbacktracks, LABEL()); |
9762 | if (CURRENT_AS(recurse_backtrack)->inlined_pattern) |
9763 | return; |
9764 | |
9765 | if (common->has_set_som && common->mark_ptr != 0) |
9766 | { |
9767 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
9768 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
9769 | free_stack(common, 2); |
9770 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0); |
9771 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); |
9772 | } |
9773 | else if (common->has_set_som || common->mark_ptr != 0) |
9774 | { |
9775 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
9776 | free_stack(common, 1); |
9777 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); |
9778 | } |
9779 | } |
9780 | |
9781 | static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
9782 | { |
9783 | DEFINE_COMPILER; |
9784 | pcre_uchar *cc = current->cc; |
9785 | pcre_uchar bra = OP_BRA; |
9786 | struct sljit_jump *brajump = NULL; |
9787 | |
9788 | SLJIT_ASSERT(*cc != OP_BRAMINZERO); |
9789 | if (*cc == OP_BRAZERO) |
9790 | { |
9791 | bra = *cc; |
9792 | cc++; |
9793 | } |
9794 | |
9795 | if (bra == OP_BRAZERO) |
9796 | { |
9797 | SLJIT_ASSERT(current->topbacktracks == NULL); |
9798 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
9799 | } |
9800 | |
9801 | if (CURRENT_AS(assert_backtrack)->framesize < 0) |
9802 | { |
9803 | set_jumps(current->topbacktracks, LABEL()); |
9804 | |
9805 | if (bra == OP_BRAZERO) |
9806 | { |
9807 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
9808 | CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); |
9809 | free_stack(common, 1); |
9810 | } |
9811 | return; |
9812 | } |
9813 | |
9814 | if (bra == OP_BRAZERO) |
9815 | { |
9816 | if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) |
9817 | { |
9818 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
9819 | CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); |
9820 | free_stack(common, 1); |
9821 | return; |
9822 | } |
9823 | free_stack(common, 1); |
9824 | brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); |
9825 | } |
9826 | |
9827 | if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) |
9828 | { |
9829 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr); |
9830 | add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
9831 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); |
9832 | |
9833 | set_jumps(current->topbacktracks, LABEL()); |
9834 | } |
9835 | else |
9836 | set_jumps(current->topbacktracks, LABEL()); |
9837 | |
9838 | if (bra == OP_BRAZERO) |
9839 | { |
9840 | /* We know there is enough place on the stack. */ |
9841 | OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); |
9842 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); |
9843 | JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); |
9844 | JUMPHERE(brajump); |
9845 | } |
9846 | } |
9847 | |
/* Emits the backtracking path of a bracketed group: OP_BRA, OP_CBRA, OP_SBRA,
OP_SCBRA, OP_COND, OP_SCOND, OP_ONCE or OP_ONCE_NC, optionally preceded by
OP_BRAZERO or OP_BRAMINZERO.

The generated code, in order:
  1. restores the repeat counter and handles the loop entry for repeated
     groups (KETRMAX / KETRMIN or a counted repeat),
  2. restores the capture data saved on the stack for capturing groups,
  3. selects and runs the backtracking code of the alternative that matched,
     and for every following alternative emits its matching path followed by
     its backtracking path,
  4. releases the remaining stack slots and loops back or falls through.

Returns early (leaving the code incomplete) when read-only data cannot be
allocated or when the sljit compiler reports an error. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Remember an optional zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily addresses the closing KET opcode of the group. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
/* A plain OP_KET with private data describes a counted repeat. OP_UPTO and
OP_MINUPTO repeats are handled like OP_KETRMAX and OP_KETRMIN below. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the bracket opcode and cc is advanced by the link
value stored after it. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing groups, offset is twice the 2-unit value stored after the
link; it is used as an OVECTOR index pair (offset, offset + 1). */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
/* Note that this rewrites u.framesize in the backtrack record itself. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted OP_UPTO / OP_MINUPTO repeat: pop the saved counter back into its
private slot (incremented by one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* A zero value on the stack skips the group's backtracking code; the
    brazero jump is bound at the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the KETRMIN loop-back emitted at the end of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* The counter restarts at 1; exact_label is the target of the counted
  loop emitted at the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing group: restore the values saved on the stack (the capture_last
slot when it is in use, and the two OVECTOR entries unless the bracket is
marked in optimized_cbracket). */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

/* Select the backtracking code to run. For OP_ONCE the whole alternative
section is jumped over (the 'once' jump is bound near the end). Otherwise
TMP1 receives the value popped from the stack and selects the alternative:
through an indirect jump table when there are more than four alternatives,
through the alt1 / alt2 compare-and-jump chain otherwise. */
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking code of the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    /* Skip the second alternative; 'cond' is bound after the loop below. */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* alt_count is the value stored on the stack for the alternative being
  generated; it advances in steps of sizeof(sljit_uw). */
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      /* Reload the subject position saved when the group was entered. */
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots that must be allocated. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots; stacksize is now the running slot index. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Record which alternative matched, for the dispatch emitted above. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Bind the dispatch entry for this alternative's backtracking code: the
    next jump table slot, or the pending alt1 / alt2 jump. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* All alternatives failed: restore the state saved when the group was
entered and release its stack slots. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* The jump emitted before the alternatives for OP_ONCE lands here. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Loop handling: jump back for another iteration or fall through to the
previous backtrack. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop to exact_label while it is still within
  repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
10309 | |
10310 | static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
10311 | { |
10312 | DEFINE_COMPILER; |
10313 | int offset; |
10314 | struct sljit_jump *jump; |
10315 | |
10316 | if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) |
10317 | { |
10318 | if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) |
10319 | { |
10320 | offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1; |
10321 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10322 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
10323 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
10324 | if (common->capture_last_ptr != 0) |
10325 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); |
10326 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); |
10327 | if (common->capture_last_ptr != 0) |
10328 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); |
10329 | } |
10330 | set_jumps(current->topbacktracks, LABEL()); |
10331 | free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); |
10332 | return; |
10333 | } |
10334 | |
10335 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); |
10336 | add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10337 | |
10338 | if (current->topbacktracks) |
10339 | { |
10340 | jump = JUMP(SLJIT_JUMP); |
10341 | set_jumps(current->topbacktracks, LABEL()); |
10342 | /* Drop the stack frame. */ |
10343 | free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); |
10344 | JUMPHERE(jump); |
10345 | } |
10346 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw)); |
10347 | } |
10348 | |
10349 | static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
10350 | { |
10351 | assert_backtrack backtrack; |
10352 | |
10353 | current->top = NULL; |
10354 | current->topbacktracks = NULL; |
10355 | current->nextbacktracks = NULL; |
10356 | if (current->cc[1] > OP_ASSERTBACK_NOT) |
10357 | { |
10358 | /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */ |
10359 | compile_bracket_matchingpath(common, current->cc, current); |
10360 | compile_bracket_backtrackingpath(common, current->top); |
10361 | } |
10362 | else |
10363 | { |
10364 | memset(&backtrack, 0, sizeof(backtrack)); |
10365 | backtrack.common.cc = current->cc; |
10366 | backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath; |
10367 | /* Manual call of compile_assert_matchingpath. */ |
10368 | compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); |
10369 | } |
10370 | SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); |
10371 | } |
10372 | |
10373 | static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
10374 | { |
10375 | DEFINE_COMPILER; |
10376 | pcre_uchar opcode = *current->cc; |
10377 | struct sljit_label *loop; |
10378 | struct sljit_jump *jump; |
10379 | |
10380 | if (opcode == OP_THEN || opcode == OP_THEN_ARG) |
10381 | { |
10382 | if (common->then_trap != NULL) |
10383 | { |
10384 | SLJIT_ASSERT(common->control_head_ptr != 0); |
10385 | |
10386 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
10387 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap); |
10388 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start); |
10389 | jump = JUMP(SLJIT_JUMP); |
10390 | |
10391 | loop = LABEL(); |
10392 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw)); |
10393 | JUMPHERE(jump); |
10394 | CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop); |
10395 | CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop); |
10396 | add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP)); |
10397 | return; |
10398 | } |
10399 | else if (common->positive_assert) |
10400 | { |
10401 | add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP)); |
10402 | return; |
10403 | } |
10404 | } |
10405 | |
10406 | if (common->local_exit) |
10407 | { |
10408 | if (common->quit_label == NULL) |
10409 | add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); |
10410 | else |
10411 | JUMPTO(SLJIT_JUMP, common->quit_label); |
10412 | return; |
10413 | } |
10414 | |
10415 | if (opcode == OP_SKIP_ARG) |
10416 | { |
10417 | SLJIT_ASSERT(common->control_head_ptr != 0); |
10418 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); |
10419 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); |
10420 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); |
10421 | sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); |
10422 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
10423 | |
10424 | OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); |
10425 | add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1)); |
10426 | return; |
10427 | } |
10428 | |
10429 | if (opcode == OP_SKIP) |
10430 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10431 | else |
10432 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0); |
10433 | add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); |
10434 | } |
10435 | |
10436 | static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
10437 | { |
10438 | DEFINE_COMPILER; |
10439 | struct sljit_jump *jump; |
10440 | int size; |
10441 | |
10442 | if (CURRENT_AS(then_trap_backtrack)->then_trap) |
10443 | { |
10444 | common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap; |
10445 | return; |
10446 | } |
10447 | |
10448 | size = CURRENT_AS(then_trap_backtrack)->framesize; |
10449 | size = 3 + (size < 0 ? 0 : size); |
10450 | |
10451 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3)); |
10452 | free_stack(common, size); |
10453 | jump = JUMP(SLJIT_JUMP); |
10454 | |
10455 | set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL()); |
10456 | /* STACK_TOP is set by THEN. */ |
10457 | if (CURRENT_AS(then_trap_backtrack)->framesize >= 0) |
10458 | add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10459 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10460 | free_stack(common, 3); |
10461 | |
10462 | JUMPHERE(jump); |
10463 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); |
10464 | } |
10465 | |
10466 | static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
10467 | { |
10468 | DEFINE_COMPILER; |
10469 | then_trap_backtrack *save_then_trap = common->then_trap; |
10470 | |
10471 | while (current) |
10472 | { |
10473 | if (current->nextbacktracks != NULL) |
10474 | set_jumps(current->nextbacktracks, LABEL()); |
10475 | switch(*current->cc) |
10476 | { |
10477 | case OP_SET_SOM: |
10478 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10479 | free_stack(common, 1); |
10480 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0); |
10481 | break; |
10482 | |
10483 | case OP_STAR: |
10484 | case OP_MINSTAR: |
10485 | case OP_PLUS: |
10486 | case OP_MINPLUS: |
10487 | case OP_QUERY: |
10488 | case OP_MINQUERY: |
10489 | case OP_UPTO: |
10490 | case OP_MINUPTO: |
10491 | case OP_EXACT: |
10492 | case OP_POSSTAR: |
10493 | case OP_POSPLUS: |
10494 | case OP_POSQUERY: |
10495 | case OP_POSUPTO: |
10496 | case OP_STARI: |
10497 | case OP_MINSTARI: |
10498 | case OP_PLUSI: |
10499 | case OP_MINPLUSI: |
10500 | case OP_QUERYI: |
10501 | case OP_MINQUERYI: |
10502 | case OP_UPTOI: |
10503 | case OP_MINUPTOI: |
10504 | case OP_EXACTI: |
10505 | case OP_POSSTARI: |
10506 | case OP_POSPLUSI: |
10507 | case OP_POSQUERYI: |
10508 | case OP_POSUPTOI: |
10509 | case OP_NOTSTAR: |
10510 | case OP_NOTMINSTAR: |
10511 | case OP_NOTPLUS: |
10512 | case OP_NOTMINPLUS: |
10513 | case OP_NOTQUERY: |
10514 | case OP_NOTMINQUERY: |
10515 | case OP_NOTUPTO: |
10516 | case OP_NOTMINUPTO: |
10517 | case OP_NOTEXACT: |
10518 | case OP_NOTPOSSTAR: |
10519 | case OP_NOTPOSPLUS: |
10520 | case OP_NOTPOSQUERY: |
10521 | case OP_NOTPOSUPTO: |
10522 | case OP_NOTSTARI: |
10523 | case OP_NOTMINSTARI: |
10524 | case OP_NOTPLUSI: |
10525 | case OP_NOTMINPLUSI: |
10526 | case OP_NOTQUERYI: |
10527 | case OP_NOTMINQUERYI: |
10528 | case OP_NOTUPTOI: |
10529 | case OP_NOTMINUPTOI: |
10530 | case OP_NOTEXACTI: |
10531 | case OP_NOTPOSSTARI: |
10532 | case OP_NOTPOSPLUSI: |
10533 | case OP_NOTPOSQUERYI: |
10534 | case OP_NOTPOSUPTOI: |
10535 | case OP_TYPESTAR: |
10536 | case OP_TYPEMINSTAR: |
10537 | case OP_TYPEPLUS: |
10538 | case OP_TYPEMINPLUS: |
10539 | case OP_TYPEQUERY: |
10540 | case OP_TYPEMINQUERY: |
10541 | case OP_TYPEUPTO: |
10542 | case OP_TYPEMINUPTO: |
10543 | case OP_TYPEEXACT: |
10544 | case OP_TYPEPOSSTAR: |
10545 | case OP_TYPEPOSPLUS: |
10546 | case OP_TYPEPOSQUERY: |
10547 | case OP_TYPEPOSUPTO: |
10548 | case OP_CLASS: |
10549 | case OP_NCLASS: |
10550 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
10551 | case OP_XCLASS: |
10552 | #endif |
10553 | compile_iterator_backtrackingpath(common, current); |
10554 | break; |
10555 | |
10556 | case OP_REF: |
10557 | case OP_REFI: |
10558 | case OP_DNREF: |
10559 | case OP_DNREFI: |
10560 | compile_ref_iterator_backtrackingpath(common, current); |
10561 | break; |
10562 | |
10563 | case OP_RECURSE: |
10564 | compile_recurse_backtrackingpath(common, current); |
10565 | break; |
10566 | |
10567 | case OP_ASSERT: |
10568 | case OP_ASSERT_NOT: |
10569 | case OP_ASSERTBACK: |
10570 | case OP_ASSERTBACK_NOT: |
10571 | compile_assert_backtrackingpath(common, current); |
10572 | break; |
10573 | |
10574 | case OP_ONCE: |
10575 | case OP_ONCE_NC: |
10576 | case OP_BRA: |
10577 | case OP_CBRA: |
10578 | case OP_COND: |
10579 | case OP_SBRA: |
10580 | case OP_SCBRA: |
10581 | case OP_SCOND: |
10582 | compile_bracket_backtrackingpath(common, current); |
10583 | break; |
10584 | |
10585 | case OP_BRAZERO: |
10586 | if (current->cc[1] > OP_ASSERTBACK_NOT) |
10587 | compile_bracket_backtrackingpath(common, current); |
10588 | else |
10589 | compile_assert_backtrackingpath(common, current); |
10590 | break; |
10591 | |
10592 | case OP_BRAPOS: |
10593 | case OP_CBRAPOS: |
10594 | case OP_SBRAPOS: |
10595 | case OP_SCBRAPOS: |
10596 | case OP_BRAPOSZERO: |
10597 | compile_bracketpos_backtrackingpath(common, current); |
10598 | break; |
10599 | |
10600 | case OP_BRAMINZERO: |
10601 | compile_braminzero_backtrackingpath(common, current); |
10602 | break; |
10603 | |
10604 | case OP_MARK: |
10605 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0)); |
10606 | if (common->has_skip_arg) |
10607 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10608 | free_stack(common, common->has_skip_arg ? 5 : 1); |
10609 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); |
10610 | if (common->has_skip_arg) |
10611 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0); |
10612 | break; |
10613 | |
10614 | case OP_THEN: |
10615 | case OP_THEN_ARG: |
10616 | case OP_PRUNE: |
10617 | case OP_PRUNE_ARG: |
10618 | case OP_SKIP: |
10619 | case OP_SKIP_ARG: |
10620 | compile_control_verb_backtrackingpath(common, current); |
10621 | break; |
10622 | |
10623 | case OP_COMMIT: |
10624 | if (!common->local_exit) |
10625 | OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); |
10626 | if (common->quit_label == NULL) |
10627 | add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); |
10628 | else |
10629 | JUMPTO(SLJIT_JUMP, common->quit_label); |
10630 | break; |
10631 | |
10632 | case OP_CALLOUT: |
10633 | case OP_FAIL: |
10634 | case OP_ACCEPT: |
10635 | case OP_ASSERT_ACCEPT: |
10636 | set_jumps(current->topbacktracks, LABEL()); |
10637 | break; |
10638 | |
10639 | case OP_THEN_TRAP: |
10640 | /* A virtual opcode for then traps. */ |
10641 | compile_then_trap_backtrackingpath(common, current); |
10642 | break; |
10643 | |
10644 | default: |
10645 | SLJIT_ASSERT_STOP(); |
10646 | break; |
10647 | } |
10648 | current = current->prev; |
10649 | } |
10650 | common->then_trap = save_then_trap; |
10651 | } |
10652 | |
10653 | static SLJIT_INLINE void compile_recurse(compiler_common *common) |
10654 | { |
10655 | DEFINE_COMPILER; |
10656 | pcre_uchar *cc = common->start + common->currententry->start; |
10657 | pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); |
10658 | pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE); |
10659 | BOOL needs_control_head; |
10660 | int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head); |
10661 | int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head); |
10662 | int alternativesize; |
10663 | BOOL needs_frame; |
10664 | backtrack_common altbacktrack; |
10665 | struct sljit_jump *jump; |
10666 | |
10667 | /* Recurse captures then. */ |
10668 | common->then_trap = NULL; |
10669 | |
10670 | SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS); |
10671 | needs_frame = framesize >= 0; |
10672 | if (!needs_frame) |
10673 | framesize = 0; |
10674 | alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0; |
10675 | |
10676 | SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0); |
10677 | common->currententry->entry = LABEL(); |
10678 | set_jumps(common->currententry->calls, common->currententry->entry); |
10679 | |
10680 | sljit_emit_fast_enter(compiler, TMP2, 0); |
10681 | count_match(common); |
10682 | allocate_stack(common, private_data_size + framesize + alternativesize); |
10683 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0); |
10684 | copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head); |
10685 | if (needs_control_head) |
10686 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); |
10687 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); |
10688 | if (needs_frame) |
10689 | init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE); |
10690 | |
10691 | if (alternativesize > 0) |
10692 | OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
10693 | |
10694 | memset(&altbacktrack, 0, sizeof(backtrack_common)); |
10695 | common->quit_label = NULL; |
10696 | common->accept_label = NULL; |
10697 | common->quit = NULL; |
10698 | common->accept = NULL; |
10699 | altbacktrack.cc = ccbegin; |
10700 | cc += GET(cc, 1); |
10701 | while (1) |
10702 | { |
10703 | altbacktrack.top = NULL; |
10704 | altbacktrack.topbacktracks = NULL; |
10705 | |
10706 | if (altbacktrack.cc != ccbegin) |
10707 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10708 | |
10709 | compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack); |
10710 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
10711 | return; |
10712 | |
10713 | add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); |
10714 | |
10715 | compile_backtrackingpath(common, altbacktrack.top); |
10716 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
10717 | return; |
10718 | set_jumps(altbacktrack.topbacktracks, LABEL()); |
10719 | |
10720 | if (*cc != OP_ALT) |
10721 | break; |
10722 | |
10723 | altbacktrack.cc = cc + 1 + LINK_SIZE; |
10724 | cc += GET(cc, 1); |
10725 | } |
10726 | |
10727 | /* None of them matched. */ |
10728 | OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); |
10729 | jump = JUMP(SLJIT_JUMP); |
10730 | |
10731 | if (common->quit != NULL) |
10732 | { |
10733 | set_jumps(common->quit, LABEL()); |
10734 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); |
10735 | if (needs_frame) |
10736 | { |
10737 | OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); |
10738 | add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10739 | OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); |
10740 | } |
10741 | OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); |
10742 | common->quit = NULL; |
10743 | add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); |
10744 | } |
10745 | |
10746 | set_jumps(common->accept, LABEL()); |
10747 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); |
10748 | if (needs_frame) |
10749 | { |
10750 | OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); |
10751 | add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10752 | OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); |
10753 | } |
10754 | OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); |
10755 | |
10756 | JUMPHERE(jump); |
10757 | if (common->quit != NULL) |
10758 | set_jumps(common->quit, LABEL()); |
10759 | copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head); |
10760 | free_stack(common, private_data_size + framesize + alternativesize); |
10761 | if (needs_control_head) |
10762 | { |
10763 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw)); |
10764 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); |
10765 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0); |
10766 | OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
10767 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0); |
10768 | } |
10769 | else |
10770 | { |
10771 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); |
10772 | OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); |
10773 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0); |
10774 | } |
10775 | sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); |
10776 | } |
10777 | |
10778 | #undef COMPILE_BACKTRACKINGPATH |
10779 | #undef CURRENT_AS |
10780 | |
10781 | void |
10782 | PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode) |
10783 | { |
10784 | struct sljit_compiler *compiler; |
10785 | backtrack_common rootbacktrack; |
10786 | compiler_common common_data; |
10787 | compiler_common *common = &common_data; |
10788 | const sljit_u8 *tables = re->tables; |
10789 | pcre_study_data *study; |
10790 | int private_data_size; |
10791 | pcre_uchar *ccend; |
10792 | executable_functions *functions; |
10793 | void *executable_func; |
10794 | sljit_uw executable_size; |
10795 | sljit_uw total_length; |
10796 | label_addr_list *label_addr; |
10797 | struct sljit_label *mainloop_label = NULL; |
10798 | struct sljit_label *continue_match_label; |
10799 | struct sljit_label *empty_match_found_label = NULL; |
10800 | struct sljit_label *empty_match_backtrack_label = NULL; |
10801 | struct sljit_label *reset_match_label; |
10802 | struct sljit_label *quit_label; |
10803 | struct sljit_jump *jump; |
10804 | struct sljit_jump *minlength_check_failed = NULL; |
10805 | struct sljit_jump *reqbyte_notfound = NULL; |
10806 | struct sljit_jump *empty_match = NULL; |
10807 | |
10808 | SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0); |
10809 | study = extra->study_data; |
10810 | |
10811 | if (!tables) |
10812 | tables = PRIV(default_tables); |
10813 | |
10814 | memset(&rootbacktrack, 0, sizeof(backtrack_common)); |
10815 | memset(common, 0, sizeof(compiler_common)); |
10816 | rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; |
10817 | |
10818 | common->start = rootbacktrack.cc; |
10819 | common->read_only_data_head = NULL; |
10820 | common->fcc = tables + fcc_offset; |
10821 | common->lcc = (sljit_sw)(tables + lcc_offset); |
10822 | common->mode = mode; |
10823 | common->might_be_empty = study->minlength == 0; |
10824 | common->nltype = NLTYPE_FIXED; |
10825 | switch(re->options & PCRE_NEWLINE_BITS) |
10826 | { |
10827 | case 0: |
10828 | /* Compile-time default */ |
10829 | switch(NEWLINE) |
10830 | { |
10831 | case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; |
10832 | case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; |
10833 | default: common->newline = NEWLINE; break; |
10834 | } |
10835 | break; |
10836 | case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break; |
10837 | case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break; |
10838 | case PCRE_NEWLINE_CR+ |
10839 | PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; |
10840 | case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; |
10841 | case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; |
10842 | default: return; |
10843 | } |
10844 | common->nlmax = READ_CHAR_MAX; |
10845 | common->nlmin = 0; |
10846 | if ((re->options & PCRE_BSR_ANYCRLF) != 0) |
10847 | common->bsr_nltype = NLTYPE_ANYCRLF; |
10848 | else if ((re->options & PCRE_BSR_UNICODE) != 0) |
10849 | common->bsr_nltype = NLTYPE_ANY; |
10850 | else |
10851 | { |
10852 | #ifdef BSR_ANYCRLF |
10853 | common->bsr_nltype = NLTYPE_ANYCRLF; |
10854 | #else |
10855 | common->bsr_nltype = NLTYPE_ANY; |
10856 | #endif |
10857 | } |
10858 | common->bsr_nlmax = READ_CHAR_MAX; |
10859 | common->bsr_nlmin = 0; |
10860 | common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
10861 | common->ctypes = (sljit_sw)(tables + ctypes_offset); |
10862 | common->name_table = ((pcre_uchar *)re) + re->name_table_offset; |
10863 | common->name_count = re->name_count; |
10864 | common->name_entry_size = re->name_entry_size; |
10865 | common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
10866 | #ifdef SUPPORT_UTF |
10867 | /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ |
10868 | common->utf = (re->options & PCRE_UTF8) != 0; |
10869 | #ifdef SUPPORT_UCP |
10870 | common->use_ucp = (re->options & PCRE_UCP) != 0; |
10871 | #endif |
10872 | if (common->utf) |
10873 | { |
10874 | if (common->nltype == NLTYPE_ANY) |
10875 | common->nlmax = 0x2029; |
10876 | else if (common->nltype == NLTYPE_ANYCRLF) |
10877 | common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; |
10878 | else |
10879 | { |
10880 | /* We only care about the first newline character. */ |
10881 | common->nlmax = common->newline & 0xff; |
10882 | } |
10883 | |
10884 | if (common->nltype == NLTYPE_FIXED) |
10885 | common->nlmin = common->newline & 0xff; |
10886 | else |
10887 | common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; |
10888 | |
10889 | if (common->bsr_nltype == NLTYPE_ANY) |
10890 | common->bsr_nlmax = 0x2029; |
10891 | else |
10892 | common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; |
10893 | common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; |
10894 | } |
10895 | #endif /* SUPPORT_UTF */ |
10896 | ccend = bracketend(common->start); |
10897 | |
10898 | /* Calculate the local space size on the stack. */ |
10899 | common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); |
10900 | common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data); |
10901 | if (!common->optimized_cbracket) |
10902 | return; |
10903 | #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 |
10904 | memset(common->optimized_cbracket, 0, re->top_bracket + 1); |
10905 | #else |
10906 | memset(common->optimized_cbracket, 1, re->top_bracket + 1); |
10907 | #endif |
10908 | |
10909 | SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); |
10910 | #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 |
10911 | common->capture_last_ptr = common->ovector_start; |
10912 | common->ovector_start += sizeof(sljit_sw); |
10913 | #endif |
10914 | if (!check_opcode_types(common, common->start, ccend)) |
10915 | { |
10916 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
10917 | return; |
10918 | } |
10919 | |
10920 | /* Checking flags and updating ovector_start. */ |
10921 | if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) |
10922 | { |
10923 | common->req_char_ptr = common->ovector_start; |
10924 | common->ovector_start += sizeof(sljit_sw); |
10925 | } |
10926 | if (mode != JIT_COMPILE) |
10927 | { |
10928 | common->start_used_ptr = common->ovector_start; |
10929 | common->ovector_start += sizeof(sljit_sw); |
10930 | if (mode == JIT_PARTIAL_SOFT_COMPILE) |
10931 | { |
10932 | common->hit_start = common->ovector_start; |
10933 | common->ovector_start += 2 * sizeof(sljit_sw); |
10934 | } |
10935 | } |
10936 | if ((re->options & PCRE_FIRSTLINE) != 0) |
10937 | { |
10938 | common->match_end_ptr = common->ovector_start; |
10939 | common->ovector_start += sizeof(sljit_sw); |
10940 | } |
10941 | #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD |
10942 | common->control_head_ptr = 1; |
10943 | #endif |
10944 | if (common->control_head_ptr != 0) |
10945 | { |
10946 | common->control_head_ptr = common->ovector_start; |
10947 | common->ovector_start += sizeof(sljit_sw); |
10948 | } |
10949 | if (common->has_set_som) |
10950 | { |
10951 | /* Saving the real start pointer is necessary. */ |
10952 | common->start_ptr = common->ovector_start; |
10953 | common->ovector_start += sizeof(sljit_sw); |
10954 | } |
10955 | |
10956 | /* Aligning ovector to even number of sljit words. */ |
10957 | if ((common->ovector_start & sizeof(sljit_sw)) != 0) |
10958 | common->ovector_start += sizeof(sljit_sw); |
10959 | |
10960 | if (common->start_ptr == 0) |
10961 | common->start_ptr = OVECTOR(0); |
10962 | |
10963 | /* Capturing brackets cannot be optimized if callouts are allowed. */ |
10964 | if (common->capture_last_ptr != 0) |
10965 | memset(common->optimized_cbracket, 0, re->top_bracket + 1); |
10966 | |
10967 | SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); |
10968 | common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); |
10969 | |
10970 | total_length = ccend - common->start; |
10971 | common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data); |
10972 | if (!common->private_data_ptrs) |
10973 | { |
10974 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
10975 | return; |
10976 | } |
10977 | memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); |
10978 | |
10979 | private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); |
10980 | set_private_data_ptrs(common, &private_data_size, ccend); |
10981 | if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) |
10982 | { |
10983 | if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back) |
10984 | detect_fast_fail(common, common->start, &private_data_size, 4); |
10985 | } |
10986 | |
10987 | SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); |
10988 | |
10989 | if (private_data_size > SLJIT_MAX_LOCAL_SIZE) |
10990 | { |
10991 | SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
10992 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
10993 | return; |
10994 | } |
10995 | |
10996 | if (common->has_then) |
10997 | { |
10998 | common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); |
10999 | memset(common->then_offsets, 0, total_length); |
11000 | set_then_offsets(common, common->start, NULL); |
11001 | } |
11002 | |
11003 | compiler = sljit_create_compiler(NULL); |
11004 | if (!compiler) |
11005 | { |
11006 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11007 | SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11008 | return; |
11009 | } |
11010 | common->compiler = compiler; |
11011 | |
11012 | /* Main pcre_jit_exec entry. */ |
11013 | sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size); |
11014 | |
11015 | /* Register init. */ |
11016 | reset_ovector(common, (re->top_bracket + 1) * 2); |
11017 | if (common->req_char_ptr != 0) |
11018 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); |
11019 | |
11020 | OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); |
11021 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); |
11022 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
11023 | OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); |
11024 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); |
11025 | OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); |
11026 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); |
11027 | OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); |
11028 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
11029 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); |
11030 | |
11031 | if (common->fast_fail_start_ptr < common->fast_fail_end_ptr) |
11032 | reset_fast_fail(common); |
11033 | |
11034 | if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11035 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); |
11036 | if (common->mark_ptr != 0) |
11037 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); |
11038 | if (common->control_head_ptr != 0) |
11039 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); |
11040 | |
11041 | /* Main part of the matching */ |
11042 | if ((re->options & PCRE_ANCHORED) == 0) |
11043 | { |
11044 | mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0); |
11045 | continue_match_label = LABEL(); |
11046 | /* Forward search if possible. */ |
11047 | if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) |
11048 | { |
11049 | if (mode == JIT_COMPILE && fast_forward_first_n_chars(common)) |
11050 | ; |
11051 | else if ((re->flags & PCRE_FIRSTSET) != 0) |
11052 | fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0); |
11053 | else if ((re->flags & PCRE_STARTLINE) != 0) |
11054 | fast_forward_newline(common); |
11055 | else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) |
11056 | fast_forward_start_bits(common, study->start_bits); |
11057 | } |
11058 | } |
11059 | else |
11060 | continue_match_label = LABEL(); |
11061 | |
11062 | if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) |
11063 | { |
11064 | OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); |
11065 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); |
11066 | minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); |
11067 | } |
11068 | if (common->req_char_ptr != 0) |
11069 | reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); |
11070 | |
11071 | /* Store the current STR_PTR in OVECTOR(0). */ |
11072 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); |
11073 | /* Copy the limit of allowed recursions. */ |
11074 | OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); |
11075 | if (common->capture_last_ptr != 0) |
11076 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1); |
11077 | if (common->fast_forward_bc_ptr != NULL) |
11078 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0); |
11079 | |
11080 | if (common->start_ptr != OVECTOR(0)) |
11081 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); |
11082 | |
11083 | /* Copy the beginning of the string. */ |
11084 | if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11085 | { |
11086 | jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); |
11087 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); |
11088 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); |
11089 | JUMPHERE(jump); |
11090 | } |
11091 | else if (mode == JIT_PARTIAL_HARD_COMPILE) |
11092 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); |
11093 | |
11094 | compile_matchingpath(common, common->start, ccend, &rootbacktrack); |
11095 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
11096 | { |
11097 | sljit_free_compiler(compiler); |
11098 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11099 | SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11100 | free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11101 | return; |
11102 | } |
11103 | |
11104 | if (common->might_be_empty) |
11105 | { |
11106 | empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); |
11107 | empty_match_found_label = LABEL(); |
11108 | } |
11109 | |
11110 | common->accept_label = LABEL(); |
11111 | if (common->accept != NULL) |
11112 | set_jumps(common->accept, common->accept_label); |
11113 | |
11114 | /* This means we have a match. Update the ovector. */ |
11115 | copy_ovector(common, re->top_bracket + 1); |
11116 | common->quit_label = common->forced_quit_label = LABEL(); |
11117 | if (common->quit != NULL) |
11118 | set_jumps(common->quit, common->quit_label); |
11119 | if (common->forced_quit != NULL) |
11120 | set_jumps(common->forced_quit, common->forced_quit_label); |
11121 | if (minlength_check_failed != NULL) |
11122 | SET_LABEL(minlength_check_failed, common->forced_quit_label); |
11123 | sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); |
11124 | |
11125 | if (mode != JIT_COMPILE) |
11126 | { |
11127 | common->partialmatchlabel = LABEL(); |
11128 | set_jumps(common->partialmatch, common->partialmatchlabel); |
11129 | return_with_partial_match(common, common->quit_label); |
11130 | } |
11131 | |
11132 | if (common->might_be_empty) |
11133 | empty_match_backtrack_label = LABEL(); |
11134 | compile_backtrackingpath(common, rootbacktrack.top); |
11135 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
11136 | { |
11137 | sljit_free_compiler(compiler); |
11138 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11139 | SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11140 | free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11141 | return; |
11142 | } |
11143 | |
11144 | SLJIT_ASSERT(rootbacktrack.prev == NULL); |
11145 | reset_match_label = LABEL(); |
11146 | |
11147 | if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11148 | { |
11149 | /* Update hit_start only in the first time. */ |
11150 | jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); |
11151 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr); |
11152 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); |
11153 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); |
11154 | JUMPHERE(jump); |
11155 | } |
11156 | |
11157 | /* Check we have remaining characters. */ |
11158 | if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0) |
11159 | { |
11160 | SLJIT_ASSERT(common->match_end_ptr != 0); |
11161 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
11162 | } |
11163 | |
11164 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), |
11165 | (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr); |
11166 | |
11167 | if ((re->options & PCRE_ANCHORED) == 0) |
11168 | { |
11169 | if (common->ff_newline_shortcut != NULL) |
11170 | { |
11171 | if ((re->options & PCRE_FIRSTLINE) == 0) |
11172 | CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); |
11173 | /* There cannot be more newlines here. */ |
11174 | } |
11175 | else |
11176 | CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label); |
11177 | } |
11178 | |
11179 | /* No more remaining characters. */ |
11180 | if (reqbyte_notfound != NULL) |
11181 | JUMPHERE(reqbyte_notfound); |
11182 | |
11183 | if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11184 | CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); |
11185 | |
11186 | OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); |
11187 | JUMPTO(SLJIT_JUMP, common->quit_label); |
11188 | |
11189 | flush_stubs(common); |
11190 | |
11191 | if (common->might_be_empty) |
11192 | { |
11193 | JUMPHERE(empty_match); |
11194 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
11195 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); |
11196 | CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); |
11197 | OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); |
11198 | CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); |
11199 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
11200 | CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); |
11201 | JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); |
11202 | } |
11203 | |
11204 | common->fast_forward_bc_ptr = NULL; |
11205 | common->fast_fail_start_ptr = 0; |
11206 | common->fast_fail_end_ptr = 0; |
11207 | common->currententry = common->entries; |
11208 | common->local_exit = TRUE; |
11209 | quit_label = common->quit_label; |
11210 | while (common->currententry != NULL) |
11211 | { |
11212 | /* Might add new entries. */ |
11213 | compile_recurse(common); |
11214 | if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
11215 | { |
11216 | sljit_free_compiler(compiler); |
11217 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11218 | SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11219 | free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11220 | return; |
11221 | } |
11222 | flush_stubs(common); |
11223 | common->currententry = common->currententry->next; |
11224 | } |
11225 | common->local_exit = FALSE; |
11226 | common->quit_label = quit_label; |
11227 | |
11228 | /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ |
11229 | /* This is a (really) rare case. */ |
11230 | set_jumps(common->stackalloc, LABEL()); |
11231 | /* RETURN_ADDR is not a saved register. */ |
11232 | sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
11233 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0); |
11234 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
11235 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); |
11236 | OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0); |
11237 | OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); |
11238 | |
11239 | sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); |
11240 | jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
11241 | OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
11242 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); |
11243 | OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); |
11244 | OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit)); |
11245 | OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); |
11246 | sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
11247 | |
11248 | /* Allocation failed. */ |
11249 | JUMPHERE(jump); |
11250 | /* We break the return address cache here, but this is a really rare case. */ |
11251 | OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); |
11252 | JUMPTO(SLJIT_JUMP, common->quit_label); |
11253 | |
11254 | /* Call limit reached. */ |
11255 | set_jumps(common->calllimit, LABEL()); |
11256 | OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); |
11257 | JUMPTO(SLJIT_JUMP, common->quit_label); |
11258 | |
11259 | if (common->revertframes != NULL) |
11260 | { |
11261 | set_jumps(common->revertframes, LABEL()); |
11262 | do_revertframes(common); |
11263 | } |
11264 | if (common->wordboundary != NULL) |
11265 | { |
11266 | set_jumps(common->wordboundary, LABEL()); |
11267 | check_wordboundary(common); |
11268 | } |
11269 | if (common->anynewline != NULL) |
11270 | { |
11271 | set_jumps(common->anynewline, LABEL()); |
11272 | check_anynewline(common); |
11273 | } |
11274 | if (common->hspace != NULL) |
11275 | { |
11276 | set_jumps(common->hspace, LABEL()); |
11277 | check_hspace(common); |
11278 | } |
11279 | if (common->vspace != NULL) |
11280 | { |
11281 | set_jumps(common->vspace, LABEL()); |
11282 | check_vspace(common); |
11283 | } |
11284 | if (common->casefulcmp != NULL) |
11285 | { |
11286 | set_jumps(common->casefulcmp, LABEL()); |
11287 | do_casefulcmp(common); |
11288 | } |
11289 | if (common->caselesscmp != NULL) |
11290 | { |
11291 | set_jumps(common->caselesscmp, LABEL()); |
11292 | do_caselesscmp(common); |
11293 | } |
11294 | if (common->reset_match != NULL) |
11295 | { |
11296 | set_jumps(common->reset_match, LABEL()); |
11297 | do_reset_match(common, (re->top_bracket + 1) * 2); |
11298 | CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); |
11299 | OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); |
11300 | JUMPTO(SLJIT_JUMP, reset_match_label); |
11301 | } |
11302 | #ifdef SUPPORT_UTF |
11303 | #ifdef COMPILE_PCRE8 |
11304 | if (common->utfreadchar != NULL) |
11305 | { |
11306 | set_jumps(common->utfreadchar, LABEL()); |
11307 | do_utfreadchar(common); |
11308 | } |
11309 | if (common->utfreadchar16 != NULL) |
11310 | { |
11311 | set_jumps(common->utfreadchar16, LABEL()); |
11312 | do_utfreadchar16(common); |
11313 | } |
11314 | if (common->utfreadtype8 != NULL) |
11315 | { |
11316 | set_jumps(common->utfreadtype8, LABEL()); |
11317 | do_utfreadtype8(common); |
11318 | } |
11319 | #endif /* COMPILE_PCRE8 */ |
11320 | #endif /* SUPPORT_UTF */ |
11321 | #ifdef SUPPORT_UCP |
11322 | if (common->getucd != NULL) |
11323 | { |
11324 | set_jumps(common->getucd, LABEL()); |
11325 | do_getucd(common); |
11326 | } |
11327 | #endif |
11328 | |
11329 | SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11330 | SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11331 | |
11332 | executable_func = sljit_generate_code(compiler); |
11333 | executable_size = sljit_get_generated_code_size(compiler); |
11334 | label_addr = common->label_addrs; |
11335 | while (label_addr != NULL) |
11336 | { |
11337 | *label_addr->update_addr = sljit_get_label_addr(label_addr->label); |
11338 | label_addr = label_addr->next; |
11339 | } |
11340 | sljit_free_compiler(compiler); |
11341 | if (executable_func == NULL) |
11342 | { |
11343 | free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11344 | return; |
11345 | } |
11346 | |
11347 | /* Reuse the function descriptor if possible. */ |
11348 | if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) |
11349 | functions = (executable_functions *)extra->executable_jit; |
11350 | else |
11351 | { |
11352 | /* Note: If your memory-checker has flagged the allocation below as a |
11353 | * memory leak, it is probably because you either forgot to call |
11354 | * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or |
11355 | * pcre16_extra) object, or you called said function after having |
11356 | * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field |
11357 | * of the object. (The function will only free the JIT data if the |
11358 | * bit remains set, as the bit indicates that the pointer to the data |
11359 | * is valid.) |
11360 | */ |
11361 | functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data); |
11362 | if (functions == NULL) |
11363 | { |
11364 | /* This case is highly unlikely since we just recently |
11365 | freed a lot of memory. Not impossible though. */ |
11366 | sljit_free_code(executable_func); |
11367 | free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11368 | return; |
11369 | } |
11370 | memset(functions, 0, sizeof(executable_functions)); |
11371 | functions->top_bracket = (re->top_bracket + 1) * 2; |
11372 | functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0; |
11373 | extra->executable_jit = functions; |
11374 | extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; |
11375 | } |
11376 | |
11377 | functions->executable_funcs[mode] = executable_func; |
11378 | functions->read_only_data_heads[mode] = common->read_only_data_head; |
11379 | functions->executable_sizes[mode] = executable_size; |
11380 | } |
11381 | |
11382 | static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func) |
11383 | { |
11384 | union { |
11385 | void *executable_func; |
11386 | jit_function call_executable_func; |
11387 | } convert_executable_func; |
11388 | sljit_u8 local_space[MACHINE_STACK_SIZE]; |
11389 | struct sljit_stack local_stack; |
11390 | |
11391 | local_stack.top = (sljit_sw)&local_space; |
11392 | local_stack.base = local_stack.top; |
11393 | local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; |
11394 | local_stack.max_limit = local_stack.limit; |
11395 | arguments->stack = &local_stack; |
11396 | convert_executable_func.executable_func = executable_func; |
11397 | return convert_executable_func.call_executable_func(arguments); |
11398 | } |
11399 | |
11400 | int |
11401 | PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, |
11402 | int length, int start_offset, int options, int *offsets, int offset_count) |
11403 | { |
11404 | executable_functions *functions = (executable_functions *)extra_data->executable_jit; |
11405 | union { |
11406 | void *executable_func; |
11407 | jit_function call_executable_func; |
11408 | } convert_executable_func; |
11409 | jit_arguments arguments; |
11410 | int max_offset_count; |
11411 | int retval; |
11412 | int mode = JIT_COMPILE; |
11413 | |
11414 | if ((options & PCRE_PARTIAL_HARD) != 0) |
11415 | mode = JIT_PARTIAL_HARD_COMPILE; |
11416 | else if ((options & PCRE_PARTIAL_SOFT) != 0) |
11417 | mode = JIT_PARTIAL_SOFT_COMPILE; |
11418 | |
11419 | if (functions->executable_funcs[mode] == NULL) |
11420 | return PCRE_ERROR_JIT_BADOPTION; |
11421 | |
11422 | /* Sanity checks should be handled by pcre_exec. */ |
11423 | arguments.str = subject + start_offset; |
11424 | arguments.begin = subject; |
11425 | arguments.end = subject + length; |
11426 | arguments.mark_ptr = NULL; |
11427 | /* JIT decreases this value less frequently than the interpreter. */ |
11428 | arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit); |
11429 | if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) |
11430 | arguments.limit_match = functions->limit_match; |
11431 | arguments.notbol = (options & PCRE_NOTBOL) != 0; |
11432 | arguments.noteol = (options & PCRE_NOTEOL) != 0; |
11433 | arguments.notempty = (options & PCRE_NOTEMPTY) != 0; |
11434 | arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; |
11435 | arguments.offsets = offsets; |
11436 | arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; |
11437 | arguments.real_offset_count = offset_count; |
11438 | |
11439 | /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of |
11440 | the output vector for storing captured strings, with the remainder used as |
11441 | workspace. We don't need the workspace here. For compatibility, we limit the |
11442 | number of captured strings in the same way as pcre_exec(), so that the user |
11443 | gets the same result with and without JIT. */ |
11444 | |
11445 | if (offset_count != 2) |
11446 | offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; |
11447 | max_offset_count = functions->top_bracket; |
11448 | if (offset_count > max_offset_count) |
11449 | offset_count = max_offset_count; |
11450 | arguments.offset_count = offset_count; |
11451 | |
11452 | if (functions->callback) |
11453 | arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); |
11454 | else |
11455 | arguments.stack = (struct sljit_stack *)functions->userdata; |
11456 | |
11457 | if (arguments.stack == NULL) |
11458 | retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); |
11459 | else |
11460 | { |
11461 | convert_executable_func.executable_func = functions->executable_funcs[mode]; |
11462 | retval = convert_executable_func.call_executable_func(&arguments); |
11463 | } |
11464 | |
11465 | if (retval * 2 > offset_count) |
11466 | retval = 0; |
11467 | if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) |
11468 | *(extra_data->mark) = arguments.mark_ptr; |
11469 | |
11470 | return retval; |
11471 | } |
11472 | |
11473 | #if defined COMPILE_PCRE8 |
11474 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
11475 | pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data, |
11476 | PCRE_SPTR subject, int length, int start_offset, int options, |
11477 | int *offsets, int offset_count, pcre_jit_stack *stack) |
11478 | #elif defined COMPILE_PCRE16 |
11479 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
11480 | pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, |
11481 | PCRE_SPTR16 subject, int length, int start_offset, int options, |
11482 | int *offsets, int offset_count, pcre16_jit_stack *stack) |
11483 | #elif defined COMPILE_PCRE32 |
11484 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
11485 | pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, |
11486 | PCRE_SPTR32 subject, int length, int start_offset, int options, |
11487 | int *offsets, int offset_count, pcre32_jit_stack *stack) |
11488 | #endif |
11489 | { |
11490 | pcre_uchar *subject_ptr = (pcre_uchar *)subject; |
11491 | executable_functions *functions = (executable_functions *)extra_data->executable_jit; |
11492 | union { |
11493 | void *executable_func; |
11494 | jit_function call_executable_func; |
11495 | } convert_executable_func; |
11496 | jit_arguments arguments; |
11497 | int max_offset_count; |
11498 | int retval; |
11499 | int mode = JIT_COMPILE; |
11500 | |
11501 | SLJIT_UNUSED_ARG(argument_re); |
11502 | |
11503 | /* Plausibility checks */ |
11504 | if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION; |
11505 | |
11506 | if ((options & PCRE_PARTIAL_HARD) != 0) |
11507 | mode = JIT_PARTIAL_HARD_COMPILE; |
11508 | else if ((options & PCRE_PARTIAL_SOFT) != 0) |
11509 | mode = JIT_PARTIAL_SOFT_COMPILE; |
11510 | |
11511 | if (functions->executable_funcs[mode] == NULL) |
11512 | return PCRE_ERROR_JIT_BADOPTION; |
11513 | |
11514 | /* Sanity checks should be handled by pcre_exec. */ |
11515 | arguments.stack = (struct sljit_stack *)stack; |
11516 | arguments.str = subject_ptr + start_offset; |
11517 | arguments.begin = subject_ptr; |
11518 | arguments.end = subject_ptr + length; |
11519 | arguments.mark_ptr = NULL; |
11520 | /* JIT decreases this value less frequently than the interpreter. */ |
11521 | arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit); |
11522 | if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) |
11523 | arguments.limit_match = functions->limit_match; |
11524 | arguments.notbol = (options & PCRE_NOTBOL) != 0; |
11525 | arguments.noteol = (options & PCRE_NOTEOL) != 0; |
11526 | arguments.notempty = (options & PCRE_NOTEMPTY) != 0; |
11527 | arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; |
11528 | arguments.offsets = offsets; |
11529 | arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; |
11530 | arguments.real_offset_count = offset_count; |
11531 | |
11532 | /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of |
11533 | the output vector for storing captured strings, with the remainder used as |
11534 | workspace. We don't need the workspace here. For compatibility, we limit the |
11535 | number of captured strings in the same way as pcre_exec(), so that the user |
11536 | gets the same result with and without JIT. */ |
11537 | |
11538 | if (offset_count != 2) |
11539 | offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; |
11540 | max_offset_count = functions->top_bracket; |
11541 | if (offset_count > max_offset_count) |
11542 | offset_count = max_offset_count; |
11543 | arguments.offset_count = offset_count; |
11544 | |
11545 | convert_executable_func.executable_func = functions->executable_funcs[mode]; |
11546 | retval = convert_executable_func.call_executable_func(&arguments); |
11547 | |
11548 | if (retval * 2 > offset_count) |
11549 | retval = 0; |
11550 | if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) |
11551 | *(extra_data->mark) = arguments.mark_ptr; |
11552 | |
11553 | return retval; |
11554 | } |
11555 | |
11556 | void |
11557 | PRIV(jit_free)(void *executable_funcs) |
11558 | { |
11559 | int i; |
11560 | executable_functions *functions = (executable_functions *)executable_funcs; |
11561 | for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) |
11562 | { |
11563 | if (functions->executable_funcs[i] != NULL) |
11564 | sljit_free_code(functions->executable_funcs[i]); |
11565 | free_read_only_data(functions->read_only_data_heads[i], NULL); |
11566 | } |
11567 | SLJIT_FREE(functions, compiler->allocator_data); |
11568 | } |
11569 | |
11570 | int |
11571 | PRIV(jit_get_size)(void *executable_funcs) |
11572 | { |
11573 | int i; |
11574 | sljit_uw size = 0; |
11575 | sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; |
11576 | for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) |
11577 | size += executable_sizes[i]; |
11578 | return (int)size; |
11579 | } |
11580 | |
11581 | const char* |
11582 | PRIV(jit_get_target)(void) |
11583 | { |
11584 | return sljit_get_platform_name(); |
11585 | } |
11586 | |
11587 | #if defined COMPILE_PCRE8 |
11588 | PCRE_EXP_DECL pcre_jit_stack * |
11589 | pcre_jit_stack_alloc(int startsize, int maxsize) |
11590 | #elif defined COMPILE_PCRE16 |
11591 | PCRE_EXP_DECL pcre16_jit_stack * |
11592 | pcre16_jit_stack_alloc(int startsize, int maxsize) |
11593 | #elif defined COMPILE_PCRE32 |
11594 | PCRE_EXP_DECL pcre32_jit_stack * |
11595 | pcre32_jit_stack_alloc(int startsize, int maxsize) |
11596 | #endif |
11597 | { |
11598 | if (startsize < 1 || maxsize < 1) |
11599 | return NULL; |
11600 | if (startsize > maxsize) |
11601 | startsize = maxsize; |
11602 | startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); |
11603 | maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); |
11604 | return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL); |
11605 | } |
11606 | |
11607 | #if defined COMPILE_PCRE8 |
11608 | PCRE_EXP_DECL void |
11609 | pcre_jit_stack_free(pcre_jit_stack *stack) |
11610 | #elif defined COMPILE_PCRE16 |
11611 | PCRE_EXP_DECL void |
11612 | pcre16_jit_stack_free(pcre16_jit_stack *stack) |
11613 | #elif defined COMPILE_PCRE32 |
11614 | PCRE_EXP_DECL void |
11615 | pcre32_jit_stack_free(pcre32_jit_stack *stack) |
11616 | #endif |
11617 | { |
11618 | sljit_free_stack((struct sljit_stack *)stack, NULL); |
11619 | } |
11620 | |
11621 | #if defined COMPILE_PCRE8 |
11622 | PCRE_EXP_DECL void |
11623 | pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) |
11624 | #elif defined COMPILE_PCRE16 |
11625 | PCRE_EXP_DECL void |
11626 | pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) |
11627 | #elif defined COMPILE_PCRE32 |
11628 | PCRE_EXP_DECL void |
11629 | pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) |
11630 | #endif |
11631 | { |
11632 | executable_functions *functions; |
11633 | if (extra != NULL && |
11634 | (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
11635 | extra->executable_jit != NULL) |
11636 | { |
11637 | functions = (executable_functions *)extra->executable_jit; |
11638 | functions->callback = callback; |
11639 | functions->userdata = userdata; |
11640 | } |
11641 | } |
11642 | |
11643 | #if defined COMPILE_PCRE8 |
11644 | PCRE_EXP_DECL void |
11645 | pcre_jit_free_unused_memory(void) |
11646 | #elif defined COMPILE_PCRE16 |
11647 | PCRE_EXP_DECL void |
11648 | pcre16_jit_free_unused_memory(void) |
11649 | #elif defined COMPILE_PCRE32 |
11650 | PCRE_EXP_DECL void |
11651 | pcre32_jit_free_unused_memory(void) |
11652 | #endif |
11653 | { |
11654 | sljit_free_unused_memory_exec(); |
11655 | } |
11656 | |
11657 | #else /* SUPPORT_JIT */ |
11658 | |
11659 | /* These are dummy functions to avoid linking errors when JIT support is not |
11660 | being compiled. */ |
11661 | |
11662 | #if defined COMPILE_PCRE8 |
11663 | PCRE_EXP_DECL pcre_jit_stack * |
11664 | pcre_jit_stack_alloc(int startsize, int maxsize) |
11665 | #elif defined COMPILE_PCRE16 |
11666 | PCRE_EXP_DECL pcre16_jit_stack * |
11667 | pcre16_jit_stack_alloc(int startsize, int maxsize) |
11668 | #elif defined COMPILE_PCRE32 |
11669 | PCRE_EXP_DECL pcre32_jit_stack * |
11670 | pcre32_jit_stack_alloc(int startsize, int maxsize) |
11671 | #endif |
11672 | { |
11673 | (void)startsize; |
11674 | (void)maxsize; |
11675 | return NULL; |
11676 | } |
11677 | |
11678 | #if defined COMPILE_PCRE8 |
11679 | PCRE_EXP_DECL void |
11680 | pcre_jit_stack_free(pcre_jit_stack *stack) |
11681 | #elif defined COMPILE_PCRE16 |
11682 | PCRE_EXP_DECL void |
11683 | pcre16_jit_stack_free(pcre16_jit_stack *stack) |
11684 | #elif defined COMPILE_PCRE32 |
11685 | PCRE_EXP_DECL void |
11686 | pcre32_jit_stack_free(pcre32_jit_stack *stack) |
11687 | #endif |
11688 | { |
11689 | (void)stack; |
11690 | } |
11691 | |
11692 | #if defined COMPILE_PCRE8 |
11693 | PCRE_EXP_DECL void |
11694 | pcre_assign_jit_stack(pcre_extra *, pcre_jit_callback callback, void *userdata) |
11695 | #elif defined COMPILE_PCRE16 |
11696 | PCRE_EXP_DECL void |
11697 | pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) |
11698 | #elif defined COMPILE_PCRE32 |
11699 | PCRE_EXP_DECL void |
11700 | pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) |
11701 | #endif |
11702 | { |
11703 | (void)extra; |
11704 | (void)callback; |
11705 | (void)userdata; |
11706 | } |
11707 | |
11708 | #if defined COMPILE_PCRE8 |
11709 | PCRE_EXP_DECL void |
11710 | pcre_jit_free_unused_memory(void) |
11711 | #elif defined COMPILE_PCRE16 |
11712 | PCRE_EXP_DECL void |
11713 | pcre16_jit_free_unused_memory(void) |
11714 | #elif defined COMPILE_PCRE32 |
11715 | PCRE_EXP_DECL void |
11716 | pcre32_jit_free_unused_memory(void) |
11717 | #endif |
11718 | { |
11719 | } |
11720 | |
11721 | #endif |
11722 | |
11723 | /* End of pcre_jit_compile.c */ |
11724 | |