1/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14-----------------------------------------------------------------------------
15Redistribution and use in source and binary forms, with or without
16modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39POSSIBILITY OF SUCH DAMAGE.
40-----------------------------------------------------------------------------
41*/
42
43#include "pcre_config.h"
44#include "pcre_internal.h"
45
46#if defined SUPPORT_JIT
47
48/* All-in-one: Since we use the JIT compiler only from here,
49we just include it. This way we don't need to touch the build
50system files. */
51
/* Configuration consumed by the sljit sources included below. The allocator
hooks forward to PUBL(malloc) / PUBL(free); the allocator_data argument is
accepted but not used. */
#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
/* Verbose output and debug checks are both switched off. */
#define SLJIT_VERBOSE 0
#define SLJIT_DEBUG 0
58
59#include "sljit/sljitLir.c"
60
61#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
62#error Unsupported architecture
63#endif
64
65/* Defines for debugging purposes. */
66
67/* 1 - Use unoptimized capturing brackets.
68 2 - Enable capture_last_ptr (includes option 1). */
69/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
70
71/* 1 - Always have a control head. */
72/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
73
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
86
87/*
Short summary of the backtracking mechanism employed by the JIT code generator:
89
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
95
96 'ab' - 'a' and 'b' regexps are concatenated
97 'a+' - 'a' is the sub-expression of the '+' operator
98
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
104
105 Greedy star operator (*) :
106 Matching path: match happens.
107 Backtrack path: match failed.
108 Non-greedy star operator (*?) :
109 Matching path: no need to perform a match.
110 Backtrack path: match is required.
111
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
consider the following regular expression:
115
116 A(B|C)D
117
118The generated code will be the following:
119
120 A matching path
121 '(' matching path (pushing arguments to the stack)
122 B matching path
123 ')' matching path (pushing arguments to the stack)
124 D matching path
125 return with successful match
126
127 D backtrack path
128 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129 B backtrack path
130 C expected path
131 jump to D matching path
132 C backtrack path
133 A backtrack path
134
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
141*/
142
143/*
144Saved stack frames:
145
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
150
151The stack frames are stored in a chain list, and have the following format:
152([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153
154Thus we can restore the private data to a particular point in the stack.
155*/
156
157typedef struct jit_arguments {
158 /* Pointers first. */
159 struct sljit_stack *stack;
160 const pcre_uchar *str;
161 const pcre_uchar *begin;
162 const pcre_uchar *end;
163 int *offsets;
164 pcre_uchar *uchar_ptr;
165 pcre_uchar *mark_ptr;
166 void *callout_data;
167 /* Everything else after. */
168 sljit_u32 limit_match;
169 int real_offset_count;
170 int offset_count;
171 sljit_u8 notbol;
172 sljit_u8 noteol;
173 sljit_u8 notempty;
174 sljit_u8 notempty_atstart;
175} jit_arguments;
176
177typedef struct executable_functions {
178 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
179 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
180 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
181 PUBL(jit_callback) callback;
182 void *userdata;
183 sljit_u32 top_bracket;
184 sljit_u32 limit_match;
185} executable_functions;
186
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
191
/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
197
198typedef struct label_addr_list {
199 struct sljit_label *label;
200 sljit_uw *update_addr;
201 struct label_addr_list *next;
202} label_addr_list;
203
/* Negative marker values for frame sizes (the *_backtrack structures
document their framesize members as "less than 0 if not needed"). */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
208
/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
213
214typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
215
216/* The following structure is the key data type for the recursive
217code generator. It is allocated by compile_matchingpath, and contains
218the arguments for compile_backtrackingpath. Must be the first member
219of its descendants. */
220typedef struct backtrack_common {
221 /* Concatenation stack. */
222 struct backtrack_common *prev;
223 jump_list *nextbacktracks;
224 /* Internal stack (for component operators). */
225 struct backtrack_common *top;
226 jump_list *topbacktracks;
227 /* Opcode pointer. */
228 pcre_uchar *cc;
229} backtrack_common;
230
231typedef struct assert_backtrack {
232 backtrack_common common;
233 jump_list *condfailed;
234 /* Less than 0 if a frame is not needed. */
235 int framesize;
236 /* Points to our private memory word on the stack. */
237 int private_data_ptr;
238 /* For iterators. */
239 struct sljit_label *matchingpath;
240} assert_backtrack;
241
242typedef struct bracket_backtrack {
243 backtrack_common common;
244 /* Where to coninue if an alternative is successfully matched. */
245 struct sljit_label *alternative_matchingpath;
246 /* For rmin and rmax iterators. */
247 struct sljit_label *recursive_matchingpath;
248 /* For greedy ? operator. */
249 struct sljit_label *zero_matchingpath;
250 /* Contains the branches of a failed condition. */
251 union {
252 /* Both for OP_COND, OP_SCOND. */
253 jump_list *condfailed;
254 assert_backtrack *assert;
255 /* For OP_ONCE. Less than 0 if not needed. */
256 int framesize;
257 } u;
258 /* Points to our private memory word on the stack. */
259 int private_data_ptr;
260} bracket_backtrack;
261
262typedef struct bracketpos_backtrack {
263 backtrack_common common;
264 /* Points to our private memory word on the stack. */
265 int private_data_ptr;
266 /* Reverting stack is needed. */
267 int framesize;
268 /* Allocated stack size. */
269 int stacksize;
270} bracketpos_backtrack;
271
272typedef struct braminzero_backtrack {
273 backtrack_common common;
274 struct sljit_label *matchingpath;
275} braminzero_backtrack;
276
277typedef struct char_iterator_backtrack {
278 backtrack_common common;
279 /* Next iteration. */
280 struct sljit_label *matchingpath;
281 union {
282 jump_list *backtracks;
283 struct {
284 unsigned int othercasebit;
285 pcre_uchar chr;
286 BOOL enabled;
287 } charpos;
288 } u;
289} char_iterator_backtrack;
290
291typedef struct ref_iterator_backtrack {
292 backtrack_common common;
293 /* Next iteration. */
294 struct sljit_label *matchingpath;
295} ref_iterator_backtrack;
296
297typedef struct recurse_entry {
298 struct recurse_entry *next;
299 /* Contains the function entry. */
300 struct sljit_label *entry;
301 /* Collects the calls until the function is not created. */
302 jump_list *calls;
303 /* Points to the starting opcode. */
304 sljit_sw start;
305} recurse_entry;
306
307typedef struct recurse_backtrack {
308 backtrack_common common;
309 BOOL inlined_pattern;
310} recurse_backtrack;
311
312#define OP_THEN_TRAP OP_TABLE_LENGTH
313
314typedef struct then_trap_backtrack {
315 backtrack_common common;
316 /* If then_trap is not NULL, this structure contains the real
317 then_trap for the backtracking path. */
318 struct then_trap_backtrack *then_trap;
319 /* Points to the starting opcode. */
320 sljit_sw start;
321 /* Exit point for the then opcodes of this alternative. */
322 jump_list *quit;
323 /* Frame size of the current alternative. */
324 int framesize;
325} then_trap_backtrack;
326
327#define MAX_RANGE_SIZE 4
328
329typedef struct compiler_common {
330 /* The sljit ceneric compiler. */
331 struct sljit_compiler *compiler;
332 /* First byte code. */
333 pcre_uchar *start;
334 /* Maps private data offset to each opcode. */
335 sljit_s32 *private_data_ptrs;
336 /* Chain list of read-only data ptrs. */
337 void *read_only_data_head;
338 /* Tells whether the capturing bracket is optimized. */
339 sljit_u8 *optimized_cbracket;
340 /* Tells whether the starting offset is a target of then. */
341 sljit_u8 *then_offsets;
342 /* Current position where a THEN must jump. */
343 then_trap_backtrack *then_trap;
344 /* Starting offset of private data for capturing brackets. */
345 sljit_s32 cbra_ptr;
346 /* Output vector starting point. Must be divisible by 2. */
347 sljit_s32 ovector_start;
348 /* Points to the starting character of the current match. */
349 sljit_s32 start_ptr;
350 /* Last known position of the requested byte. */
351 sljit_s32 req_char_ptr;
352 /* Head of the last recursion. */
353 sljit_s32 recursive_head_ptr;
354 /* First inspected character for partial matching.
355 (Needed for avoiding zero length partial matches.) */
356 sljit_s32 start_used_ptr;
357 /* Starting pointer for partial soft matches. */
358 sljit_s32 hit_start;
359 /* Pointer of the match end position. */
360 sljit_s32 match_end_ptr;
361 /* Points to the marked string. */
362 sljit_s32 mark_ptr;
363 /* Recursive control verb management chain. */
364 sljit_s32 control_head_ptr;
365 /* Points to the last matched capture block index. */
366 sljit_s32 capture_last_ptr;
367 /* Fast forward skipping byte code pointer. */
368 pcre_uchar *fast_forward_bc_ptr;
369 /* Locals used by fast fail optimization. */
370 sljit_s32 fast_fail_start_ptr;
371 sljit_s32 fast_fail_end_ptr;
372
373 /* Flipped and lower case tables. */
374 const sljit_u8 *fcc;
375 sljit_sw lcc;
376 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
377 int mode;
378 /* TRUE, when minlength is greater than 0. */
379 BOOL might_be_empty;
380 /* \K is found in the pattern. */
381 BOOL has_set_som;
382 /* (*SKIP:arg) is found in the pattern. */
383 BOOL has_skip_arg;
384 /* (*THEN) is found in the pattern. */
385 BOOL has_then;
386 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
387 BOOL has_skip_in_assert_back;
388 /* Currently in recurse or negative assert. */
389 BOOL local_exit;
390 /* Currently in a positive assert. */
391 BOOL positive_assert;
392 /* Newline control. */
393 int nltype;
394 sljit_u32 nlmax;
395 sljit_u32 nlmin;
396 int newline;
397 int bsr_nltype;
398 sljit_u32 bsr_nlmax;
399 sljit_u32 bsr_nlmin;
400 /* Dollar endonly. */
401 int endonly;
402 /* Tables. */
403 sljit_sw ctypes;
404 /* Named capturing brackets. */
405 pcre_uchar *name_table;
406 sljit_sw name_count;
407 sljit_sw name_entry_size;
408
409 /* Labels and jump lists. */
410 struct sljit_label *partialmatchlabel;
411 struct sljit_label *quit_label;
412 struct sljit_label *forced_quit_label;
413 struct sljit_label *accept_label;
414 struct sljit_label *ff_newline_shortcut;
415 stub_list *stubs;
416 label_addr_list *label_addrs;
417 recurse_entry *entries;
418 recurse_entry *currententry;
419 jump_list *partialmatch;
420 jump_list *quit;
421 jump_list *positive_assert_quit;
422 jump_list *forced_quit;
423 jump_list *accept;
424 jump_list *calllimit;
425 jump_list *stackalloc;
426 jump_list *revertframes;
427 jump_list *wordboundary;
428 jump_list *anynewline;
429 jump_list *hspace;
430 jump_list *vspace;
431 jump_list *casefulcmp;
432 jump_list *caselesscmp;
433 jump_list *reset_match;
434 BOOL jscript_compat;
435#ifdef SUPPORT_UTF
436 BOOL utf;
437#ifdef SUPPORT_UCP
438 BOOL use_ucp;
439 jump_list *getucd;
440#endif
441#ifdef COMPILE_PCRE8
442 jump_list *utfreadchar;
443 jump_list *utfreadchar16;
444 jump_list *utfreadtype8;
445#endif
446#endif /* SUPPORT_UTF */
447} compiler_common;
448
/* For byte_sequence_compare. When SLJIT_UNALIGNED is set, the context also
carries two unions (c and oc) that expose the same storage as a 32 bit
value, a 16 bit value, or an array of code units whose element width
depends on the PCRE build (8, 16 or 32 bit). */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
482
/* Undefine sljit macros. CMP is redefined further down as a shortcut for
sljit_emit_cmp. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by the generated code. */
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
499
/* Local space layout. All values are byte offsets in units of
sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. These macros expect a variable named common to be in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
517
518#if defined COMPILE_PCRE8
519#define MOV_UCHAR SLJIT_MOV_U8
520#define MOVU_UCHAR SLJIT_MOVU_U8
521#elif defined COMPILE_PCRE16
522#define MOV_UCHAR SLJIT_MOV_U16
523#define MOVU_UCHAR SLJIT_MOVU_U16
524#elif defined COMPILE_PCRE32
525#define MOV_UCHAR SLJIT_MOV_U32
526#define MOVU_UCHAR SLJIT_MOVU_U32
527#else
528#error Unsupported compiling mode
529#endif
530
/* Shortcuts. DEFINE_COMPILER declares a local named compiler taken from
common->compiler; every other emit macro below expects that local to be
in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff
558
559static pcre_uchar *bracketend(pcre_uchar *cc)
560{
561SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
562do cc += GET(cc, 1); while (*cc == OP_ALT);
563SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564cc += 1 + LINK_SIZE;
565return cc;
566}
567
568static int no_alternatives(pcre_uchar *cc)
569{
570int count = 0;
571SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
572do
573 {
574 cc += GET(cc, 1);
575 count++;
576 }
577while (*cc == OP_ALT);
578SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
579return count;
580}
581
/* Functions which might need modification whenever a new opcode is supported:
583 next_opcode
584 check_opcode_types
585 set_private_data_ptrs
586 get_framesize
587 init_frame
588 get_private_data_copy_length
589 copy_private_data
590 compile_matchingpath
591 compile_backtrackingpath
592*/
593
594static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
595{
596SLJIT_UNUSED_ARG(common);
597switch(*cc)
598 {
599 case OP_SOD:
600 case OP_SOM:
601 case OP_SET_SOM:
602 case OP_NOT_WORD_BOUNDARY:
603 case OP_WORD_BOUNDARY:
604 case OP_NOT_DIGIT:
605 case OP_DIGIT:
606 case OP_NOT_WHITESPACE:
607 case OP_WHITESPACE:
608 case OP_NOT_WORDCHAR:
609 case OP_WORDCHAR:
610 case OP_ANY:
611 case OP_ALLANY:
612 case OP_NOTPROP:
613 case OP_PROP:
614 case OP_ANYNL:
615 case OP_NOT_HSPACE:
616 case OP_HSPACE:
617 case OP_NOT_VSPACE:
618 case OP_VSPACE:
619 case OP_EXTUNI:
620 case OP_EODN:
621 case OP_EOD:
622 case OP_CIRC:
623 case OP_CIRCM:
624 case OP_DOLL:
625 case OP_DOLLM:
626 case OP_CRSTAR:
627 case OP_CRMINSTAR:
628 case OP_CRPLUS:
629 case OP_CRMINPLUS:
630 case OP_CRQUERY:
631 case OP_CRMINQUERY:
632 case OP_CRRANGE:
633 case OP_CRMINRANGE:
634 case OP_CRPOSSTAR:
635 case OP_CRPOSPLUS:
636 case OP_CRPOSQUERY:
637 case OP_CRPOSRANGE:
638 case OP_CLASS:
639 case OP_NCLASS:
640 case OP_REF:
641 case OP_REFI:
642 case OP_DNREF:
643 case OP_DNREFI:
644 case OP_RECURSE:
645 case OP_CALLOUT:
646 case OP_ALT:
647 case OP_KET:
648 case OP_KETRMAX:
649 case OP_KETRMIN:
650 case OP_KETRPOS:
651 case OP_REVERSE:
652 case OP_ASSERT:
653 case OP_ASSERT_NOT:
654 case OP_ASSERTBACK:
655 case OP_ASSERTBACK_NOT:
656 case OP_ONCE:
657 case OP_ONCE_NC:
658 case OP_BRA:
659 case OP_BRAPOS:
660 case OP_CBRA:
661 case OP_CBRAPOS:
662 case OP_COND:
663 case OP_SBRA:
664 case OP_SBRAPOS:
665 case OP_SCBRA:
666 case OP_SCBRAPOS:
667 case OP_SCOND:
668 case OP_CREF:
669 case OP_DNCREF:
670 case OP_RREF:
671 case OP_DNRREF:
672 case OP_DEF:
673 case OP_BRAZERO:
674 case OP_BRAMINZERO:
675 case OP_BRAPOSZERO:
676 case OP_PRUNE:
677 case OP_SKIP:
678 case OP_THEN:
679 case OP_COMMIT:
680 case OP_FAIL:
681 case OP_ACCEPT:
682 case OP_ASSERT_ACCEPT:
683 case OP_CLOSE:
684 case OP_SKIPZERO:
685 return cc + PRIV(OP_lengths)[*cc];
686
687 case OP_CHAR:
688 case OP_CHARI:
689 case OP_NOT:
690 case OP_NOTI:
691 case OP_STAR:
692 case OP_MINSTAR:
693 case OP_PLUS:
694 case OP_MINPLUS:
695 case OP_QUERY:
696 case OP_MINQUERY:
697 case OP_UPTO:
698 case OP_MINUPTO:
699 case OP_EXACT:
700 case OP_POSSTAR:
701 case OP_POSPLUS:
702 case OP_POSQUERY:
703 case OP_POSUPTO:
704 case OP_STARI:
705 case OP_MINSTARI:
706 case OP_PLUSI:
707 case OP_MINPLUSI:
708 case OP_QUERYI:
709 case OP_MINQUERYI:
710 case OP_UPTOI:
711 case OP_MINUPTOI:
712 case OP_EXACTI:
713 case OP_POSSTARI:
714 case OP_POSPLUSI:
715 case OP_POSQUERYI:
716 case OP_POSUPTOI:
717 case OP_NOTSTAR:
718 case OP_NOTMINSTAR:
719 case OP_NOTPLUS:
720 case OP_NOTMINPLUS:
721 case OP_NOTQUERY:
722 case OP_NOTMINQUERY:
723 case OP_NOTUPTO:
724 case OP_NOTMINUPTO:
725 case OP_NOTEXACT:
726 case OP_NOTPOSSTAR:
727 case OP_NOTPOSPLUS:
728 case OP_NOTPOSQUERY:
729 case OP_NOTPOSUPTO:
730 case OP_NOTSTARI:
731 case OP_NOTMINSTARI:
732 case OP_NOTPLUSI:
733 case OP_NOTMINPLUSI:
734 case OP_NOTQUERYI:
735 case OP_NOTMINQUERYI:
736 case OP_NOTUPTOI:
737 case OP_NOTMINUPTOI:
738 case OP_NOTEXACTI:
739 case OP_NOTPOSSTARI:
740 case OP_NOTPOSPLUSI:
741 case OP_NOTPOSQUERYI:
742 case OP_NOTPOSUPTOI:
743 cc += PRIV(OP_lengths)[*cc];
744#ifdef SUPPORT_UTF
745 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
746#endif
747 return cc;
748
749 /* Special cases. */
750 case OP_TYPESTAR:
751 case OP_TYPEMINSTAR:
752 case OP_TYPEPLUS:
753 case OP_TYPEMINPLUS:
754 case OP_TYPEQUERY:
755 case OP_TYPEMINQUERY:
756 case OP_TYPEUPTO:
757 case OP_TYPEMINUPTO:
758 case OP_TYPEEXACT:
759 case OP_TYPEPOSSTAR:
760 case OP_TYPEPOSPLUS:
761 case OP_TYPEPOSQUERY:
762 case OP_TYPEPOSUPTO:
763 return cc + PRIV(OP_lengths)[*cc] - 1;
764
765 case OP_ANYBYTE:
766#ifdef SUPPORT_UTF
767 if (common->utf) return NULL;
768#endif
769 return cc + 1;
770
771#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
772 case OP_XCLASS:
773 return cc + GET(cc, 1);
774#endif
775
776 case OP_MARK:
777 case OP_PRUNE_ARG:
778 case OP_SKIP_ARG:
779 case OP_THEN_ARG:
780 return cc + 1 + 2 + cc[1];
781
782 default:
783 /* All opcodes are supported now! */
784 SLJIT_ASSERT_STOP();
785 return NULL;
786 }
787}
788
789static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
790{
791int count;
792pcre_uchar *slot;
793pcre_uchar *assert_back_end = cc - 1;
794
795/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
796while (cc < ccend)
797 {
798 switch(*cc)
799 {
800 case OP_SET_SOM:
801 common->has_set_som = TRUE;
802 common->might_be_empty = TRUE;
803 cc += 1;
804 break;
805
806 case OP_REF:
807 case OP_REFI:
808 common->optimized_cbracket[GET2(cc, 1)] = 0;
809 cc += 1 + IMM2_SIZE;
810 break;
811
812 case OP_CBRAPOS:
813 case OP_SCBRAPOS:
814 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
815 cc += 1 + LINK_SIZE + IMM2_SIZE;
816 break;
817
818 case OP_COND:
819 case OP_SCOND:
820 /* Only AUTO_CALLOUT can insert this opcode. We do
821 not intend to support this case. */
822 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
823 return FALSE;
824 cc += 1 + LINK_SIZE;
825 break;
826
827 case OP_CREF:
828 common->optimized_cbracket[GET2(cc, 1)] = 0;
829 cc += 1 + IMM2_SIZE;
830 break;
831
832 case OP_DNREF:
833 case OP_DNREFI:
834 case OP_DNCREF:
835 count = GET2(cc, 1 + IMM2_SIZE);
836 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
837 while (count-- > 0)
838 {
839 common->optimized_cbracket[GET2(slot, 0)] = 0;
840 slot += common->name_entry_size;
841 }
842 cc += 1 + 2 * IMM2_SIZE;
843 break;
844
845 case OP_RECURSE:
846 /* Set its value only once. */
847 if (common->recursive_head_ptr == 0)
848 {
849 common->recursive_head_ptr = common->ovector_start;
850 common->ovector_start += sizeof(sljit_sw);
851 }
852 cc += 1 + LINK_SIZE;
853 break;
854
855 case OP_CALLOUT:
856 if (common->capture_last_ptr == 0)
857 {
858 common->capture_last_ptr = common->ovector_start;
859 common->ovector_start += sizeof(sljit_sw);
860 }
861 cc += 2 + 2 * LINK_SIZE;
862 break;
863
864 case OP_ASSERTBACK:
865 slot = bracketend(cc);
866 if (slot > assert_back_end)
867 assert_back_end = slot;
868 cc += 1 + LINK_SIZE;
869 break;
870
871 case OP_THEN_ARG:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE_ARG:
877 case OP_MARK:
878 if (common->mark_ptr == 0)
879 {
880 common->mark_ptr = common->ovector_start;
881 common->ovector_start += sizeof(sljit_sw);
882 }
883 cc += 1 + 2 + cc[1];
884 break;
885
886 case OP_THEN:
887 common->has_then = TRUE;
888 common->control_head_ptr = 1;
889 cc += 1;
890 break;
891
892 case OP_SKIP:
893 if (cc < assert_back_end)
894 common->has_skip_in_assert_back = TRUE;
895 cc += 1;
896 break;
897
898 case OP_SKIP_ARG:
899 common->control_head_ptr = 1;
900 common->has_skip_arg = TRUE;
901 if (cc < assert_back_end)
902 common->has_skip_in_assert_back = TRUE;
903 cc += 1 + 2 + cc[1];
904 break;
905
906 default:
907 cc = next_opcode(common, cc);
908 if (cc == NULL)
909 return FALSE;
910 break;
911 }
912 }
913return TRUE;
914}
915
916static BOOL is_accelerated_repeat(pcre_uchar *cc)
917{
918switch(*cc)
919 {
920 case OP_TYPESTAR:
921 case OP_TYPEMINSTAR:
922 case OP_TYPEPLUS:
923 case OP_TYPEMINPLUS:
924 case OP_TYPEPOSSTAR:
925 case OP_TYPEPOSPLUS:
926 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
927
928 case OP_STAR:
929 case OP_MINSTAR:
930 case OP_PLUS:
931 case OP_MINPLUS:
932 case OP_POSSTAR:
933 case OP_POSPLUS:
934
935 case OP_STARI:
936 case OP_MINSTARI:
937 case OP_PLUSI:
938 case OP_MINPLUSI:
939 case OP_POSSTARI:
940 case OP_POSPLUSI:
941
942 case OP_NOTSTAR:
943 case OP_NOTMINSTAR:
944 case OP_NOTPLUS:
945 case OP_NOTMINPLUS:
946 case OP_NOTPOSSTAR:
947 case OP_NOTPOSPLUS:
948
949 case OP_NOTSTARI:
950 case OP_NOTMINSTARI:
951 case OP_NOTPLUSI:
952 case OP_NOTMINPLUSI:
953 case OP_NOTPOSSTARI:
954 case OP_NOTPOSPLUSI:
955 return TRUE;
956
957 case OP_CLASS:
958 case OP_NCLASS:
959#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
960 case OP_XCLASS:
961 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
962#else
963 cc += (1 + (32 / sizeof(pcre_uchar)));
964#endif
965
966 switch(*cc)
967 {
968 case OP_CRSTAR:
969 case OP_CRMINSTAR:
970 case OP_CRPLUS:
971 case OP_CRMINPLUS:
972 case OP_CRPOSSTAR:
973 case OP_CRPOSPLUS:
974 return TRUE;
975 }
976 break;
977 }
978return FALSE;
979}
980
981static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
982{
983pcre_uchar *cc = common->start;
984pcre_uchar *end;
985
986/* Skip not repeated brackets. */
987while (TRUE)
988 {
989 switch(*cc)
990 {
991 case OP_SOD:
992 case OP_SOM:
993 case OP_SET_SOM:
994 case OP_NOT_WORD_BOUNDARY:
995 case OP_WORD_BOUNDARY:
996 case OP_EODN:
997 case OP_EOD:
998 case OP_CIRC:
999 case OP_CIRCM:
1000 case OP_DOLL:
1001 case OP_DOLLM:
1002 /* Zero width assertions. */
1003 cc++;
1004 continue;
1005 }
1006
1007 if (*cc != OP_BRA && *cc != OP_CBRA)
1008 break;
1009
1010 end = cc + GET(cc, 1);
1011 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1012 return FALSE;
1013 if (*cc == OP_CBRA)
1014 {
1015 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1016 return FALSE;
1017 cc += IMM2_SIZE;
1018 }
1019 cc += 1 + LINK_SIZE;
1020 }
1021
1022if (is_accelerated_repeat(cc))
1023 {
1024 common->fast_forward_bc_ptr = cc;
1025 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1026 *private_data_start += sizeof(sljit_sw);
1027 return TRUE;
1028 }
1029return FALSE;
1030}
1031
1032static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1033{
1034 pcre_uchar *next_alt;
1035
1036 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1037
1038 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1039 return;
1040
1041 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1042 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1043 return;
1044
1045 do
1046 {
1047 next_alt = cc + GET(cc, 1);
1048
1049 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1050
1051 while (TRUE)
1052 {
1053 switch(*cc)
1054 {
1055 case OP_SOD:
1056 case OP_SOM:
1057 case OP_SET_SOM:
1058 case OP_NOT_WORD_BOUNDARY:
1059 case OP_WORD_BOUNDARY:
1060 case OP_EODN:
1061 case OP_EOD:
1062 case OP_CIRC:
1063 case OP_CIRCM:
1064 case OP_DOLL:
1065 case OP_DOLLM:
1066 /* Zero width assertions. */
1067 cc++;
1068 continue;
1069 }
1070 break;
1071 }
1072
1073 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1074 detect_fast_fail(common, cc, private_data_start, depth - 1);
1075
1076 if (is_accelerated_repeat(cc))
1077 {
1078 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1079
1080 if (common->fast_fail_start_ptr == 0)
1081 common->fast_fail_start_ptr = *private_data_start;
1082
1083 *private_data_start += sizeof(sljit_sw);
1084 common->fast_fail_end_ptr = *private_data_start;
1085
1086 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1087 return;
1088 }
1089
1090 cc = next_alt;
1091 }
1092 while (*cc == OP_ALT);
1093}
1094
1095static int get_class_iterator_size(pcre_uchar *cc)
1096{
1097sljit_u32 min;
1098sljit_u32 max;
1099switch(*cc)
1100 {
1101 case OP_CRSTAR:
1102 case OP_CRPLUS:
1103 return 2;
1104
1105 case OP_CRMINSTAR:
1106 case OP_CRMINPLUS:
1107 case OP_CRQUERY:
1108 case OP_CRMINQUERY:
1109 return 1;
1110
1111 case OP_CRRANGE:
1112 case OP_CRMINRANGE:
1113 min = GET2(cc, 1);
1114 max = GET2(cc, 1 + IMM2_SIZE);
1115 if (max == 0)
1116 return (*cc == OP_CRRANGE) ? 2 : 1;
1117 max -= min;
1118 if (max > 2)
1119 max = 2;
1120 return max;
1121
1122 default:
1123 return 0;
1124 }
1125}
1126
/* Checks whether the bracket starting at begin is followed by byte-for-byte
copies of itself and, if so, records the run as a single repeat in
common->private_data_ptrs.

Two shapes are recognized:
  - a run of identical brackets (three or more are needed), recorded as
    OP_EXACT;
  - identical brackets nested inside OP_BRAZERO / OP_BRAMINZERO wrappers,
    recorded as OP_UPTO or OP_MINUPTO respectively.

For each recorded repeat three consecutive entries are written, starting at
the index of the code unit that follows the OP_KET opcode closing the bracket
in front of the run: the distance to the end of the run, the repeat type, and
the repeat count.

Returns TRUE when a repeat is recorded at the closing OP_KET of the bracket
at begin (now, or by an earlier call), FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
sljit_sw length = end - begin;
int min, max, i;

/* Detect fixed iterations first. Only brackets closed by a plain OP_KET
are considered. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the bracket itself plus every identical copy that directly follows. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are left untouched. */
if (min == 2)
  return FALSE;

max = 0;
/* max_end is the end of the last mandatory copy. */
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  /* Each optional level except the innermost one is an OP_BRA wrapper that
  starts with a copy of the bracket. */
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost optional copy has no wrapper. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Every wrapper opened above must be closed by a plain OP_KET. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last mandatory copy now belongs to the bounded repeat: drop it
        from the fixed count and move max_end back to where that copy starts. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Record the remaining mandatory copies as a fixed repeat. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1211
/* Case-label groups for the single character and character type repeat
opcodes. They are expanded inside the switch statements of the private data
functions that follow, so that each of them classifies the opcodes the same
way. Opcodes of the _1 groups take one private data word there, opcodes of
the _2A and _2B groups take at most two, and the _2B opcodes are additionally
followed by an IMM2_SIZE operand. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1263
1264static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1265{
1266pcre_uchar *cc = common->start;
1267pcre_uchar *alternative;
1268pcre_uchar *end = NULL;
1269int private_data_ptr = *private_data_start;
1270int space, size, bracketlen;
1271BOOL repeat_check = TRUE;
1272
1273while (cc < ccend)
1274 {
1275 space = 0;
1276 size = 0;
1277 bracketlen = 0;
1278 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1279 break;
1280
1281 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1282 {
1283 if (detect_repeat(common, cc))
1284 {
1285 /* These brackets are converted to repeats, so no global
1286 based single character repeat is allowed. */
1287 if (cc >= end)
1288 end = bracketend(cc);
1289 }
1290 }
1291 repeat_check = TRUE;
1292
1293 switch(*cc)
1294 {
1295 case OP_KET:
1296 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1297 {
1298 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1299 private_data_ptr += sizeof(sljit_sw);
1300 cc += common->private_data_ptrs[cc + 1 - common->start];
1301 }
1302 cc += 1 + LINK_SIZE;
1303 break;
1304
1305 case OP_ASSERT:
1306 case OP_ASSERT_NOT:
1307 case OP_ASSERTBACK:
1308 case OP_ASSERTBACK_NOT:
1309 case OP_ONCE:
1310 case OP_ONCE_NC:
1311 case OP_BRAPOS:
1312 case OP_SBRA:
1313 case OP_SBRAPOS:
1314 case OP_SCOND:
1315 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1316 private_data_ptr += sizeof(sljit_sw);
1317 bracketlen = 1 + LINK_SIZE;
1318 break;
1319
1320 case OP_CBRAPOS:
1321 case OP_SCBRAPOS:
1322 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1323 private_data_ptr += sizeof(sljit_sw);
1324 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1325 break;
1326
1327 case OP_COND:
1328 /* Might be a hidden SCOND. */
1329 alternative = cc + GET(cc, 1);
1330 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1331 {
1332 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1333 private_data_ptr += sizeof(sljit_sw);
1334 }
1335 bracketlen = 1 + LINK_SIZE;
1336 break;
1337
1338 case OP_BRA:
1339 bracketlen = 1 + LINK_SIZE;
1340 break;
1341
1342 case OP_CBRA:
1343 case OP_SCBRA:
1344 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1345 break;
1346
1347 case OP_BRAZERO:
1348 case OP_BRAMINZERO:
1349 case OP_BRAPOSZERO:
1350 repeat_check = FALSE;
1351 size = 1;
1352 break;
1353
1354 CASE_ITERATOR_PRIVATE_DATA_1
1355 space = 1;
1356 size = -2;
1357 break;
1358
1359 CASE_ITERATOR_PRIVATE_DATA_2A
1360 space = 2;
1361 size = -2;
1362 break;
1363
1364 CASE_ITERATOR_PRIVATE_DATA_2B
1365 space = 2;
1366 size = -(2 + IMM2_SIZE);
1367 break;
1368
1369 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1370 space = 1;
1371 size = 1;
1372 break;
1373
1374 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1375 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1376 space = 2;
1377 size = 1;
1378 break;
1379
1380 case OP_TYPEUPTO:
1381 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1382 space = 2;
1383 size = 1 + IMM2_SIZE;
1384 break;
1385
1386 case OP_TYPEMINUPTO:
1387 space = 2;
1388 size = 1 + IMM2_SIZE;
1389 break;
1390
1391 case OP_CLASS:
1392 case OP_NCLASS:
1393 space = get_class_iterator_size(cc + size);
1394 size = 1 + 32 / sizeof(pcre_uchar);
1395 break;
1396
1397#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1398 case OP_XCLASS:
1399 space = get_class_iterator_size(cc + size);
1400 size = GET(cc, 1);
1401 break;
1402#endif
1403
1404 default:
1405 cc = next_opcode(common, cc);
1406 SLJIT_ASSERT(cc != NULL);
1407 break;
1408 }
1409
1410 /* Character iterators, which are not inside a repeated bracket,
1411 gets a private slot instead of allocating it on the stack. */
1412 if (space > 0 && cc >= end)
1413 {
1414 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1415 private_data_ptr += sizeof(sljit_sw) * space;
1416 }
1417
1418 if (size != 0)
1419 {
1420 if (size < 0)
1421 {
1422 cc += -size;
1423#ifdef SUPPORT_UTF
1424 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1425#endif
1426 }
1427 else
1428 cc += size;
1429 }
1430
1431 if (bracketlen > 0)
1432 {
1433 if (cc >= end)
1434 {
1435 end = bracketend(cc);
1436 if (end[-1 - LINK_SIZE] == OP_KET)
1437 end = NULL;
1438 }
1439 cc += bracketlen;
1440 }
1441 }
1442*private_data_start = private_data_ptr;
1443}
1444
/* Computes how many stack words a frame needs for the opcodes in [cc, ccend).
When ccend is NULL, the range is the inside of the bracket that starts at cc.

The counting matches the items written by init_frame(): two words for the
start of match, the mark and the last capture index (each counted once), and
three words for every capturing bracket. The returned length includes one
extra word.

Returns with a frame_types (always < 0) if no need for frame: no_frame when
stack_restore was set by some opcode in the range, no_stack otherwise.

*needs_control_head is set to TRUE when an OP_MARK, OP_PRUNE_ARG, OP_THEN_ARG
or OP_THEN opcode is found and common->control_head_ptr is non-zero (or
always, when DEBUG_FORCE_CONTROL_HEAD is enabled). */
static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
{
int length = 0;
int possessive = 0;
BOOL stack_restore = FALSE;
/* For recursions these two are treated as already handled. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    /* The possessive capturing bracket itself is counted up front. */
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    /* cc[1] holds the length of the name that follows. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion may change any of the tracked values. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    default:
    /* Any opcode that is not listed below sets stack_restore. */
    stack_restore = TRUE;
    /* Fall through. */

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:
    case OP_CALLOUT:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case: nothing was added beyond
the words counted for the possessive bracket itself. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
1628
/* Emits the code that fills a frame on the stack for the opcodes in
[cc, ccend). When ccend is NULL, the range is the inside of the bracket that
starts at cc; a non-recursive OP_CBRAPOS / OP_SCBRAPOS start opcode is itself
part of the range.

The frame is a list of items written at increasing offsets from STACK_TOP,
starting at STACK(stackpos):
  - start of match, mark and last capture index: the negated local offset
    followed by the current value (two words, written once each);
  - capturing bracket: OVECTOR(offset) followed by the two current ovector
    values (three words);
  - a terminating zero word.

stacktop is only used by the assertions. */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
/* For recursions these two are treated as already saved. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Save OVECTOR(0): marker word first, then the value. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      /* Save the current mark. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    /* cc[1] holds the length of the name that follows. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion saves every tracked value that is in use and has not
    been saved yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* Each capture owns two consecutive ovector entries. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos += (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Terminate the frame with a zero word. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1750
1751static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1752{
1753int private_data_length = needs_control_head ? 3 : 2;
1754int size;
1755pcre_uchar *alternative;
1756/* Calculate the sum of the private machine words. */
1757while (cc < ccend)
1758 {
1759 size = 0;
1760 switch(*cc)
1761 {
1762 case OP_KET:
1763 if (PRIVATE_DATA(cc) != 0)
1764 {
1765 private_data_length++;
1766 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1767 cc += PRIVATE_DATA(cc + 1);
1768 }
1769 cc += 1 + LINK_SIZE;
1770 break;
1771
1772 case OP_ASSERT:
1773 case OP_ASSERT_NOT:
1774 case OP_ASSERTBACK:
1775 case OP_ASSERTBACK_NOT:
1776 case OP_ONCE:
1777 case OP_ONCE_NC:
1778 case OP_BRAPOS:
1779 case OP_SBRA:
1780 case OP_SBRAPOS:
1781 case OP_SCOND:
1782 private_data_length++;
1783 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1784 cc += 1 + LINK_SIZE;
1785 break;
1786
1787 case OP_CBRA:
1788 case OP_SCBRA:
1789 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1790 private_data_length++;
1791 cc += 1 + LINK_SIZE + IMM2_SIZE;
1792 break;
1793
1794 case OP_CBRAPOS:
1795 case OP_SCBRAPOS:
1796 private_data_length += 2;
1797 cc += 1 + LINK_SIZE + IMM2_SIZE;
1798 break;
1799
1800 case OP_COND:
1801 /* Might be a hidden SCOND. */
1802 alternative = cc + GET(cc, 1);
1803 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1804 private_data_length++;
1805 cc += 1 + LINK_SIZE;
1806 break;
1807
1808 CASE_ITERATOR_PRIVATE_DATA_1
1809 if (PRIVATE_DATA(cc))
1810 private_data_length++;
1811 cc += 2;
1812#ifdef SUPPORT_UTF
1813 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1814#endif
1815 break;
1816
1817 CASE_ITERATOR_PRIVATE_DATA_2A
1818 if (PRIVATE_DATA(cc))
1819 private_data_length += 2;
1820 cc += 2;
1821#ifdef SUPPORT_UTF
1822 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1823#endif
1824 break;
1825
1826 CASE_ITERATOR_PRIVATE_DATA_2B
1827 if (PRIVATE_DATA(cc))
1828 private_data_length += 2;
1829 cc += 2 + IMM2_SIZE;
1830#ifdef SUPPORT_UTF
1831 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1832#endif
1833 break;
1834
1835 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1836 if (PRIVATE_DATA(cc))
1837 private_data_length++;
1838 cc += 1;
1839 break;
1840
1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1842 if (PRIVATE_DATA(cc))
1843 private_data_length += 2;
1844 cc += 1;
1845 break;
1846
1847 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1848 if (PRIVATE_DATA(cc))
1849 private_data_length += 2;
1850 cc += 1 + IMM2_SIZE;
1851 break;
1852
1853 case OP_CLASS:
1854 case OP_NCLASS:
1855#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1856 case OP_XCLASS:
1857 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1858#else
1859 size = 1 + 32 / (int)sizeof(pcre_uchar);
1860#endif
1861 if (PRIVATE_DATA(cc))
1862 private_data_length += get_class_iterator_size(cc + size);
1863 cc += size;
1864 break;
1865
1866 default:
1867 cc = next_opcode(common, cc);
1868 SLJIT_ASSERT(cc != NULL);
1869 break;
1870 }
1871 }
1872SLJIT_ASSERT(cc == ccend);
1873return private_data_length;
1874}
1875
/* Emits the code that copies the private data words of the opcodes in
[cc, ccend) between the local area (addressed through SLJIT_SP) and the
stack (addressed through STACK_TOP).

  save == TRUE   locals are stored to the stack; common->recursive_head_ptr
                 and, when needs_control_head is set, common->control_head_ptr
                 are stored as well
  save == FALSE  locals are loaded back from the stack

The stack area used lies between STACK(stackptr - 2) and STACK(stacktop - 1).
Values travel through TMP1 and TMP2 in alternation: each register keeps one
value in flight, so every load is separated from the matching store by the
transfer that goes through the other register. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local offsets of the words belonging to the current opcode. */
int srcw[2];
int count, size;
/* tmp1next selects the register used by the next transfer; the empty flags
tell whether a register currently holds a value in flight. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  start,
  loop,
  end
} status;

/* The head words are only transferred here when saving. */
status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* Step over the head word(s), then preload both registers. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  /* Collect the local offsets of the next item into srcw[0..count-1]. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    switch(*cc)
      {
      case OP_KET:
      /* A private slot here belongs to a detected bracket repeat; the
      word after the opcode tells how far to jump over the copies. */
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
        cc += PRIVATE_DATA(cc + 1);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      /* Only captures that are not optimized own a private word. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The repeat opcode that decides the word count follows the class
      data. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    SLJIT_ASSERT_STOP();
    break;
    }

  /* Transfer the collected words, highest srcw index first. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      if (tmp1next)
        {
        /* Store the value still held in TMP1 before reusing the register. */
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        /* Same as above, using TMP2. */
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      if (tmp1next)
        {
        /* Write back the preloaded value, then refill TMP1 from the stack
        if anything is left there. */
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        /* Same as above, using TMP2. */
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the values still in flight. The register that would be used next
  holds the older value, so it is stored first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
2197
2198static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2199{
2200pcre_uchar *end = bracketend(cc);
2201BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2202
2203/* Assert captures then. */
2204if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2205 current_offset = NULL;
2206/* Conditional block does not. */
2207if (*cc == OP_COND || *cc == OP_SCOND)
2208 has_alternatives = FALSE;
2209
2210cc = next_opcode(common, cc);
2211if (has_alternatives)
2212 current_offset = common->then_offsets + (cc - common->start);
2213
2214while (cc < end)
2215 {
2216 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2217 cc = set_then_offsets(common, cc, current_offset);
2218 else
2219 {
2220 if (*cc == OP_ALT && has_alternatives)
2221 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2222 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2223 *current_offset = 1;
2224 cc = next_opcode(common, cc);
2225 }
2226 }
2227
2228return end;
2229}
2230
/* The case-label groups are not needed past this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2237
2238static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2239{
2240return (value & (value - 1)) == 0;
2241}
2242
2243static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2244{
2245while (list)
2246 {
2247 /* sljit_set_label is clever enough to do nothing
2248 if either the jump or the label is NULL. */
2249 SET_LABEL(list->jump, label);
2250 list = list->next;
2251 }
2252}
2253
2254static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2255{
2256jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2257if (list_item)
2258 {
2259 list_item->next = *list;
2260 list_item->jump = jump;
2261 *list = list_item;
2262 }
2263}
2264
2265static void add_stub(compiler_common *common, struct sljit_jump *start)
2266{
2267DEFINE_COMPILER;
2268stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2269
2270if (list_item)
2271 {
2272 list_item->start = start;
2273 list_item->quit = LABEL();
2274 list_item->next = common->stubs;
2275 common->stubs = list_item;
2276 }
2277}
2278
2279static void flush_stubs(compiler_common *common)
2280{
2281DEFINE_COMPILER;
2282stub_list *list_item = common->stubs;
2283
2284while (list_item)
2285 {
2286 JUMPHERE(list_item->start);
2287 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2288 JUMPTO(SLJIT_JUMP, list_item->quit);
2289 list_item = list_item->next;
2290 }
2291common->stubs = NULL;
2292}
2293
2294static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2295{
2296DEFINE_COMPILER;
2297label_addr_list *label_addr;
2298
2299label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2300if (label_addr == NULL)
2301 return;
2302label_addr->label = LABEL();
2303label_addr->update_addr = update_addr;
2304label_addr->next = common->label_addrs;
2305common->label_addrs = label_addr;
2306}
2307
2308static SLJIT_INLINE void count_match(compiler_common *common)
2309{
2310DEFINE_COMPILER;
2311
2312OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2313add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2314}
2315
2316static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2317{
2318/* May destroy all locals and registers except TMP2. */
2319DEFINE_COMPILER;
2320
2321SLJIT_ASSERT(size > 0);
2322OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2323#ifdef DESTROY_REGISTERS
2324OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2325OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2326OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2327OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2328OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2329#endif
2330add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2331}
2332
2333static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2334{
2335DEFINE_COMPILER;
2336
2337SLJIT_ASSERT(size > 0);
2338OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2339}
2340
2341static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2342{
2343DEFINE_COMPILER;
2344sljit_uw *result;
2345
2346if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2347 return NULL;
2348
2349result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2350if (SLJIT_UNLIKELY(result == NULL))
2351 {
2352 sljit_set_compiler_memory_error(compiler);
2353 return NULL;
2354 }
2355
2356*(void**)result = common->read_only_data_head;
2357common->read_only_data_head = (void *)result;
2358return result + 1;
2359}
2360
2361static void free_read_only_data(void *current, void *allocator_data)
2362{
2363void *next;
2364
2365SLJIT_UNUSED_ARG(allocator_data);
2366
2367while (current != NULL)
2368 {
2369 next = *(void**)current;
2370 SLJIT_FREE(current, allocator_data);
2371 current = next;
2372 }
2373}
2374
2375static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2376{
2377DEFINE_COMPILER;
2378struct sljit_label *loop;
2379int i;
2380
2381/* At this point we can freely use all temporary registers. */
2382SLJIT_ASSERT(length > 1);
2383/* TMP1 returns with begin - 1. */
2384OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2385if (length < 8)
2386 {
2387 for (i = 1; i < length; i++)
2388 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2389 }
2390else
2391 {
2392 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2393 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2394 loop = LABEL();
2395 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2396 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2397 JUMPTO(SLJIT_NOT_ZERO, loop);
2398 }
2399}
2400
2401static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2402{
2403DEFINE_COMPILER;
2404sljit_s32 i;
2405
2406SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2407
2408OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2409for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2411}
2412
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code which re-initializes the match state: OVECTOR(2) ..
OVECTOR(length - 1) are overwritten with the value stored in OVECTOR(1),
the mark_ptr and control_head_ptr locals are cleared when they are
allocated, TMP1 is loaded from the start_ptr local, and STACK_TOP is loaded
from the "base" field of the sljit_stack referenced by the jit_arguments
structure. "length" must be greater than 1. TMP2 is destroyed when the
loop form is used. */
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Short vector: one store instruction per slot. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* Long vector: a run-time loop executed length - 2 times. STACK_TOP is
  borrowed as the loop counter; it is reloaded below. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  }

/* STACK_TOP is used as a scratch pointer for the chain
ARGUMENTS -> stack -> base; the other stores are interleaved with it. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}
2447
2448static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2449{
2450while (current != NULL)
2451 {
2452 switch (current[-2])
2453 {
2454 case type_then_trap:
2455 break;
2456
2457 case type_mark:
2458 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2459 return current[-4];
2460 break;
2461
2462 default:
2463 SLJIT_ASSERT_STOP();
2464 break;
2465 }
2466 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2467 current = (sljit_sw*)current[-1];
2468 }
2469return -1;
2470}
2471
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code which runs after a successful match: the internal ovector
(pointer values) is converted to character offsets relative to "begin" and
copied into the caller supplied "offsets" array, the mark pointer is stored
into the jit_arguments structure when mark_ptr is allocated, and the return
value is left in SLJIT_RETURN_REG. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the previous content of OVECTOR(1); the second loop below
compares slots against it. OVECTOR(1) then receives STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
/* SLJIT_R1: number of entries to copy (offset_count). */
OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2: output pointer, biased by one int because the store in the
loop uses the updating move form with a sizeof(int) offset. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* SLJIT_R0: subject begin. SLJIT_S0: input pointer into the local ovector. */
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();
/* Pointer minus begin gives a byte offset. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte offset to a character unit offset. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan backwards in steps of two words, starting from the pair of the
  top bracket. SLJIT_R1 is decremented for every step, and the scan
  continues while the loaded slot equals the value saved in SLJIT_S2. */
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2521
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code which reports a partial match: PCRE_ERROR_PARTIAL is placed
into SLJIT_RETURN_REG, up to three entries of the caller's "offsets" array
are filled (depending on real_offset_count), and control is transferred to
"quit". Nothing is stored when real_offset_count is less than 2.
STR_END (SLJIT_S1) is overwritten by this code. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
/* SLJIT_S0: subject begin. SLJIT_R1: the output "offsets" array. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* offsets[2] is written only when at least three entries are available.
Its source is start_ptr in hard partial mode, otherwise the word which
follows hit_start. */
jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

/* offsets[1] = STR_END - begin. The source of offsets[0] is loaded first:
start_used_ptr in hard partial mode, hit_start otherwise. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

/* offsets[0] = loaded pointer - begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2563
2564static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2565{
2566/* May destroy TMP1. */
2567DEFINE_COMPILER;
2568struct sljit_jump *jump;
2569
2570if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2571 {
2572 /* The value of -1 must be kept for start_used_ptr! */
2573 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2574 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2575 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2576 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2578 JUMPHERE(jump);
2579 }
2580else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2581 {
2582 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2583 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2584 JUMPHERE(jump);
2585 }
2586}
2587
2588static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2589{
2590/* Detects if the character has an othercase. */
2591unsigned int c;
2592
2593#ifdef SUPPORT_UTF
2594if (common->utf)
2595 {
2596 GETCHAR(c, cc);
2597 if (c > 127)
2598 {
2599#ifdef SUPPORT_UCP
2600 return c != UCD_OTHERCASE(c);
2601#else
2602 return FALSE;
2603#endif
2604 }
2605#ifndef COMPILE_PCRE8
2606 return common->fcc[c] != c;
2607#endif
2608 }
2609else
2610#endif
2611 c = *cc;
2612return MAX_255(c) ? common->fcc[c] != c : FALSE;
2613}
2614
2615static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2616{
2617/* Returns with the othercase. */
2618#ifdef SUPPORT_UTF
2619if (common->utf && c > 127)
2620 {
2621#ifdef SUPPORT_UCP
2622 return UCD_OTHERCASE(c);
2623#else
2624 return c;
2625#endif
2626 }
2627#endif
2628return TABLE_GET(c, common->fcc, c);
2629}
2630
2631static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2632{
2633/* Detects if the character and its othercase has only 1 bit difference. */
2634unsigned int c, oc, bit;
2635#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2636int n;
2637#endif
2638
2639#ifdef SUPPORT_UTF
2640if (common->utf)
2641 {
2642 GETCHAR(c, cc);
2643 if (c <= 127)
2644 oc = common->fcc[c];
2645 else
2646 {
2647#ifdef SUPPORT_UCP
2648 oc = UCD_OTHERCASE(c);
2649#else
2650 oc = c;
2651#endif
2652 }
2653 }
2654else
2655 {
2656 c = *cc;
2657 oc = TABLE_GET(c, common->fcc, c);
2658 }
2659#else
2660c = *cc;
2661oc = TABLE_GET(c, common->fcc, c);
2662#endif
2663
2664SLJIT_ASSERT(c != oc);
2665
2666bit = c ^ oc;
2667/* Optimized for English alphabet. */
2668if (c <= 127 && bit == 0x20)
2669 return (0 << 8) | 0x20;
2670
2671/* Since c != oc, they must have at least 1 bit difference. */
2672if (!is_powerof2(bit))
2673 return 0;
2674
2675#if defined COMPILE_PCRE8
2676
2677#ifdef SUPPORT_UTF
2678if (common->utf && c > 127)
2679 {
2680 n = GET_EXTRALEN(*cc);
2681 while ((bit & 0x3f) == 0)
2682 {
2683 n--;
2684 bit >>= 6;
2685 }
2686 return (n << 8) | bit;
2687 }
2688#endif /* SUPPORT_UTF */
2689return (0 << 8) | bit;
2690
2691#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2692
2693#ifdef SUPPORT_UTF
2694if (common->utf && c > 65535)
2695 {
2696 if (bit >= (1 << 10))
2697 bit >>= 10;
2698 else
2699 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2700 }
2701#endif /* SUPPORT_UTF */
2702return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2703
2704#endif /* COMPILE_PCRE[8|16|32] */
2705}
2706
2707static void check_partial(compiler_common *common, BOOL force)
2708{
2709/* Checks whether a partial matching is occurred. Does not modify registers. */
2710DEFINE_COMPILER;
2711struct sljit_jump *jump = NULL;
2712
2713SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2714
2715if (common->mode == JIT_COMPILE)
2716 return;
2717
2718if (!force)
2719 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2720else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2721 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2722
2723if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2725else
2726 {
2727 if (common->partialmatchlabel != NULL)
2728 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2729 else
2730 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2731 }
2732
2733if (jump != NULL)
2734 JUMPHERE(jump);
2735}
2736
2737static void check_str_end(compiler_common *common, jump_list **end_reached)
2738{
2739/* Does not affect registers. Usually used in a tight spot. */
2740DEFINE_COMPILER;
2741struct sljit_jump *jump;
2742
2743if (common->mode == JIT_COMPILE)
2744 {
2745 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2746 return;
2747 }
2748
2749jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2750if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2751 {
2752 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2754 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2755 }
2756else
2757 {
2758 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2759 if (common->partialmatchlabel != NULL)
2760 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2761 else
2762 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2763 }
2764JUMPHERE(jump);
2765}
2766
2767static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2768{
2769DEFINE_COMPILER;
2770struct sljit_jump *jump;
2771
2772if (common->mode == JIT_COMPILE)
2773 {
2774 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2775 return;
2776 }
2777
2778/* Partial matching mode. */
2779jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2780add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2781if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2782 {
2783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2784 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2785 }
2786else
2787 {
2788 if (common->partialmatchlabel != NULL)
2789 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2790 else
2791 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2792 }
2793JUMPHERE(jump);
2794}
2795
2796static void peek_char(compiler_common *common, sljit_u32 max)
2797{
2798/* Reads the character into TMP1, keeps STR_PTR.
2799Does not check STR_END. TMP2 Destroyed. */
2800DEFINE_COMPILER;
2801#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2802struct sljit_jump *jump;
2803#endif
2804
2805SLJIT_UNUSED_ARG(max);
2806
2807OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2808#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2809if (common->utf)
2810 {
2811 if (max < 128) return;
2812
2813 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2814 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2815 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2816 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2817 JUMPHERE(jump);
2818 }
2819#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2820
2821#if defined SUPPORT_UTF && defined COMPILE_PCRE16
2822if (common->utf)
2823 {
2824 if (max < 0xd800) return;
2825
2826 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2827 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2828 /* TMP2 contains the high surrogate. */
2829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2830 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2831 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2832 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2833 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2834 JUMPHERE(jump);
2835 }
2836#endif
2837}
2838
2839#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2840
2841static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2842{
2843/* Tells whether the character codes below 128 are enough
2844to determine a match. */
2845const sljit_u8 value = nclass ? 0xff : 0;
2846const sljit_u8 *end = bitset + 32;
2847
2848bitset += 16;
2849do
2850 {
2851 if (*bitset++ != value)
2852 return FALSE;
2853 }
2854while (bitset < end);
2855return TRUE;
2856}
2857
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END. The
full_read argument tells whether characters above max are accepted or not.
Only valid in UTF mode. TMP2 is destroyed, and STR_PTR is advanced. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

/* TMP2: the first byte; STR_PTR is moved past it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The type is read from the ctypes table, indexed by the first byte. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (full_read)
  {
  /* For a first byte >= 0xc0, the remaining bytes of the character are
  skipped: utf8_table4 is indexed by (first byte - 0xc0) and the loaded
  value is added to STR_PTR. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
}
2881
2882#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2883
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

STR_PTR is always moved past the first character unit. When update_str_ptr is
set, the emitted code also moves STR_PTR past the whole character for
characters outside of the range. min must not be greater than max. TMP2 may
be destroyed, and RETURN_ADDR is used as a scratch register by some of the
8 bit UTF paths. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* TMP1: the first character unit. From here on, offset 0 from STR_PTR
addresses the second unit. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* The first byte alone is enough when only values below 128 matter and
  the position after the character is not needed. */
  if (max < 128 && !update_str_ptr) return;

  /* First bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only characters of at least 0x10000 are interesting: a sequence is
    decoded inline only when the first byte is 0xf0..0xf7. Otherwise TMP1
    is left with the second byte, which is below min. With update_str_ptr,
    the value loaded from utf8_table4 into RETURN_ADDR is added to STR_PTR
    in both cases. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* The whole range is in 0x800..0xffff: a sequence is decoded inline
    only when the first byte is 0xe0..0xef. The structure is the same as
    above, with one continuation byte less. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: call one of the shared decoder helpers. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Here update_str_ptr is set (see the early return above): only the
    rest of the character is skipped, TMP1 keeps the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: the character is decoded as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Characters above 0xffff are needed: a surrogate pair is combined
    when the first unit is in the 0xd800..0xdbff range. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  /* When the first unit is in the 0xd800..0xdbff range: STR_PTR is moved
  past the second unit if update_str_ptr is set, and TMP1 is replaced by
  0x10000 (a value above max) if max could otherwise cover the raw unit. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
3011
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character of any value into TMP1 and moves STR_PTR past it: a
shorthand for read_char_range with the full 0 .. READ_CHAR_MAX range and
update_str_ptr set. */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
3016
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type comes from the ctypes table, which covers the characters below 256
only; the emitted code yields zero for the characters it finds to be above
that. TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

/* TMP2: the first character unit; STR_PTR is moved past it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Inline form: the first two bytes are combined as a two byte
    sequence, and the table is consulted only if the result is at most
    255. STR_PTR is moved past the second byte only. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* The shared helper also moves STR_PTR past the whole character. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  /* STR_PTR is moved by one more unit when the unit just read is in the
  0xd800..0xdbff range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
3081
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.
Without UTF (and always in 32 bit mode) this is a single step of one
character unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one byte at a time, and repeat while the byte stepped over
  has the 10xxxxxx form (its top two bits are 0x80). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* TMP1 becomes 1 when the unit stepped over is in the 0xdc00..0xdfff
  range, 0 otherwise; shifted left by one it is the number of bytes of
  one more 16 bit unit, which is subtracted without a branch. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3116
3117static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3118{
3119/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3120DEFINE_COMPILER;
3121struct sljit_jump *jump;
3122
3123if (nltype == NLTYPE_ANY)
3124 {
3125 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3126 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3127 }
3128else if (nltype == NLTYPE_ANYCRLF)
3129 {
3130 if (jumpifmatch)
3131 {
3132 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3133 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3134 }
3135 else
3136 {
3137 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3138 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3139 JUMPHERE(jump);
3140 }
3141 }
3142else
3143 {
3144 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3145 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3146 }
3147}
3148
3149#ifdef SUPPORT_UTF
3150
3151#if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.
This emits a shared helper which is entered with a fast call. On entry
STR_PTR addresses the second byte of the character; on return it addresses
the byte after the character. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* Bit 0x800 of the combined value is bit 0x20 of the first byte; it is
set when the sequence is longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Remove the length marker bit, and append the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 is now bit 0x10 of the first byte; it is set when the
sequence is longer than three bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3199
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.
This emits a shared helper which is entered with a fast call; read_char_range
selects it when its max argument is below 0x10000. Two and three byte
sequences are decoded precisely. On entry STR_PTR addresses the second byte
of the character. TMP2 is destroyed, and no length is returned. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first and the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 0x400 of the combined value is bit 0x10 of the first byte, which is
set for a first byte of a sequence longer than three bytes. TMP2 becomes 1
in that case, and STR_PTR is moved by one extra byte. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3235
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.
This emits a shared helper which is entered with a fast call. On entry
STR_PTR addresses the second byte of the character; on return it addresses
the byte after the character. The returned type is zero for every character
which is not below 256. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set when the sequence is longer than two
bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* A value above 3 here means that the character is 256 or greater. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
/* Longer sequences: the value loaded from utf8_table4, indexed by
(first byte - 0xc0), is added to STR_PTR and zero is returned. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3271
3272#endif /* COMPILE_PCRE8 */
3273
3274#endif /* SUPPORT_UTF */
3275
3276#ifdef SUPPORT_UCP
3277
3278/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3279#define UCD_BLOCK_MASK 127
3280#define UCD_BLOCK_SHIFT 7
3281
3282static void do_getucd(compiler_common *common)
3283{
3284/* Search the UCD record for the character comes in TMP1.
3285Returns chartype in TMP1 and UCD offset in TMP2. */
3286DEFINE_COMPILER;
3287
3288SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3289
3290sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3291OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3292OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3293OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3294OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3295OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3296OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3297OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3298OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3299OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3300sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3301}
3302#endif
3303
3304static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3305{
3306DEFINE_COMPILER;
3307struct sljit_label *mainloop;
3308struct sljit_label *newlinelabel = NULL;
3309struct sljit_jump *start;
3310struct sljit_jump *end = NULL;
3311struct sljit_jump *end2 = NULL;
3312#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3313struct sljit_jump *singlechar;
3314#endif
3315jump_list *newline = NULL;
3316BOOL newlinecheck = FALSE;
3317BOOL readuchar = FALSE;
3318
3319if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3320 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3321 newlinecheck = TRUE;
3322
3323if (common->match_end_ptr != 0)
3324 {
3325 /* Search for the end of the first line. */
3326 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3327
3328 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3329 {
3330 mainloop = LABEL();
3331 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3332 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3333 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3334 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3335 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3336 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3337 JUMPHERE(end);
3338 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3339 }
3340 else
3341 {
3342 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3343 mainloop = LABEL();
3344 /* Continual stores does not cause data dependency. */
3345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3346 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3347 check_newlinechar(common, common->nltype, &newline, TRUE);
3348 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3349 JUMPHERE(end);
3350 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3351 set_jumps(newline, LABEL());
3352 }
3353
3354 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3355 }
3356
3357start = JUMP(SLJIT_JUMP);
3358
3359if (newlinecheck)
3360 {
3361 newlinelabel = LABEL();
3362 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3363 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3364 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3365 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3366 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3367#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3368 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3369#endif
3370 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3371 end2 = JUMP(SLJIT_JUMP);
3372 }
3373
3374mainloop = LABEL();
3375
3376/* Increasing the STR_PTR here requires one less jump in the most common case. */
3377#ifdef SUPPORT_UTF
3378if (common->utf) readuchar = TRUE;
3379#endif
3380if (newlinecheck) readuchar = TRUE;
3381
3382if (readuchar)
3383 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3384
3385if (newlinecheck)
3386 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3387
3388OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3389#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3390#if defined COMPILE_PCRE8
3391if (common->utf)
3392 {
3393 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3394 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3395 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3396 JUMPHERE(singlechar);
3397 }
3398#elif defined COMPILE_PCRE16
3399if (common->utf)
3400 {
3401 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3402 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3403 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3404 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3405 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3406 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3407 JUMPHERE(singlechar);
3408 }
3409#endif /* COMPILE_PCRE[8|16] */
3410#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3411JUMPHERE(start);
3412
3413if (newlinecheck)
3414 {
3415 JUMPHERE(end);
3416 JUMPHERE(end2);
3417 }
3418
3419return mainloop;
3420}
3421
3422#define MAX_N_CHARS 16
3423#define MAX_DIFF_CHARS 6
3424
3425static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3426{
3427pcre_uchar i, len;
3428
3429len = chars[0];
3430if (len == 255)
3431 return;
3432
3433if (len == 0)
3434 {
3435 chars[0] = 1;
3436 chars[1] = chr;
3437 return;
3438 }
3439
3440for (i = len; i > 0; i--)
3441 if (chars[i] == chr)
3442 return;
3443
3444if (len >= MAX_DIFF_CHARS - 1)
3445 {
3446 chars[0] = 255;
3447 return;
3448 }
3449
3450len++;
3451chars[len] = chr;
3452chars[0] = len;
3453}
3454
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern from cc and records, for each successive code
unit position, the values which may occur there.

  cc         current position in the compiled pattern
  chars      slots of MAX_DIFF_CHARS entries, one slot per position; entry 0
             of a slot is the number of recorded values, or 255 when the
             position can hold anything (see add_prefix_char)
  max_chars  number of slots which may still be filled
  rec_count  work budget shared by all recursive calls; it is decremented
             on every loop iteration

Returns the number of positions filled by this call, or 0 when the budget
in rec_count has run out. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case of a caseless literal. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

/* Repeat count of the next item; set by the EXACT style opcodes and reset
to 1 after each item has been recorded. */
repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per item state:
       last      stop scanning after this item
       any       the item is recorded as "any value" (255)
       class     the item is a 32 byte class bitmap
       caseless  the literal matches its other case as well */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* The whole assertion group is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; "last" stays TRUE, so the scan stops
    after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* The character is optional: first scan what follows it into the same
    slots; the result also limits how far this call may continue. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Follow the link stored in the OP_ALT item. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative into the same slots; each result
    shrinks max_chars. The first alternative is then scanned inline by
    continuing this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment for these two opcodes; the shared code below adds
    one more. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UTF
    case OP_NOTPROP:
    case OP_PROP:
#ifndef COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only stores the count; the item it applies to is handled by the
    next iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Every other opcode ends the scan. */
    return consumed;
    }

  if (any)
    {
    /* Mark "repeat" consecutive positions as unconstrained. */
    do
      {
      chars[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The bitmap starts right after the opcode; cc is moved to the item
    which follows the bitmap (possibly a repeat opcode). */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(pcre_uchar);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing, so what follows it is scanned into
      the same slots first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The first value of the range becomes the repeat count. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class containing character 255 makes the position match
      anything; otherwise every set bit is added to the slot. */
      if (bytes[31] & 0x80)
        chars[0] = 255;
      else if (chars[0] != 255)
        {
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first character of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars[0] != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning continues only when both values of the range are equal. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: cc points to its first code unit. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* Give up when the other case has a different encoded length. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record every code unit of the character, "repeat" times. cc and len
  are restored from the saved values before each further repetition. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
      add_prefix_char(*cc, chars);

      if (caseless)
        add_prefix_char(*oc, chars);

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += MAX_DIFF_CHARS;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3852
3853#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3854
3855static sljit_s32 character_to_int32(pcre_uchar chr)
3856{
3857sljit_s32 value = (sljit_s32)chr;
3858#if defined COMPILE_PCRE8
3859#define SSE2_COMPARE_TYPE_INDEX 0
3860return (value << 24) | (value << 16) | (value << 8) | value;
3861#elif defined COMPILE_PCRE16
3862#define SSE2_COMPARE_TYPE_INDEX 1
3863return (value << 16) | value;
3864#elif defined COMPILE_PCRE32
3865#define SSE2_COMPARE_TYPE_INDEX 2
3866return value;
3867#else
3868#error "Unsupported unit width"
3869#endif
3870}
3871
static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
{
/* Emits an SSE2 search which moves STR_PTR forward to the first code unit
equal to char1 or char2. The input is examined in aligned 16 byte blocks,
so STR_PTR may end up at or beyond STR_END; the caller has to check that.

The instructions are hand-encoded. Register usage of the emitted code:
  xmm2  char1 (or char1 | bit) replicated to all lanes
  xmm3  "bit" or char2 replicated to all lanes (only if char1 != char2)
  xmm0  the current 16 byte block
  xmm1  second copy of the block (only if load_twice)
  TMP1  compare mask, later the byte index of the first hit
  TMP2  misalignment of the initial STR_PTR (STR_PTR & 0xf) */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit[3];
struct sljit_jump *nomatch;
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
BOOL load_twice = FALSE;
pcre_uchar bit;

/* When is_powerof2() accepts char1 ^ char2, OR-ing that value into the
data lets a single compare against (char1 | bit) cover both characters.
Otherwise two different characters need two loads and two compares. */
bit = char1 ^ char2;
if (!is_powerof2(bit))
  bit = 0;

if ((char1 != char2) && bit == 0)
  load_twice = TRUE;

quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The encodings below have no REX prefix for TMP1. TMP2 must have
register index 1, presumably because the variable shift below needs its
count in that register. */
SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* Broadcast the low dword of xmm2 to all four dwords. */
/* PSHUFD xmm1, xmm2/m128, imm8 */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (2 << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  instruction[3] = 0xc0 | (3 << 3) | 3;
  instruction[4] = 0;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

/* Split STR_PTR into an aligned block address and the offset in it. */
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
  {
  instruction[2] = 0x6f;
  instruction[3] = (0 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (load_twice)
    {
    instruction[3] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
    }
  }
else
  {
  /* Register index 8 or above: a 0x41 prefix byte is inserted, so the
  encoding is one byte longer. */
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);

  if (load_twice)
    {
    /* NOTE(review): unlike above, the index is not masked with 0x7. For
    indexes 8..15 the value is the same because bit 3 is set by the
    (1 << 3) term anyway. */
    instruction[4] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
    }
  /* Restore the byte which the four byte encodings below rely on. */
  instruction[1] = 0x0f;
  }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = (1 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

#endif

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (0 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = 0xc0 | (1 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PMOVMSKB reg, xmm */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* The second mask is written to TMP2, whose value (the misalignment)
  is still needed, so it is kept in RETURN_ADDR meanwhile. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  sljit_emit_op_custom(compiler, instruction, 4);

  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
  }

/* Drop the mask bits of the bytes which precede the original STR_PTR. */
OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);

/* BSF sets the zero flag when the mask is empty. */
nomatch = JUMP(SLJIT_ZERO);

/* Hit: aligned address + misalignment + index of the first set bit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit[1] = JUMP(SLJIT_JUMP);

JUMPHERE(nomatch);

start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Second part (aligned) */

/* Bytes 0 and 1 were overwritten by the BSF encoding above. */
instruction[0] = 0x66;
instruction[1] = 0x0f;

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
  {
  instruction[2] = 0x6f;
  instruction[3] = (0 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (load_twice)
    {
    instruction[3] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
    }
  }
else
  {
  instruction[1] = 0x41;
  instruction[2] = 0x0f;
  instruction[3] = 0x6f;
  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  sljit_emit_op_custom(compiler, instruction, 5);

  if (load_twice)
    {
    /* NOTE(review): index not masked with 0x7; see the first part. */
    instruction[4] = (1 << 3) | str_ptr_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
    }
  instruction[1] = 0x0f;
  }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = (1 << 3) | str_ptr_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

#endif

if (bit != 0)
  {
  /* POR xmm1, xmm2/m128 */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (0 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  instruction[3] = 0xc0 | (1 << 3) | 3;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

/* PMOVMSKB reg, xmm */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
  {
  /* TMP2 is not read again in this part, so it is not saved here. */
  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  sljit_emit_op_custom(compiler, instruction, 4);

  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  }

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);

/* Empty mask: continue with the next 16 byte block. */
JUMPTO(SLJIT_ZERO, start);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

/* Common exit of all three quit jumps. */
start = LABEL();
SET_LABEL(quit[0], start);
SET_LABEL(quit[1], start);
SET_LABEL(quit[2], start);
}
4129
4130#undef SSE2_COMPARE_TYPE_INDEX
4131
4132#endif
4133
4134static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
4135{
4136DEFINE_COMPILER;
4137struct sljit_label *start;
4138struct sljit_jump *quit;
4139struct sljit_jump *found;
4140pcre_uchar mask;
4141#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4142struct sljit_label *utf_start = NULL;
4143struct sljit_jump *utf_quit = NULL;
4144#endif
4145BOOL has_match_end = (common->match_end_ptr != 0);
4146
4147if (offset > 0)
4148 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4149
4150if (has_match_end)
4151 {
4152 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4153
4154 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4155#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4156 if (sljit_x86_is_cmov_available())
4157 {
4158 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4159 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4160 }
4161#endif
4162 {
4163 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
4164 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4165 JUMPHERE(quit);
4166 }
4167 }
4168
4169#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4170if (common->utf && offset > 0)
4171 utf_start = LABEL();
4172#endif
4173
4174#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4175
4176/* SSE2 accelerated first character search. */
4177
4178if (sljit_x86_is_sse2_available())
4179 {
4180 fast_forward_first_char2_sse2(common, char1, char2);
4181
4182 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4183 if (common->mode == JIT_COMPILE)
4184 {
4185 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4186 SLJIT_ASSERT(common->forced_quit_label == NULL);
4187 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4188 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4189
4190#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4191 if (common->utf && offset > 0)
4192 {
4193 SLJIT_ASSERT(common->mode == JIT_COMPILE);
4194
4195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4197#if defined COMPILE_PCRE8
4198 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4199 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4200#elif defined COMPILE_PCRE16
4201 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4202 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4203#else
4204#error "Unknown code width"
4205#endif
4206 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4207 }
4208#endif
4209
4210 if (offset > 0)
4211 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4212 }
4213 else if (sljit_x86_is_cmov_available())
4214 {
4215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4216 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
4217 }
4218 else
4219 {
4220 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4221 OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
4222 JUMPHERE(quit);
4223 }
4224
4225 if (has_match_end)
4226 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4227 return;
4228 }
4229
4230#endif
4231
4232quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4233
4234start = LABEL();
4235OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4236
4237if (char1 == char2)
4238 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4239else
4240 {
4241 mask = char1 ^ char2;
4242 if (is_powerof2(mask))
4243 {
4244 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4245 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4246 }
4247 else
4248 {
4249 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4250 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4251 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4252 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4253 found = JUMP(SLJIT_NOT_ZERO);
4254 }
4255 }
4256
4257OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4258CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4259
4260#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4261if (common->utf && offset > 0)
4262 utf_quit = JUMP(SLJIT_JUMP);
4263#endif
4264
4265JUMPHERE(found);
4266
4267#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4268if (common->utf && offset > 0)
4269 {
4270 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4271 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4272#if defined COMPILE_PCRE8
4273 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4274 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4275#elif defined COMPILE_PCRE16
4276 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4277 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4278#else
4279#error "Unknown code width"
4280#endif
4281 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4282 JUMPHERE(utf_quit);
4283 }
4284#endif
4285
4286JUMPHERE(quit);
4287
4288if (has_match_end)
4289 {
4290 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4291 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4292 if (offset > 0)
4293 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4294 JUMPHERE(quit);
4295 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4296 }
4297
4298if (offset > 0)
4299 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4300}
4301
4302static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4303{
4304DEFINE_COMPILER;
4305struct sljit_label *start;
4306struct sljit_jump *quit;
4307struct sljit_jump *match;
4308/* bytes[0] represent the number of characters between 0
4309and MAX_N_BYTES - 1, 255 represents any character. */
4310pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4311sljit_s32 offset;
4312pcre_uchar mask;
4313pcre_uchar *char_set, *char_set_end;
4314int i, max, from;
4315int range_right = -1, range_len;
4316sljit_u8 *update_table = NULL;
4317BOOL in_range;
4318sljit_u32 rec_count;
4319
4320for (i = 0; i < MAX_N_CHARS; i++)
4321 chars[i * MAX_DIFF_CHARS] = 0;
4322
4323rec_count = 10000;
4324max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4325
4326if (max < 1)
4327 return FALSE;
4328
4329in_range = FALSE;
4330/* Prevent compiler "uninitialized" warning */
4331from = 0;
4332range_len = 4 /* minimum length */ - 1;
4333for (i = 0; i <= max; i++)
4334 {
4335 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4336 {
4337 range_len = i - from;
4338 range_right = i - 1;
4339 }
4340
4341 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4342 {
4343 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4344 if (!in_range)
4345 {
4346 in_range = TRUE;
4347 from = i;
4348 }
4349 }
4350 else
4351 in_range = FALSE;
4352 }
4353
4354if (range_right >= 0)
4355 {
4356 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4357 if (update_table == NULL)
4358 return TRUE;
4359 memset(update_table, IN_UCHARS(range_len), 256);
4360
4361 for (i = 0; i < range_len; i++)
4362 {
4363 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4364 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4365 char_set_end = char_set + char_set[0];
4366 char_set++;
4367 while (char_set <= char_set_end)
4368 {
4369 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4370 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4371 char_set++;
4372 }
4373 }
4374 }
4375
4376offset = -1;
4377/* Scan forward. */
4378for (i = 0; i < max; i++)
4379 {
4380 if (offset == -1)
4381 {
4382 if (chars[i * MAX_DIFF_CHARS] <= 2)
4383 offset = i;
4384 }
4385 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4386 {
4387 if (chars[i * MAX_DIFF_CHARS] == 1)
4388 offset = i;
4389 else
4390 {
4391 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4392 if (!is_powerof2(mask))
4393 {
4394 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4395 if (is_powerof2(mask))
4396 offset = i;
4397 }
4398 }
4399 }
4400 }
4401
4402if (range_right < 0)
4403 {
4404 if (offset < 0)
4405 return FALSE;
4406 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4407 /* Works regardless the value is 1 or 2. */
4408 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4409 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4410 return TRUE;
4411 }
4412
4413if (range_right == offset)
4414 offset = -1;
4415
4416SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4417
4418max -= 1;
4419SLJIT_ASSERT(max > 0);
4420if (common->match_end_ptr != 0)
4421 {
4422 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4423 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4424 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4425 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4426 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4427 JUMPHERE(quit);
4428 }
4429else
4430 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4431
4432SLJIT_ASSERT(range_right >= 0);
4433
4434#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4435OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4436#endif
4437
4438start = LABEL();
4439quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4440
4441#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4442OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4443#else
4444OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4445#endif
4446
4447#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4448OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4449#else
4450OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4451#endif
4452OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4453CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4454
4455if (offset >= 0)
4456 {
4457 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4459
4460 if (chars[offset * MAX_DIFF_CHARS] == 1)
4461 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4462 else
4463 {
4464 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4465 if (is_powerof2(mask))
4466 {
4467 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4468 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4469 }
4470 else
4471 {
4472 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4473 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4474 JUMPHERE(match);
4475 }
4476 }
4477 }
4478
4479#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4480if (common->utf && offset != 0)
4481 {
4482 if (offset < 0)
4483 {
4484 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4485 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4486 }
4487 else
4488 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4489#if defined COMPILE_PCRE8
4490 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4491 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4492#elif defined COMPILE_PCRE16
4493 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4494 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4495#else
4496#error "Unknown code width"
4497#endif
4498 if (offset < 0)
4499 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4500 }
4501#endif
4502
4503if (offset >= 0)
4504 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4505
4506JUMPHERE(quit);
4507
4508if (common->match_end_ptr != 0)
4509 {
4510 if (range_right >= 0)
4511 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4512 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4513 if (range_right >= 0)
4514 {
4515 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4516 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4517 JUMPHERE(quit);
4518 }
4519 }
4520else
4521 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4522return TRUE;
4523}
4524
4525#undef MAX_N_CHARS
4526#undef MAX_DIFF_CHARS
4527
4528static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4529{
4530pcre_uchar oc;
4531
4532oc = first_char;
4533if (caseless)
4534 {
4535 oc = TABLE_GET(first_char, common->fcc, first_char);
4536#if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4537 if (first_char > 127 && common->utf)
4538 oc = UCD_OTHERCASE(first_char);
4539#endif
4540 }
4541
4542fast_forward_first_char2(common, first_char, oc, 0);
4543}
4544
4545static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4546{
4547DEFINE_COMPILER;
4548struct sljit_label *loop;
4549struct sljit_jump *lastchar;
4550struct sljit_jump *firstchar;
4551struct sljit_jump *quit;
4552struct sljit_jump *foundcr = NULL;
4553struct sljit_jump *notfoundnl;
4554jump_list *newline = NULL;
4555
4556if (common->match_end_ptr != 0)
4557 {
4558 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4559 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4560 }
4561
4562if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4563 {
4564 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4565 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4566 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4568 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4569
4570 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4571 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4572 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4573#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4574 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4575#endif
4576 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4577
4578 loop = LABEL();
4579 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4580 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4581 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4582 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4583 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4584 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4585
4586 JUMPHERE(quit);
4587 JUMPHERE(firstchar);
4588 JUMPHERE(lastchar);
4589
4590 if (common->match_end_ptr != 0)
4591 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4592 return;
4593 }
4594
4595OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4596OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4597firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4598skip_char_back(common);
4599
4600loop = LABEL();
4601common->ff_newline_shortcut = loop;
4602
4603read_char_range(common, common->nlmin, common->nlmax, TRUE);
4604lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4605if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4606 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4607check_newlinechar(common, common->nltype, &newline, FALSE);
4608set_jumps(newline, loop);
4609
4610if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4611 {
4612 quit = JUMP(SLJIT_JUMP);
4613 JUMPHERE(foundcr);
4614 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4615 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4617 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4618#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4619 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4620#endif
4621 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4622 JUMPHERE(notfoundnl);
4623 JUMPHERE(quit);
4624 }
4625JUMPHERE(lastchar);
4626JUMPHERE(firstchar);
4627
4628if (common->match_end_ptr != 0)
4629 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4630}
4631
4632static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4633
4634static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4635{
4636DEFINE_COMPILER;
4637struct sljit_label *start;
4638struct sljit_jump *quit;
4639struct sljit_jump *found = NULL;
4640jump_list *matches = NULL;
4641#ifndef COMPILE_PCRE8
4642struct sljit_jump *jump;
4643#endif
4644
4645if (common->match_end_ptr != 0)
4646 {
4647 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4648 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4649 }
4650
4651start = LABEL();
4652quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4653OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4654#ifdef SUPPORT_UTF
4655if (common->utf)
4656 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4657#endif
4658
4659if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4660 {
4661#ifndef COMPILE_PCRE8
4662 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4663 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4664 JUMPHERE(jump);
4665#endif
4666 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4667 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4668 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4669 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4670 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4671 found = JUMP(SLJIT_NOT_ZERO);
4672 }
4673
4674#ifdef SUPPORT_UTF
4675if (common->utf)
4676 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4677#endif
4678OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4679#ifdef SUPPORT_UTF
4680#if defined COMPILE_PCRE8
4681if (common->utf)
4682 {
4683 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4684 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4686 }
4687#elif defined COMPILE_PCRE16
4688if (common->utf)
4689 {
4690 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4691 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4692 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4693 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4694 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4696 }
4697#endif /* COMPILE_PCRE[8|16] */
4698#endif /* SUPPORT_UTF */
4699JUMPTO(SLJIT_JUMP, start);
4700if (found != NULL)
4701 JUMPHERE(found);
4702if (matches != NULL)
4703 set_jumps(matches, LABEL());
4704JUMPHERE(quit);
4705
4706if (common->match_end_ptr != 0)
4707 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4708}
4709
4710static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4711{
4712DEFINE_COMPILER;
4713struct sljit_label *loop;
4714struct sljit_jump *toolong;
4715struct sljit_jump *alreadyfound;
4716struct sljit_jump *found;
4717struct sljit_jump *foundoc = NULL;
4718struct sljit_jump *notfound;
4719sljit_u32 oc, bit;
4720
4721SLJIT_ASSERT(common->req_char_ptr != 0);
4722OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4723OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4724toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4725alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4726
4727if (has_firstchar)
4728 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4729else
4730 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4731
4732loop = LABEL();
4733notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4734
4735OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4736oc = req_char;
4737if (caseless)
4738 {
4739 oc = TABLE_GET(req_char, common->fcc, req_char);
4740#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4741 if (req_char > 127 && common->utf)
4742 oc = UCD_OTHERCASE(req_char);
4743#endif
4744 }
4745if (req_char == oc)
4746 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4747else
4748 {
4749 bit = req_char ^ oc;
4750 if (is_powerof2(bit))
4751 {
4752 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4753 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4754 }
4755 else
4756 {
4757 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4758 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4759 }
4760 }
4761OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4762JUMPTO(SLJIT_JUMP, loop);
4763
4764JUMPHERE(found);
4765if (foundoc)
4766 JUMPHERE(foundoc);
4767OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4768JUMPHERE(alreadyfound);
4769JUMPHERE(toolong);
4770return notfound;
4771}
4772
4773static void do_revertframes(compiler_common *common)
4774{
4775DEFINE_COMPILER;
4776struct sljit_jump *jump;
4777struct sljit_label *mainloop;
4778
4779sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4780OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4781GET_LOCAL_BASE(TMP3, 0, 0);
4782
4783/* Drop frames until we reach STACK_TOP. */
4784mainloop = LABEL();
4785OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4786OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4787jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4788
4789OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4790OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4791OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4792OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4793JUMPTO(SLJIT_JUMP, mainloop);
4794
4795JUMPHERE(jump);
4796jump = JUMP(SLJIT_SIG_LESS);
4797/* End of dropping frames. */
4798sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4799
4800JUMPHERE(jump);
4801OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4802OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4803OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4804OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4805JUMPTO(SLJIT_JUMP, mainloop);
4806}
4807
4808static void check_wordboundary(compiler_common *common)
4809{
4810DEFINE_COMPILER;
4811struct sljit_jump *skipread;
4812jump_list *skipread_list = NULL;
4813#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4814struct sljit_jump *jump;
4815#endif
4816
4817SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4818
4819sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4820/* Get type of the previous char, and put it to LOCALS1. */
4821OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4822OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4823OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4824skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4825skip_char_back(common);
4826check_start_used_ptr(common);
4827read_char(common);
4828
4829/* Testing char type. */
4830#ifdef SUPPORT_UCP
4831if (common->use_ucp)
4832 {
4833 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4834 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4835 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4837 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4838 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4839 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4840 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4841 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4842 JUMPHERE(jump);
4843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4844 }
4845else
4846#endif
4847 {
4848#ifndef COMPILE_PCRE8
4849 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4850#elif defined SUPPORT_UTF
4851 /* Here LOCALS1 has already been zeroed. */
4852 jump = NULL;
4853 if (common->utf)
4854 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4855#endif /* COMPILE_PCRE8 */
4856 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4857 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4858 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4860#ifndef COMPILE_PCRE8
4861 JUMPHERE(jump);
4862#elif defined SUPPORT_UTF
4863 if (jump != NULL)
4864 JUMPHERE(jump);
4865#endif /* COMPILE_PCRE8 */
4866 }
4867JUMPHERE(skipread);
4868
4869OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4870check_str_end(common, &skipread_list);
4871peek_char(common, READ_CHAR_MAX);
4872
4873/* Testing char type. This is a code duplication. */
4874#ifdef SUPPORT_UCP
4875if (common->use_ucp)
4876 {
4877 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4878 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4879 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4880 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4881 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4882 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4883 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4884 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4885 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4886 JUMPHERE(jump);
4887 }
4888else
4889#endif
4890 {
4891#ifndef COMPILE_PCRE8
4892 /* TMP2 may be destroyed by peek_char. */
4893 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4894 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4895#elif defined SUPPORT_UTF
4896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4897 jump = NULL;
4898 if (common->utf)
4899 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4900#endif
4901 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4902 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4903 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4904#ifndef COMPILE_PCRE8
4905 JUMPHERE(jump);
4906#elif defined SUPPORT_UTF
4907 if (jump != NULL)
4908 JUMPHERE(jump);
4909#endif /* COMPILE_PCRE8 */
4910 }
4911set_jumps(skipread_list, LABEL());
4912
4913OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4914sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4915}
4916
4917static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4918{
4919/* May destroy TMP1. */
4920DEFINE_COMPILER;
4921int ranges[MAX_RANGE_SIZE];
4922sljit_u8 bit, cbit, all;
4923int i, byte, length = 0;
4924
4925bit = bits[0] & 0x1;
4926/* All bits will be zero or one (since bit is zero or one). */
4927all = -bit;
4928
4929for (i = 0; i < 256; )
4930 {
4931 byte = i >> 3;
4932 if ((i & 0x7) == 0 && bits[byte] == all)
4933 i += 8;
4934 else
4935 {
4936 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4937 if (cbit != bit)
4938 {
4939 if (length >= MAX_RANGE_SIZE)
4940 return FALSE;
4941 ranges[length] = i;
4942 length++;
4943 bit = cbit;
4944 all = -cbit;
4945 }
4946 i++;
4947 }
4948 }
4949
4950if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4951 {
4952 if (length >= MAX_RANGE_SIZE)
4953 return FALSE;
4954 ranges[length] = 256;
4955 length++;
4956 }
4957
4958if (length < 0 || length > 4)
4959 return FALSE;
4960
4961bit = bits[0] & 0x1;
4962if (invert) bit ^= 0x1;
4963
4964/* No character is accepted. */
4965if (length == 0 && bit == 0)
4966 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4967
4968switch(length)
4969 {
4970 case 0:
4971 /* When bit != 0, all characters are accepted. */
4972 return TRUE;
4973
4974 case 1:
4975 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4976 return TRUE;
4977
4978 case 2:
4979 if (ranges[0] + 1 != ranges[1])
4980 {
4981 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4982 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4983 }
4984 else
4985 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4986 return TRUE;
4987
4988 case 3:
4989 if (bit != 0)
4990 {
4991 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4992 if (ranges[0] + 1 != ranges[1])
4993 {
4994 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4995 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4996 }
4997 else
4998 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4999 return TRUE;
5000 }
5001
5002 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5003 if (ranges[1] + 1 != ranges[2])
5004 {
5005 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5006 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5007 }
5008 else
5009 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5010 return TRUE;
5011
5012 case 4:
5013 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5014 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5015 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5016 && is_powerof2(ranges[2] - ranges[0]))
5017 {
5018 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5019 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5020 if (ranges[2] + 1 != ranges[3])
5021 {
5022 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5023 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5024 }
5025 else
5026 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5027 return TRUE;
5028 }
5029
5030 if (bit != 0)
5031 {
5032 i = 0;
5033 if (ranges[0] + 1 != ranges[1])
5034 {
5035 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5036 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5037 i = ranges[0];
5038 }
5039 else
5040 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5041
5042 if (ranges[2] + 1 != ranges[3])
5043 {
5044 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5045 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5046 }
5047 else
5048 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5049 return TRUE;
5050 }
5051
5052 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5053 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5054 if (ranges[1] + 1 != ranges[2])
5055 {
5056 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5057 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5058 }
5059 else
5060 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5061 return TRUE;
5062
5063 default:
5064 SLJIT_ASSERT_STOP();
5065 return FALSE;
5066 }
5067}
5068
5069static void check_anynewline(compiler_common *common)
5070{
5071/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5072DEFINE_COMPILER;
5073
5074sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5075
5076OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5077OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5078OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5079OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5080#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5081#ifdef COMPILE_PCRE8
5082if (common->utf)
5083 {
5084#endif
5085 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5086 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5087 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5088#ifdef COMPILE_PCRE8
5089 }
5090#endif
5091#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5092OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5093sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5094}
5095
5096static void check_hspace(compiler_common *common)
5097{
5098/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5099DEFINE_COMPILER;
5100
5101sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5102
5103OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5104OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5105OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5106OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5107OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5108#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5109#ifdef COMPILE_PCRE8
5110if (common->utf)
5111 {
5112#endif
5113 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5114 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5115 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5117 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5118 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5119 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5120 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5122 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5124 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5125 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5126#ifdef COMPILE_PCRE8
5127 }
5128#endif
5129#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5130OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5131
5132sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5133}
5134
5135static void check_vspace(compiler_common *common)
5136{
5137/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5138DEFINE_COMPILER;
5139
5140sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5141
5142OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5143OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5144OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5145OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5146#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5147#ifdef COMPILE_PCRE8
5148if (common->utf)
5149 {
5150#endif
5151 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5152 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5154#ifdef COMPILE_PCRE8
5155 }
5156#endif
5157#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5158OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5159
5160sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5161}
5162
5163#define CHAR1 STR_END
5164#define CHAR2 STACK_TOP
5165
5166static void do_casefulcmp(compiler_common *common)
5167{
5168DEFINE_COMPILER;
5169struct sljit_jump *jump;
5170struct sljit_label *label;
5171
5172sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5173OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5174OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5175OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
5176OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5177OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5178
5179label = LABEL();
5180OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5181OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5182jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5183OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5184JUMPTO(SLJIT_NOT_ZERO, label);
5185
5186JUMPHERE(jump);
5187OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5188OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5189OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5190sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5191}
5192
5193#define LCC_TABLE STACK_LIMIT
5194
5195static void do_caselesscmp(compiler_common *common)
5196{
5197DEFINE_COMPILER;
5198struct sljit_jump *jump;
5199struct sljit_label *label;
5200
5201sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5202OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5203
5204OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5205OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5206OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5207OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
5208OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5209OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5210
5211label = LABEL();
5212OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5213OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5214#ifndef COMPILE_PCRE8
5215jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5216#endif
5217OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5218#ifndef COMPILE_PCRE8
5219JUMPHERE(jump);
5220jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5221#endif
5222OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5223#ifndef COMPILE_PCRE8
5224JUMPHERE(jump);
5225#endif
5226jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5227OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5228JUMPTO(SLJIT_NOT_ZERO, label);
5229
5230JUMPHERE(jump);
5231OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5232OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5233OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5234OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5235sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5236}
5237
5238#undef LCC_TABLE
5239#undef CHAR1
5240#undef CHAR2
5241
5242#if defined SUPPORT_UTF && defined SUPPORT_UCP
5243
5244static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
5245{
5246/* This function would be ineffective to do in JIT level. */
5247sljit_u32 c1, c2;
5248const pcre_uchar *src2 = args->uchar_ptr;
5249const pcre_uchar *end2 = args->end;
5250const ucd_record *ur;
5251const sljit_u32 *pp;
5252
5253while (src1 < end1)
5254 {
5255 if (src2 >= end2)
5256 return (pcre_uchar*)1;
5257 GETCHARINC(c1, src1);
5258 GETCHARINC(c2, src2);
5259 ur = GET_UCD(c2);
5260 if (c1 != c2 && c1 != c2 + ur->other_case)
5261 {
5262 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5263 for (;;)
5264 {
5265 if (c1 < *pp) return NULL;
5266 if (c1 == *pp++) break;
5267 }
5268 }
5269 }
5270return src2;
5271}
5272
5273#endif /* SUPPORT_UTF && SUPPORT_UCP */
5274
5275static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5276 compare_context *context, jump_list **backtracks)
5277{
5278DEFINE_COMPILER;
5279unsigned int othercasebit = 0;
5280pcre_uchar *othercasechar = NULL;
5281#ifdef SUPPORT_UTF
5282int utflength;
5283#endif
5284
5285if (caseless && char_has_othercase(common, cc))
5286 {
5287 othercasebit = char_get_othercase_bit(common, cc);
5288 SLJIT_ASSERT(othercasebit);
5289 /* Extracting bit difference info. */
5290#if defined COMPILE_PCRE8
5291 othercasechar = cc + (othercasebit >> 8);
5292 othercasebit &= 0xff;
5293#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5294 /* Note that this code only handles characters in the BMP. If there
5295 ever are characters outside the BMP whose othercase differs in only one
5296 bit from itself (there currently are none), this code will need to be
5297 revised for COMPILE_PCRE32. */
5298 othercasechar = cc + (othercasebit >> 9);
5299 if ((othercasebit & 0x100) != 0)
5300 othercasebit = (othercasebit & 0xff) << 8;
5301 else
5302 othercasebit &= 0xff;
5303#endif /* COMPILE_PCRE[8|16|32] */
5304 }
5305
5306if (context->sourcereg == -1)
5307 {
5308#if defined COMPILE_PCRE8
5309#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5310 if (context->length >= 4)
5311 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5312 else if (context->length >= 2)
5313 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5314 else
5315#endif
5316 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5317#elif defined COMPILE_PCRE16
5318#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5319 if (context->length >= 4)
5320 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5321 else
5322#endif
5323 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5324#elif defined COMPILE_PCRE32
5325 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5326#endif /* COMPILE_PCRE[8|16|32] */
5327 context->sourcereg = TMP2;
5328 }
5329
5330#ifdef SUPPORT_UTF
5331utflength = 1;
5332if (common->utf && HAS_EXTRALEN(*cc))
5333 utflength += GET_EXTRALEN(*cc);
5334
5335do
5336 {
5337#endif
5338
5339 context->length -= IN_UCHARS(1);
5340#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5341
5342 /* Unaligned read is supported. */
5343 if (othercasebit != 0 && othercasechar == cc)
5344 {
5345 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5346 context->oc.asuchars[context->ucharptr] = othercasebit;
5347 }
5348 else
5349 {
5350 context->c.asuchars[context->ucharptr] = *cc;
5351 context->oc.asuchars[context->ucharptr] = 0;
5352 }
5353 context->ucharptr++;
5354
5355#if defined COMPILE_PCRE8
5356 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5357#else
5358 if (context->ucharptr >= 2 || context->length == 0)
5359#endif
5360 {
5361 if (context->length >= 4)
5362 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5363 else if (context->length >= 2)
5364 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5365#if defined COMPILE_PCRE8
5366 else if (context->length >= 1)
5367 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5368#endif /* COMPILE_PCRE8 */
5369 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5370
5371 switch(context->ucharptr)
5372 {
5373 case 4 / sizeof(pcre_uchar):
5374 if (context->oc.asint != 0)
5375 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5376 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5377 break;
5378
5379 case 2 / sizeof(pcre_uchar):
5380 if (context->oc.asushort != 0)
5381 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5382 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5383 break;
5384
5385#ifdef COMPILE_PCRE8
5386 case 1:
5387 if (context->oc.asbyte != 0)
5388 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5389 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5390 break;
5391#endif
5392
5393 default:
5394 SLJIT_ASSERT_STOP();
5395 break;
5396 }
5397 context->ucharptr = 0;
5398 }
5399
5400#else
5401
5402 /* Unaligned read is unsupported or in 32 bit mode. */
5403 if (context->length >= 1)
5404 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5405
5406 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5407
5408 if (othercasebit != 0 && othercasechar == cc)
5409 {
5410 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5411 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5412 }
5413 else
5414 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5415
5416#endif
5417
5418 cc++;
5419#ifdef SUPPORT_UTF
5420 utflength--;
5421 }
5422while (utflength > 0);
5423#endif
5424
5425return cc;
5426}
5427
5428#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5429
/* Rebases the value held in typereg so that it equals "character type minus
value", and records the new base in typeoffset. Code is emitted only when the
base really changes. The expansion relies on the locals typereg and typeoffset
of the enclosing function. It is wrapped in do { } while (0) so that it behaves
as a single statement, e.g. inside an unbraced if/else. Note that the argument
is evaluated more than once, so it must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
5439
/* Rebases the value held in TMP1 so that it equals "character minus value",
and records the new base in charoffset. Code is emitted only when the base
really changes. The expansion relies on the local charoffset of the enclosing
function. It is wrapped in do { } while (0) so that it behaves as a single
statement, e.g. inside an unbraced if/else. Note that the argument is evaluated
more than once, so it must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5449
5450static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5451
5452static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5453{
5454DEFINE_COMPILER;
5455jump_list *found = NULL;
5456jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5457sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5458struct sljit_jump *jump = NULL;
5459pcre_uchar *ccbegin;
5460int compares, invertcmp, numberofcmps;
5461#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5462BOOL utf = common->utf;
5463#endif
5464
5465#ifdef SUPPORT_UCP
5466BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5467BOOL charsaved = FALSE;
5468int typereg = TMP1;
5469const sljit_u32 *other_cases;
5470sljit_uw typeoffset;
5471#endif
5472
5473/* Scanning the necessary info. */
5474cc++;
5475ccbegin = cc;
5476compares = 0;
5477if (cc[-1] & XCL_MAP)
5478 {
5479 min = 0;
5480 cc += 32 / sizeof(pcre_uchar);
5481 }
5482
5483while (*cc != XCL_END)
5484 {
5485 compares++;
5486 if (*cc == XCL_SINGLE)
5487 {
5488 cc ++;
5489 GETCHARINCTEST(c, cc);
5490 if (c > max) max = c;
5491 if (c < min) min = c;
5492#ifdef SUPPORT_UCP
5493 needschar = TRUE;
5494#endif
5495 }
5496 else if (*cc == XCL_RANGE)
5497 {
5498 cc ++;
5499 GETCHARINCTEST(c, cc);
5500 if (c < min) min = c;
5501 GETCHARINCTEST(c, cc);
5502 if (c > max) max = c;
5503#ifdef SUPPORT_UCP
5504 needschar = TRUE;
5505#endif
5506 }
5507#ifdef SUPPORT_UCP
5508 else
5509 {
5510 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5511 cc++;
5512 if (*cc == PT_CLIST)
5513 {
5514 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5515 while (*other_cases != NOTACHAR)
5516 {
5517 if (*other_cases > max) max = *other_cases;
5518 if (*other_cases < min) min = *other_cases;
5519 other_cases++;
5520 }
5521 }
5522 else
5523 {
5524 max = READ_CHAR_MAX;
5525 min = 0;
5526 }
5527
5528 switch(*cc)
5529 {
5530 case PT_ANY:
5531 /* Any either accepts everything or ignored. */
5532 if (cc[-1] == XCL_PROP)
5533 {
5534 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5535 if (list == backtracks)
5536 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5537 return;
5538 }
5539 break;
5540
5541 case PT_LAMP:
5542 case PT_GC:
5543 case PT_PC:
5544 case PT_ALNUM:
5545 needstype = TRUE;
5546 break;
5547
5548 case PT_SC:
5549 needsscript = TRUE;
5550 break;
5551
5552 case PT_SPACE:
5553 case PT_PXSPACE:
5554 case PT_WORD:
5555 case PT_PXGRAPH:
5556 case PT_PXPRINT:
5557 case PT_PXPUNCT:
5558 needstype = TRUE;
5559 needschar = TRUE;
5560 break;
5561
5562 case PT_CLIST:
5563 case PT_UCNC:
5564 needschar = TRUE;
5565 break;
5566
5567 default:
5568 SLJIT_ASSERT_STOP();
5569 break;
5570 }
5571 cc += 2;
5572 }
5573#endif
5574 }
5575SLJIT_ASSERT(compares > 0);
5576
5577/* We are not necessary in utf mode even in 8 bit mode. */
5578cc = ccbegin;
5579read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5580
5581if ((cc[-1] & XCL_HASPROP) == 0)
5582 {
5583 if ((cc[-1] & XCL_MAP) != 0)
5584 {
5585 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5586 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5587 {
5588 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5589 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5590 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5591 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5592 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5593 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5594 }
5595
5596 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5597 JUMPHERE(jump);
5598
5599 cc += 32 / sizeof(pcre_uchar);
5600 }
5601 else
5602 {
5603 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5604 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5605 }
5606 }
5607else if ((cc[-1] & XCL_MAP) != 0)
5608 {
5609 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5610#ifdef SUPPORT_UCP
5611 charsaved = TRUE;
5612#endif
5613 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5614 {
5615#ifdef COMPILE_PCRE8
5616 jump = NULL;
5617 if (common->utf)
5618#endif
5619 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5620
5621 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5622 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5623 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5624 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5625 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5626 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5627
5628#ifdef COMPILE_PCRE8
5629 if (common->utf)
5630#endif
5631 JUMPHERE(jump);
5632 }
5633
5634 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5635 cc += 32 / sizeof(pcre_uchar);
5636 }
5637
5638#ifdef SUPPORT_UCP
5639if (needstype || needsscript)
5640 {
5641 if (needschar && !charsaved)
5642 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5643
5644 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5645 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5646 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5648 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5649 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5650 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5651
5652 /* Before anything else, we deal with scripts. */
5653 if (needsscript)
5654 {
5655 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5656 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5657
5658 ccbegin = cc;
5659
5660 while (*cc != XCL_END)
5661 {
5662 if (*cc == XCL_SINGLE)
5663 {
5664 cc ++;
5665 GETCHARINCTEST(c, cc);
5666 }
5667 else if (*cc == XCL_RANGE)
5668 {
5669 cc ++;
5670 GETCHARINCTEST(c, cc);
5671 GETCHARINCTEST(c, cc);
5672 }
5673 else
5674 {
5675 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5676 cc++;
5677 if (*cc == PT_SC)
5678 {
5679 compares--;
5680 invertcmp = (compares == 0 && list != backtracks);
5681 if (cc[-1] == XCL_NOTPROP)
5682 invertcmp ^= 0x1;
5683 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5684 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5685 }
5686 cc += 2;
5687 }
5688 }
5689
5690 cc = ccbegin;
5691 }
5692
5693 if (needschar)
5694 {
5695 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5696 }
5697
5698 if (needstype)
5699 {
5700 if (!needschar)
5701 {
5702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5703 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5704 }
5705 else
5706 {
5707 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5708 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5709 typereg = RETURN_ADDR;
5710 }
5711 }
5712 }
5713#endif
5714
5715/* Generating code. */
5716charoffset = 0;
5717numberofcmps = 0;
5718#ifdef SUPPORT_UCP
5719typeoffset = 0;
5720#endif
5721
5722while (*cc != XCL_END)
5723 {
5724 compares--;
5725 invertcmp = (compares == 0 && list != backtracks);
5726 jump = NULL;
5727
5728 if (*cc == XCL_SINGLE)
5729 {
5730 cc ++;
5731 GETCHARINCTEST(c, cc);
5732
5733 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5734 {
5735 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5736 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5737 numberofcmps++;
5738 }
5739 else if (numberofcmps > 0)
5740 {
5741 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5742 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5743 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5744 numberofcmps = 0;
5745 }
5746 else
5747 {
5748 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5749 numberofcmps = 0;
5750 }
5751 }
5752 else if (*cc == XCL_RANGE)
5753 {
5754 cc ++;
5755 GETCHARINCTEST(c, cc);
5756 SET_CHAR_OFFSET(c);
5757 GETCHARINCTEST(c, cc);
5758
5759 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5760 {
5761 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5762 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5763 numberofcmps++;
5764 }
5765 else if (numberofcmps > 0)
5766 {
5767 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5768 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5769 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5770 numberofcmps = 0;
5771 }
5772 else
5773 {
5774 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5775 numberofcmps = 0;
5776 }
5777 }
5778#ifdef SUPPORT_UCP
5779 else
5780 {
5781 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5782 if (*cc == XCL_NOTPROP)
5783 invertcmp ^= 0x1;
5784 cc++;
5785 switch(*cc)
5786 {
5787 case PT_ANY:
5788 if (!invertcmp)
5789 jump = JUMP(SLJIT_JUMP);
5790 break;
5791
5792 case PT_LAMP:
5793 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5794 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5795 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5796 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5797 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5798 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5799 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5800 break;
5801
5802 case PT_GC:
5803 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5804 SET_TYPE_OFFSET(c);
5805 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5806 break;
5807
5808 case PT_PC:
5809 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5810 break;
5811
5812 case PT_SC:
5813 compares++;
5814 /* Do nothing. */
5815 break;
5816
5817 case PT_SPACE:
5818 case PT_PXSPACE:
5819 SET_CHAR_OFFSET(9);
5820 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5821 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5822
5823 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5824 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5825
5826 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5827 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5828
5829 SET_TYPE_OFFSET(ucp_Zl);
5830 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5831 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5832 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5833 break;
5834
5835 case PT_WORD:
5836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5837 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5838 /* Fall through. */
5839
5840 case PT_ALNUM:
5841 SET_TYPE_OFFSET(ucp_Ll);
5842 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5843 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5844 SET_TYPE_OFFSET(ucp_Nd);
5845 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5846 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5847 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5848 break;
5849
5850 case PT_CLIST:
5851 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5852
5853 /* At least three characters are required.
5854 Otherwise this case would be handled by the normal code path. */
5855 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5856 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5857
5858 /* Optimizing character pairs, if their difference is power of 2. */
5859 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5860 {
5861 if (charoffset == 0)
5862 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5863 else
5864 {
5865 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5866 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5867 }
5868 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5869 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5870 other_cases += 2;
5871 }
5872 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5873 {
5874 if (charoffset == 0)
5875 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5876 else
5877 {
5878 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5879 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5880 }
5881 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5882 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5883
5884 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5885 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5886
5887 other_cases += 3;
5888 }
5889 else
5890 {
5891 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5892 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5893 }
5894
5895 while (*other_cases != NOTACHAR)
5896 {
5897 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5898 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5899 }
5900 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5901 break;
5902
5903 case PT_UCNC:
5904 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5905 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5906 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5907 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5908 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5909 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5910
5911 SET_CHAR_OFFSET(0xa0);
5912 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5913 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5914 SET_CHAR_OFFSET(0);
5915 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5916 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5917 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5918 break;
5919
5920 case PT_PXGRAPH:
5921 /* C and Z groups are the farthest two groups. */
5922 SET_TYPE_OFFSET(ucp_Ll);
5923 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5924 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5925
5926 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5927
5928 /* In case of ucp_Cf, we overwrite the result. */
5929 SET_CHAR_OFFSET(0x2066);
5930 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5931 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5932
5933 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5934 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5935
5936 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5937 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5938
5939 JUMPHERE(jump);
5940 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5941 break;
5942
5943 case PT_PXPRINT:
5944 /* C and Z groups are the farthest two groups. */
5945 SET_TYPE_OFFSET(ucp_Ll);
5946 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5947 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5948
5949 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5950 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5951
5952 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5953
5954 /* In case of ucp_Cf, we overwrite the result. */
5955 SET_CHAR_OFFSET(0x2066);
5956 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5957 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5958
5959 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5960 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5961
5962 JUMPHERE(jump);
5963 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5964 break;
5965
5966 case PT_PXPUNCT:
5967 SET_TYPE_OFFSET(ucp_Sc);
5968 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5969 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5970
5971 SET_CHAR_OFFSET(0);
5972 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5973 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5974
5975 SET_TYPE_OFFSET(ucp_Pc);
5976 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5977 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5978 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5979 break;
5980
5981 default:
5982 SLJIT_ASSERT_STOP();
5983 break;
5984 }
5985 cc += 2;
5986 }
5987#endif
5988
5989 if (jump != NULL)
5990 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5991 }
5992
5993if (found != NULL)
5994 set_jumps(found, LABEL());
5995}
5996
5997#undef SET_TYPE_OFFSET
5998#undef SET_CHAR_OFFSET
5999
6000#endif
6001
6002static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6003{
6004DEFINE_COMPILER;
6005int length;
6006struct sljit_jump *jump[4];
6007#ifdef SUPPORT_UTF
6008struct sljit_label *label;
6009#endif /* SUPPORT_UTF */
6010
6011switch(type)
6012 {
6013 case OP_SOD:
6014 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6015 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6016 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6017 return cc;
6018
6019 case OP_SOM:
6020 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6022 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6023 return cc;
6024
6025 case OP_NOT_WORD_BOUNDARY:
6026 case OP_WORD_BOUNDARY:
6027 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6028 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6029 return cc;
6030
6031 case OP_EODN:
6032 /* Requires rather complex checks. */
6033 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6034 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6035 {
6036 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6037 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6038 if (common->mode == JIT_COMPILE)
6039 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6040 else
6041 {
6042 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6043 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6044 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
6045 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6046 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
6047 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6048 check_partial(common, TRUE);
6049 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6050 JUMPHERE(jump[1]);
6051 }
6052 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6053 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6054 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6055 }
6056 else if (common->nltype == NLTYPE_FIXED)
6057 {
6058 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6059 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6060 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6061 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6062 }
6063 else
6064 {
6065 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6066 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6067 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6068 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6069 jump[2] = JUMP(SLJIT_GREATER);
6070 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
6071 /* Equal. */
6072 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6073 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6074 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6075
6076 JUMPHERE(jump[1]);
6077 if (common->nltype == NLTYPE_ANYCRLF)
6078 {
6079 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6080 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6081 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6082 }
6083 else
6084 {
6085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6086 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6087 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6088 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6089 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6090 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6091 }
6092 JUMPHERE(jump[2]);
6093 JUMPHERE(jump[3]);
6094 }
6095 JUMPHERE(jump[0]);
6096 check_partial(common, FALSE);
6097 return cc;
6098
6099 case OP_EOD:
6100 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6101 check_partial(common, FALSE);
6102 return cc;
6103
6104 case OP_DOLL:
6105 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6106 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6107 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6108
6109 if (!common->endonly)
6110 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6111 else
6112 {
6113 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6114 check_partial(common, FALSE);
6115 }
6116 return cc;
6117
6118 case OP_DOLLM:
6119 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6120 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6121 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6122 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6123 check_partial(common, FALSE);
6124 jump[0] = JUMP(SLJIT_JUMP);
6125 JUMPHERE(jump[1]);
6126
6127 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6128 {
6129 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6130 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6131 if (common->mode == JIT_COMPILE)
6132 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6133 else
6134 {
6135 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6136 /* STR_PTR = STR_END - IN_UCHARS(1) */
6137 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6138 check_partial(common, TRUE);
6139 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6140 JUMPHERE(jump[1]);
6141 }
6142
6143 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6144 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6145 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6146 }
6147 else
6148 {
6149 peek_char(common, common->nlmax);
6150 check_newlinechar(common, common->nltype, backtracks, FALSE);
6151 }
6152 JUMPHERE(jump[0]);
6153 return cc;
6154
6155 case OP_CIRC:
6156 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6157 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6158 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6159 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6160 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6161 return cc;
6162
6163 case OP_CIRCM:
6164 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6166 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6167 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6168 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6169 jump[0] = JUMP(SLJIT_JUMP);
6170 JUMPHERE(jump[1]);
6171
6172 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6173 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6174 {
6175 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6176 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6177 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6178 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6179 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6180 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6181 }
6182 else
6183 {
6184 skip_char_back(common);
6185 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6186 check_newlinechar(common, common->nltype, backtracks, FALSE);
6187 }
6188 JUMPHERE(jump[0]);
6189 return cc;
6190
6191 case OP_REVERSE:
6192 length = GET(cc, 0);
6193 if (length == 0)
6194 return cc + LINK_SIZE;
6195 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6196#ifdef SUPPORT_UTF
6197 if (common->utf)
6198 {
6199 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6201 label = LABEL();
6202 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6203 skip_char_back(common);
6204 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6205 JUMPTO(SLJIT_NOT_ZERO, label);
6206 }
6207 else
6208#endif
6209 {
6210 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6211 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6212 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6213 }
6214 check_start_used_ptr(common);
6215 return cc + LINK_SIZE;
6216 }
6217SLJIT_ASSERT_STOP();
6218return cc;
6219}
6220
/* Emits the matching path of one single-character item.

Arguments:
  common         the compiler state
  type           the opcode to compile; it selects the case below
  cc             points just after the opcode, i.e. to its arguments (if any)
  backtracks     list that collects the jumps taken when the item fails
  check_str_ptr  when TRUE, detect_partial_match() is emitted before the
                 character is read (the OP_CHAR / OP_CHARI fast path emits a
                 direct STR_PTR > STR_END check instead)

Returns:         pointer to the code that follows the arguments of the opcode
*/
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif /* SUPPORT_UTF */

switch(type)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  /* In UTF-8 mode the 7 bit reader is selected when is_char7_bitset()
  accepts the digit bitmap. */
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  /* TMP1 is tested against the ctype_digit bit; the failing flag state
  depends on whether the opcode is the positive or the negated form. */
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, using the space bitmap and ctype_space. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, using the word bitmap and ctype_word. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline stored in two bytes of common->newline: the character
    just read is compared with the upper byte; only when it is equal is the
    next code unit (if any) compared with the lower byte. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* Load the first code unit and step over it; the remaining code units
    of the character (if any) are skipped below. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* Units below 0xc0 need no further skip; otherwise the extra length is
    loaded from utf8_table4[unit - 0xc0] and added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* Units below 0xd800 need no further skip; otherwise TMP1 becomes 1 when
    (unit & 0xfc00) == 0xd800 and 0 if not, and TMP1 << 1 is added to
    STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  /* Non-UTF mode: every character is exactly one code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Always advances by a single code unit, regardless of the UTF setting. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  case OP_NOTPROP:
  case OP_PROP:
  /* A temporary extended class description is built from the two argument
  units of the opcode and handed over to the XCLASS compiler. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  /* jump[0]: the character is not CR, so it is passed to the generic
  newline check below. */
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After CR: step over a following NL (jump[2] leaves), otherwise the
  CR alone is accepted (jump[1]). */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  /* The test itself is done by the shared routine collected in
  common->hspace; the flags it leaves select the failing direction. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same scheme as the horizontal space case, using common->vspace. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  /* Read the first character and load the gbprop field of its ucd record
  (the record is selected by the TMP2 value left by the getucd routine). */
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  /* STACK_TOP is saved in LOCALS0 and reused to hold the gbprop value of
  the previous character while the loop runs. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Loop: TMP3 remembers the position before the next character, so that
  STR_PTR can be moved back when that character is not accepted. */
  label = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* The 32 bit ucp_gbtable entry of the previous property is tested against
  the bit (1 << current property); the loop continues while it is set. */
  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_NOT_ZERO, label);

  /* The last character read is not part of the sequence: step back. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_CHAR:
  case OP_CHARI:
  /* length is the number of code units of the pattern character. */
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* Fast path (normal compile mode only): the character is compared as a
  fixed code unit sequence. It is taken for caseful characters, characters
  without another case, and characters for which char_get_othercase_bit()
  returns a non-zero value. */
  if (common->mode == JIT_COMPILE && check_str_ptr
      && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Generic path: read the subject character and compare its value. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit to one and do
    a single comparison. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  /* Otherwise compare with both cases separately. */
  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* The pattern character is a single byte: only the first byte of
      the subject character needs to be compared. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UTF */
    c = *cc;

  /* Generic path: the item fails when the subject character equals the
  pattern character (or its other case for OP_NOTI). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  /* bit is the largest character value that has to be looked up in the
  bitmap: 127 when is_char7_bitset() accepts the map in UTF-8 mode, 255
  otherwise. */
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* Nothing more to emit when check_class_ranges() reports success. The
  bitmap argument occupies 32 bytes of the compiled pattern. */
  if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  /* Characters above the bitmap limit fail for OP_CLASS and bypass the
  bitmap test (through jump[0]) for OP_NCLASS. */
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif !defined COMPILE_PCRE8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

  /* Bitmap lookup: byte index is TMP1 >> 3, bit index is TMP1 & 0x7; the
  item fails when the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  case OP_XCLASS:
  /* The class data starts after the LINK_SIZE length field, whose value
  (read with GET) is used to compute the returned code pointer. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
/* Every opcode handled by this function returns from inside the switch. */
SLJIT_ASSERT_STOP();
return cc;
}
6607
6608static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6609{
6610/* This function consumes at least one input character. */
6611/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6612DEFINE_COMPILER;
6613pcre_uchar *ccbegin = cc;
6614compare_context context;
6615int size;
6616
6617context.length = 0;
6618do
6619 {
6620 if (cc >= ccend)
6621 break;
6622
6623 if (*cc == OP_CHAR)
6624 {
6625 size = 1;
6626#ifdef SUPPORT_UTF
6627 if (common->utf && HAS_EXTRALEN(cc[1]))
6628 size += GET_EXTRALEN(cc[1]);
6629#endif
6630 }
6631 else if (*cc == OP_CHARI)
6632 {
6633 size = 1;
6634#ifdef SUPPORT_UTF
6635 if (common->utf)
6636 {
6637 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6638 size = 0;
6639 else if (HAS_EXTRALEN(cc[1]))
6640 size += GET_EXTRALEN(cc[1]);
6641 }
6642 else
6643#endif
6644 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6645 size = 0;
6646 }
6647 else
6648 size = 0;
6649
6650 cc += 1 + size;
6651 context.length += IN_UCHARS(size);
6652 }
6653while (size > 0 && context.length <= 128);
6654
6655cc = ccbegin;
6656if (context.length > 0)
6657 {
6658 /* We have a fixed-length byte sequence. */
6659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6660 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6661
6662 context.sourcereg = -1;
6663#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6664 context.ucharptr = 0;
6665#endif
6666 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6667 return cc;
6668 }
6669
6670/* A non-fixed length character will be checked if length == 0. */
6671return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6672}
6673
/* Forward definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);

/* Allocates a zero-filled backtrack record of "size" bytes from the compiler
memory, stores "ccstart" in it and pushes it onto the parent->top list.

The macro relies on three names of the expanding function: "compiler",
"backtrack" (receives the new record) and "parent". When the compiler reports
an error after the allocation, the expanding function returns "error". */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for functions returning void: on a compiler error
the expanding function returns without a value. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6705
6706static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6707{
6708/* The OVECTOR offset goes to TMP2. */
6709DEFINE_COMPILER;
6710int count = GET2(cc, 1 + IMM2_SIZE);
6711pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6712unsigned int offset;
6713jump_list *found = NULL;
6714
6715SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6716
6717OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6718
6719count--;
6720while (count-- > 0)
6721 {
6722 offset = GET2(slot, 0) << 1;
6723 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6724 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6725 slot += common->name_entry_size;
6726 }
6727
6728offset = GET2(slot, 0) << 1;
6729GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6730if (backtracks != NULL && !common->jscript_compat)
6731 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6732
6733set_jumps(found, LABEL());
6734}
6735
6736static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6737{
6738DEFINE_COMPILER;
6739BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6740int offset = 0;
6741struct sljit_jump *jump = NULL;
6742struct sljit_jump *partial;
6743struct sljit_jump *nopartial;
6744
6745if (ref)
6746 {
6747 offset = GET2(cc, 1) << 1;
6748 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6749 /* OVECTOR(1) contains the "string begin - 1" constant. */
6750 if (withchecks && !common->jscript_compat)
6751 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6752 }
6753else
6754 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6755
6756#if defined SUPPORT_UTF && defined SUPPORT_UCP
6757if (common->utf && *cc == OP_REFI)
6758 {
6759 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6760 if (ref)
6761 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6762 else
6763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6764
6765 if (withchecks)
6766 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6767
6768 /* Needed to save important temporary registers. */
6769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6770 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6772 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6773 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6774 if (common->mode == JIT_COMPILE)
6775 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6776 else
6777 {
6778 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6779 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6780 check_partial(common, FALSE);
6781 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6782 JUMPHERE(nopartial);
6783 }
6784 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6785 }
6786else
6787#endif /* SUPPORT_UTF && SUPPORT_UCP */
6788 {
6789 if (ref)
6790 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6791 else
6792 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6793
6794 if (withchecks)
6795 jump = JUMP(SLJIT_ZERO);
6796
6797 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6798 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6799 if (common->mode == JIT_COMPILE)
6800 add_jump(compiler, backtracks, partial);
6801
6802 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6803 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6804
6805 if (common->mode != JIT_COMPILE)
6806 {
6807 nopartial = JUMP(SLJIT_JUMP);
6808 JUMPHERE(partial);
6809 /* TMP2 -= STR_END - STR_PTR */
6810 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6811 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6812 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6813 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6814 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6815 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6816 JUMPHERE(partial);
6817 check_partial(common, FALSE);
6818 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6819 JUMPHERE(nopartial);
6820 }
6821 }
6822
6823if (jump != NULL)
6824 {
6825 if (emptyfail)
6826 add_jump(compiler, backtracks, jump);
6827 else
6828 JUMPHERE(jump);
6829 }
6830}
6831
/* Emits the matching path of a back reference which is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

Arguments:
  common   the compiler state
  cc       points to the back reference opcode; OP_REF / OP_REFI are handled
           directly, any other opcode goes through compile_dnref_search()
  parent   the backtrack record which receives the new ref_iterator_backtrack

Returns:   pointer to the code after the repeat opcode and its arguments, or
           NULL when PUSH_BACKTRACK detects a compiler error
*/
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
pcre_uchar type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* The non-"ref" opcodes carry one more IMM2_SIZE argument, so the repeat
opcode is found that much later. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The minimizing repeat opcodes are the odd ones. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
/* Decode the repeat limits. In the code below max == 0 means that there is
no upper limit. */
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* Greedy repeat: match as many copies as possible, pushing the subject
position onto the stack after each optional repetition. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat,
    is zero the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* The address of the selected ovector pair (TMP2) is kept in
      POSSESSIVE1, and reloaded before each comparison below. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one copy is required: a group whose start equals
      OVECTOR(1) fails. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 counts the repetitions when a limit has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of the repeat loop. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without saving the position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: save the position and try one more copy. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) is set to the subject position (or stays 0),
STACK(1) is the repeat counter, and STACK(2) keeps the address of the selected
ovector pair for the non-"ref" opcodes. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat,
  is zero the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero repetitions are tried first: skip the code emitted below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The label recorded here starts the code which matches one more copy; it
is also the target of the min > 1 loop below. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep matching until the minimum count is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
7038
7039static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7040{
7041DEFINE_COMPILER;
7042backtrack_common *backtrack;
7043recurse_entry *entry = common->entries;
7044recurse_entry *prev = NULL;
7045sljit_sw start = GET(cc, 1);
7046pcre_uchar *start_cc;
7047BOOL needs_control_head;
7048
7049PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7050
7051/* Inlining simple patterns. */
7052if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7053 {
7054 start_cc = common->start + start;
7055 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7056 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7057 return cc + 1 + LINK_SIZE;
7058 }
7059
7060while (entry != NULL)
7061 {
7062 if (entry->start == start)
7063 break;
7064 prev = entry;
7065 entry = entry->next;
7066 }
7067
7068if (entry == NULL)
7069 {
7070 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7071 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7072 return NULL;
7073 entry->next = NULL;
7074 entry->entry = NULL;
7075 entry->calls = NULL;
7076 entry->start = start;
7077
7078 if (prev != NULL)
7079 prev->next = entry;
7080 else
7081 common->entries = entry;
7082 }
7083
7084if (common->has_set_som && common->mark_ptr != 0)
7085 {
7086 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7087 allocate_stack(common, 2);
7088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7090 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7091 }
7092else if (common->has_set_som || common->mark_ptr != 0)
7093 {
7094 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7095 allocate_stack(common, 1);
7096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7097 }
7098
7099if (entry->entry == NULL)
7100 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7101else
7102 JUMPTO(SLJIT_FAST_CALL, entry->entry);
7103/* Leave if the match is failed. */
7104add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7105return cc + 1 + LINK_SIZE;
7106}
7107
7108static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7109{
7110const pcre_uchar *begin = arguments->begin;
7111int *offset_vector = arguments->offsets;
7112int offset_count = arguments->offset_count;
7113int i;
7114
7115if (PUBL(callout) == NULL)
7116 return 0;
7117
7118callout_block->version = 2;
7119callout_block->callout_data = arguments->callout_data;
7120
7121/* Offsets in subject. */
7122callout_block->subject_length = arguments->end - arguments->begin;
7123callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7124callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7125#if defined COMPILE_PCRE8
7126callout_block->subject = (PCRE_SPTR)begin;
7127#elif defined COMPILE_PCRE16
7128callout_block->subject = (PCRE_SPTR16)begin;
7129#elif defined COMPILE_PCRE32
7130callout_block->subject = (PCRE_SPTR32)begin;
7131#endif
7132
7133/* Convert and copy the JIT offset vector to the offset_vector array. */
7134callout_block->capture_top = 0;
7135callout_block->offset_vector = offset_vector;
7136for (i = 2; i < offset_count; i += 2)
7137 {
7138 offset_vector[i] = jit_ovector[i] - begin;
7139 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7140 if (jit_ovector[i] >= begin)
7141 callout_block->capture_top = i;
7142 }
7143
7144callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7145if (offset_count > 0)
7146 offset_vector[0] = -1;
7147if (offset_count > 1)
7148 offset_vector[1] = -1;
7149return (*PUBL(callout))(callout_block);
7150}
7151
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block field "arg", measured from the address which
is CALLOUT_ARG_SIZE bytes after the start of the block (so the value is
negative). */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
7158
7159static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7160{
7161DEFINE_COMPILER;
7162backtrack_common *backtrack;
7163
7164PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7165
7166allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7167
7168SLJIT_ASSERT(common->capture_last_ptr != 0);
7169OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7170OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7171OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
7172OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
7173
7174/* These pointer sized fields temporarly stores internal variables. */
7175OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7176OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
7177OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
7178
7179if (common->mark_ptr != 0)
7180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
7181OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
7182OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
7183OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
7184
7185/* Needed to save important temporary registers. */
7186OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
7187OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
7188GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
7189sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
7190OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
7191OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7192free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7193
7194/* Check return value. */
7195OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7196add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
7197if (common->forced_quit_label == NULL)
7198 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
7199else
7200 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
7201return cc + 2 + 2 * LINK_SIZE;
7202}
7203
7204#undef CALLOUT_ARG_SIZE
7205#undef CALLOUT_ARG_OFFSET
7206
7207static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7208{
7209while (TRUE)
7210 {
7211 switch (*cc)
7212 {
7213 case OP_NOT_WORD_BOUNDARY:
7214 case OP_WORD_BOUNDARY:
7215 case OP_CIRC:
7216 case OP_CIRCM:
7217 case OP_DOLL:
7218 case OP_DOLLM:
7219 case OP_CALLOUT:
7220 case OP_ALT:
7221 cc += PRIV(OP_lengths)[*cc];
7222 break;
7223
7224 case OP_KET:
7225 return FALSE;
7226
7227 default:
7228 return TRUE;
7229 }
7230 }
7231}
7232
/* Generates the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
   OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO
   or OP_BRAMINZERO when 'conditional' is FALSE.

   common       compiler state; local_exit, quit_label, quit, positive_assert,
                positive_assert_quit, then_trap, accept_label and accept are
                overridden while the assertion is compiled and put back before
                every return
   cc           points to the assertion opcode or to its zero prefix
   backtrack    receives framesize and private_data_ptr, and matchingpath for
                the OP_BRAZERO forms
   conditional  when TRUE, failure jumps are collected in backtrack->condfailed
                instead of backtrack->common.topbacktracks

   Returns the address following the closing item of the assertion
   (cc + 1 + LINK_SIZE after the last alternative), or NULL when the sljit
   compiler reports an error while an alternative is being compiled. */
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
/* Where the "assertion failed" jumps are collected. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* Where the jump taken after an alternative has matched is collected. */
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assertion lands on the local 'tmp'
   list (bound at "Assert is successful" below); for a negative assertion it
   goes straight to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is saved. extrasize counts the extra stack words: one for
     STR_PTR (dropped for a plain OP_BRA whose first alternative does not
     need it) and one for the control head when that is required. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is saved. Besides the frame, the stack holds STR_PTR at
     STACK(0), the control head at STACK(1) when required, and the previous
     content of the private data slot in the last extra word. The private
     data slot itself is set to STACK_TOP minus the whole allocation. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* From the second alternative on, STR_PTR is reloaded from the stack. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: restore the overridden fields of common and give up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Accept jumps collected inside this alternative land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
      }
    else
      {
      /* Non-conditional negative assertion: STACK_TOP is reloaded from the
         private data slot and the revertframes routine is called. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      /* A zero word is left on the top of the stack. */
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    }
  /* The alternative matched: leave through the 'found' list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: restore the overridden fields of common and give up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Backtracks of this alternative continue at the next alternative (or at
     the code after the loop when this was the last one). */
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The fall-through path skips the stack fix-up that is emitted only for
     the collected quit jumps. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Put back the control head that was saved at STACK(1). */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    /* TMP1 carries the previous private data value across the stack release. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With OP_BRAZERO the jump is bound to the matching path label below;
     otherwise it joins the failure target. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the failed and the successful path continue here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      /* For the zero-prefixed forms a zero word stays on the top of the stack. */
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* TMP1 carries the previous private data value across the stack release. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* The jumps taken when an alternative matched are bound here and the
       list is emptied. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the fields of common that were overridden for the assertion. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7654
7655static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7656{
7657DEFINE_COMPILER;
7658int stacksize;
7659
7660if (framesize < 0)
7661 {
7662 if (framesize == no_frame)
7663 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7664 else
7665 {
7666 stacksize = needs_control_head ? 1 : 0;
7667 if (ket != OP_KET || has_alternatives)
7668 stacksize++;
7669
7670 if (stacksize > 0)
7671 free_stack(common, stacksize);
7672 }
7673
7674 if (needs_control_head)
7675 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
7676
7677 /* TMP2 which is set here used by OP_KETRMAX below. */
7678 if (ket == OP_KETRMAX)
7679 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
7680 else if (ket == OP_KETRMIN)
7681 {
7682 /* Move the STR_PTR to the private_data_ptr. */
7683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
7684 }
7685 }
7686else
7687 {
7688 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7689 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7690 if (needs_control_head)
7691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
7692
7693 if (ket == OP_KETRMAX)
7694 {
7695 /* TMP2 which is set here used by OP_KETRMAX below. */
7696 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7697 }
7698 }
7699if (needs_control_head)
7700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7701}
7702
7703static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7704{
7705DEFINE_COMPILER;
7706
7707if (common->capture_last_ptr != 0)
7708 {
7709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7710 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7712 stacksize++;
7713 }
7714if (common->optimized_cbracket[offset >> 1] == 0)
7715 {
7716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7717 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7723 stacksize += 2;
7724 }
7725return stacksize;
7726}
7727
7728/*
7729 Handling bracketed expressions is probably the most complex part.
7730
7731 Stack layout naming characters:
7732 S - Push the current STR_PTR
7733 0 - Push a 0 (NULL)
7734 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7735 before the next alternative. Not pushed if there are no alternatives.
7736 M - Any values pushed by the current alternative. Can be empty, or anything.
7737 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7738 L - Push the previous local (pointed by localptr) to the stack
    ()  - optional values stored on the stack
    ()* - optional, can be stored multiple times
7741
7742 The following list shows the regular expression templates, their PCRE byte codes
7743 and stack layout supported by pcre-sljit.
7744
7745 (?:) OP_BRA | OP_KET A M
7746 () OP_CBRA | OP_KET C M
7747 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7748 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7749 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7750 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7751 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7752 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7753 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7754 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7755 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7756 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7757 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7758 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7759 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7760 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7761 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7762 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7763 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7764 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7765 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7766 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7767
7768
7769 Stack layout naming characters:
7770 A - Push the alternative index (starting from 0) on the stack.
      Not pushed if there are no alternatives.
7772 M - Any values pushed by the current alternative. Can be empty, or anything.
7773
7774 The next list shows the possible content of a bracket:
7775 (|) OP_*BRA | OP_ALT ... M A
7776 (?()|) OP_*COND | OP_ALT M A
7777 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
7778 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
7779 Or nothing, if trace is unnecessary
7780*/
7781
7782static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7783{
7784DEFINE_COMPILER;
7785backtrack_common *backtrack;
7786pcre_uchar opcode;
7787int private_data_ptr = 0;
7788int offset = 0;
7789int i, stacksize;
7790int repeat_ptr = 0, repeat_length = 0;
7791int repeat_type = 0, repeat_count = 0;
7792pcre_uchar *ccbegin;
7793pcre_uchar *matchingpath;
7794pcre_uchar *slot;
7795pcre_uchar bra = OP_BRA;
7796pcre_uchar ket;
7797assert_backtrack *assert;
7798BOOL has_alternatives;
7799BOOL needs_control_head = FALSE;
7800struct sljit_jump *jump;
7801struct sljit_jump *skip;
7802struct sljit_label *rmax_label = NULL;
7803struct sljit_jump *braminzero = NULL;
7804
7805PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7806
7807if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7808 {
7809 bra = *cc;
7810 cc++;
7811 opcode = *cc;
7812 }
7813
7814opcode = *cc;
7815ccbegin = cc;
7816matchingpath = bracketend(cc) - 1 - LINK_SIZE;
7817ket = *matchingpath;
7818if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
7819 {
7820 repeat_ptr = PRIVATE_DATA(matchingpath);
7821 repeat_length = PRIVATE_DATA(matchingpath + 1);
7822 repeat_type = PRIVATE_DATA(matchingpath + 2);
7823 repeat_count = PRIVATE_DATA(matchingpath + 3);
7824 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
7825 if (repeat_type == OP_UPTO)
7826 ket = OP_KETRMAX;
7827 if (repeat_type == OP_MINUPTO)
7828 ket = OP_KETRMIN;
7829 }
7830
7831if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
7832 {
7833 /* Drop this bracket_backtrack. */
7834 parent->top = backtrack->prev;
7835 return matchingpath + 1 + LINK_SIZE + repeat_length;
7836 }
7837
7838matchingpath = ccbegin + 1 + LINK_SIZE;
7839SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7840SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7841cc += GET(cc, 1);
7842
7843has_alternatives = *cc == OP_ALT;
7844if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7845 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7846
7847if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7848 opcode = OP_SCOND;
7849if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7850 opcode = OP_ONCE;
7851
7852if (opcode == OP_CBRA || opcode == OP_SCBRA)
7853 {
7854 /* Capturing brackets has a pre-allocated space. */
7855 offset = GET2(ccbegin, 1 + LINK_SIZE);
7856 if (common->optimized_cbracket[offset] == 0)
7857 {
7858 private_data_ptr = OVECTOR_PRIV(offset);
7859 offset <<= 1;
7860 }
7861 else
7862 {
7863 offset <<= 1;
7864 private_data_ptr = OVECTOR(offset);
7865 }
7866 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7867 matchingpath += IMM2_SIZE;
7868 }
7869else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7870 {
7871 /* Other brackets simply allocate the next entry. */
7872 private_data_ptr = PRIVATE_DATA(ccbegin);
7873 SLJIT_ASSERT(private_data_ptr != 0);
7874 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7875 if (opcode == OP_ONCE)
7876 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7877 }
7878
7879/* Instructions before the first alternative. */
7880stacksize = 0;
7881if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7882 stacksize++;
7883if (bra == OP_BRAZERO)
7884 stacksize++;
7885
7886if (stacksize > 0)
7887 allocate_stack(common, stacksize);
7888
7889stacksize = 0;
7890if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7891 {
7892 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7893 stacksize++;
7894 }
7895
7896if (bra == OP_BRAZERO)
7897 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7898
7899if (bra == OP_BRAMINZERO)
7900 {
7901 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7902 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7903 if (ket != OP_KETRMIN)
7904 {
7905 free_stack(common, 1);
7906 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7907 }
7908 else
7909 {
7910 if (opcode == OP_ONCE || opcode >= OP_SBRA)
7911 {
7912 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7913 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7914 /* Nothing stored during the first run. */
7915 skip = JUMP(SLJIT_JUMP);
7916 JUMPHERE(jump);
7917 /* Checking zero-length iteration. */
7918 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7919 {
7920 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7921 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7922 }
7923 else
7924 {
7925 /* Except when the whole stack frame must be saved. */
7926 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7927 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7928 }
7929 JUMPHERE(skip);
7930 }
7931 else
7932 {
7933 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7934 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7935 JUMPHERE(jump);
7936 }
7937 }
7938 }
7939
7940if (repeat_type != 0)
7941 {
7942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7943 if (repeat_type == OP_EXACT)
7944 rmax_label = LABEL();
7945 }
7946
7947if (ket == OP_KETRMIN)
7948 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7949
7950if (ket == OP_KETRMAX)
7951 {
7952 rmax_label = LABEL();
7953 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7954 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7955 }
7956
7957/* Handling capturing brackets and alternatives. */
7958if (opcode == OP_ONCE)
7959 {
7960 stacksize = 0;
7961 if (needs_control_head)
7962 {
7963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7964 stacksize++;
7965 }
7966
7967 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7968 {
7969 /* Neither capturing brackets nor recursions are found in the block. */
7970 if (ket == OP_KETRMIN)
7971 {
7972 stacksize += 2;
7973 if (!needs_control_head)
7974 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7975 }
7976 else
7977 {
7978 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7979 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7980 if (ket == OP_KETRMAX || has_alternatives)
7981 stacksize++;
7982 }
7983
7984 if (stacksize > 0)
7985 allocate_stack(common, stacksize);
7986
7987 stacksize = 0;
7988 if (needs_control_head)
7989 {
7990 stacksize++;
7991 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7992 }
7993
7994 if (ket == OP_KETRMIN)
7995 {
7996 if (needs_control_head)
7997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7998 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7999 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8000 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8001 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8002 }
8003 else if (ket == OP_KETRMAX || has_alternatives)
8004 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8005 }
8006 else
8007 {
8008 if (ket != OP_KET || has_alternatives)
8009 stacksize++;
8010
8011 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8012 allocate_stack(common, stacksize);
8013
8014 if (needs_control_head)
8015 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8016
8017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8018 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8019
8020 stacksize = needs_control_head ? 1 : 0;
8021 if (ket != OP_KET || has_alternatives)
8022 {
8023 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8025 stacksize++;
8026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8027 }
8028 else
8029 {
8030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8032 }
8033 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8034 }
8035 }
8036else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8037 {
8038 /* Saving the previous values. */
8039 if (common->optimized_cbracket[offset >> 1] != 0)
8040 {
8041 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8042 allocate_stack(common, 2);
8043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8044 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8046 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8047 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8048 }
8049 else
8050 {
8051 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8052 allocate_stack(common, 1);
8053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8054 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8055 }
8056 }
8057else if (opcode == OP_SBRA || opcode == OP_SCOND)
8058 {
8059 /* Saving the previous value. */
8060 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8061 allocate_stack(common, 1);
8062 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8064 }
8065else if (has_alternatives)
8066 {
8067 /* Pushing the starting string pointer. */
8068 allocate_stack(common, 1);
8069 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8070 }
8071
8072/* Generating code for the first alternative. */
8073if (opcode == OP_COND || opcode == OP_SCOND)
8074 {
8075 if (*matchingpath == OP_CREF)
8076 {
8077 SLJIT_ASSERT(has_alternatives);
8078 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8079 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8080 matchingpath += 1 + IMM2_SIZE;
8081 }
8082 else if (*matchingpath == OP_DNCREF)
8083 {
8084 SLJIT_ASSERT(has_alternatives);
8085
8086 i = GET2(matchingpath, 1 + IMM2_SIZE);
8087 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8088 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8089 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8090 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8091 slot += common->name_entry_size;
8092 i--;
8093 while (i-- > 0)
8094 {
8095 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8096 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
8097 slot += common->name_entry_size;
8098 }
8099 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8100 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8101 matchingpath += 1 + 2 * IMM2_SIZE;
8102 }
8103 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
8104 {
8105 /* Never has other case. */
8106 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8107 SLJIT_ASSERT(!has_alternatives);
8108
8109 if (*matchingpath == OP_FAIL)
8110 stacksize = 0;
8111 if (*matchingpath == OP_RREF)
8112 {
8113 stacksize = GET2(matchingpath, 1);
8114 if (common->currententry == NULL)
8115 stacksize = 0;
8116 else if (stacksize == RREF_ANY)
8117 stacksize = 1;
8118 else if (common->currententry->start == 0)
8119 stacksize = stacksize == 0;
8120 else
8121 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8122
8123 if (stacksize != 0)
8124 matchingpath += 1 + IMM2_SIZE;
8125 }
8126 else
8127 {
8128 if (common->currententry == NULL || common->currententry->start == 0)
8129 stacksize = 0;
8130 else
8131 {
8132 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8133 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8134 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8135 while (stacksize > 0)
8136 {
8137 if ((int)GET2(slot, 0) == i)
8138 break;
8139 slot += common->name_entry_size;
8140 stacksize--;
8141 }
8142 }
8143
8144 if (stacksize != 0)
8145 matchingpath += 1 + 2 * IMM2_SIZE;
8146 }
8147
8148 /* The stacksize == 0 is a common "else" case. */
8149 if (stacksize == 0)
8150 {
8151 if (*cc == OP_ALT)
8152 {
8153 matchingpath = cc + 1 + LINK_SIZE;
8154 cc += GET(cc, 1);
8155 }
8156 else
8157 matchingpath = cc;
8158 }
8159 }
8160 else
8161 {
8162 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8163 /* Similar code as PUSH_BACKTRACK macro. */
8164 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8165 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8166 return NULL;
8167 memset(assert, 0, sizeof(assert_backtrack));
8168 assert->common.cc = matchingpath;
8169 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8170 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8171 }
8172 }
8173
8174compile_matchingpath(common, matchingpath, cc, backtrack);
8175if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8176 return NULL;
8177
8178if (opcode == OP_ONCE)
8179 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8180
8181stacksize = 0;
8182if (repeat_type == OP_MINUPTO)
8183 {
8184 /* We need to preserve the counter. TMP2 will be used below. */
8185 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8186 stacksize++;
8187 }
8188if (ket != OP_KET || bra != OP_BRA)
8189 stacksize++;
8190if (offset != 0)
8191 {
8192 if (common->capture_last_ptr != 0)
8193 stacksize++;
8194 if (common->optimized_cbracket[offset >> 1] == 0)
8195 stacksize += 2;
8196 }
8197if (has_alternatives && opcode != OP_ONCE)
8198 stacksize++;
8199
8200if (stacksize > 0)
8201 allocate_stack(common, stacksize);
8202
8203stacksize = 0;
8204if (repeat_type == OP_MINUPTO)
8205 {
8206 /* TMP2 was set above. */
8207 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8208 stacksize++;
8209 }
8210
8211if (ket != OP_KET || bra != OP_BRA)
8212 {
8213 if (ket != OP_KET)
8214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8215 else
8216 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8217 stacksize++;
8218 }
8219
8220if (offset != 0)
8221 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8222
8223if (has_alternatives)
8224 {
8225 if (opcode != OP_ONCE)
8226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8227 if (ket != OP_KETRMAX)
8228 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8229 }
8230
8231/* Must be after the matchingpath label. */
8232if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8233 {
8234 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8236 }
8237
8238if (ket == OP_KETRMAX)
8239 {
8240 if (repeat_type != 0)
8241 {
8242 if (has_alternatives)
8243 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8244 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8245 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8246 /* Drop STR_PTR for greedy plus quantifier. */
8247 if (opcode != OP_ONCE)
8248 free_stack(common, 1);
8249 }
8250 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8251 {
8252 if (has_alternatives)
8253 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8254 /* Checking zero-length iteration. */
8255 if (opcode != OP_ONCE)
8256 {
8257 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8258 /* Drop STR_PTR for greedy plus quantifier. */
8259 if (bra != OP_BRAZERO)
8260 free_stack(common, 1);
8261 }
8262 else
8263 /* TMP2 must contain the starting STR_PTR. */
8264 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8265 }
8266 else
8267 JUMPTO(SLJIT_JUMP, rmax_label);
8268 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8269 }
8270
8271if (repeat_type == OP_EXACT)
8272 {
8273 count_match(common);
8274 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8275 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8276 }
8277else if (repeat_type == OP_UPTO)
8278 {
8279 /* We need to preserve the counter. */
8280 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8281 allocate_stack(common, 1);
8282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8283 }
8284
8285if (bra == OP_BRAZERO)
8286 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8287
8288if (bra == OP_BRAMINZERO)
8289 {
8290 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8291 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8292 if (braminzero != NULL)
8293 {
8294 JUMPHERE(braminzero);
8295 /* We need to release the end pointer to perform the
8296 backtrack for the zero-length iteration. When
8297 framesize is < 0, OP_ONCE will do the release itself. */
8298 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8299 {
8300 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8301 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8302 }
8303 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8304 free_stack(common, 1);
8305 }
8306 /* Continue to the normal backtrack. */
8307 }
8308
8309if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8310 count_match(common);
8311
8312/* Skip the other alternatives. */
8313while (*cc == OP_ALT)
8314 cc += GET(cc, 1);
8315cc += 1 + LINK_SIZE;
8316
8317if (opcode == OP_ONCE)
8318 {
8319 /* We temporarily encode the needs_control_head in the lowest bit.
8320 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8321 the same value for small signed numbers (including negative numbers). */
8322 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8323 }
8324return cc + repeat_length;
8325}
8326
8327static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8328{
8329DEFINE_COMPILER;
8330backtrack_common *backtrack;
8331pcre_uchar opcode;
8332int private_data_ptr;
8333int cbraprivptr = 0;
8334BOOL needs_control_head;
8335int framesize;
8336int stacksize;
8337int offset = 0;
8338BOOL zero = FALSE;
8339pcre_uchar *ccbegin = NULL;
8340int stack; /* Also contains the offset of control head. */
8341struct sljit_label *loop = NULL;
8342struct jump_list *emptymatch = NULL;
8343
8344PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
8345if (*cc == OP_BRAPOSZERO)
8346 {
8347 zero = TRUE;
8348 cc++;
8349 }
8350
8351opcode = *cc;
8352private_data_ptr = PRIVATE_DATA(cc);
8353SLJIT_ASSERT(private_data_ptr != 0);
8354BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
8355switch(opcode)
8356 {
8357 case OP_BRAPOS:
8358 case OP_SBRAPOS:
8359 ccbegin = cc + 1 + LINK_SIZE;
8360 break;
8361
8362 case OP_CBRAPOS:
8363 case OP_SCBRAPOS:
8364 offset = GET2(cc, 1 + LINK_SIZE);
8365 /* This case cannot be optimized in the same was as
8366 normal capturing brackets. */
8367 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
8368 cbraprivptr = OVECTOR_PRIV(offset);
8369 offset <<= 1;
8370 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
8371 break;
8372
8373 default:
8374 SLJIT_ASSERT_STOP();
8375 break;
8376 }
8377
8378framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8379BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
8380if (framesize < 0)
8381 {
8382 if (offset != 0)
8383 {
8384 stacksize = 2;
8385 if (common->capture_last_ptr != 0)
8386 stacksize++;
8387 }
8388 else
8389 stacksize = 1;
8390
8391 if (needs_control_head)
8392 stacksize++;
8393 if (!zero)
8394 stacksize++;
8395
8396 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8397 allocate_stack(common, stacksize);
8398 if (framesize == no_frame)
8399 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8400
8401 stack = 0;
8402 if (offset != 0)
8403 {
8404 stack = 2;
8405 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8406 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8407 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8408 if (common->capture_last_ptr != 0)
8409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8411 if (needs_control_head)
8412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8413 if (common->capture_last_ptr != 0)
8414 {
8415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
8416 stack = 3;
8417 }
8418 }
8419 else
8420 {
8421 if (needs_control_head)
8422 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8424 stack = 1;
8425 }
8426
8427 if (needs_control_head)
8428 stack++;
8429 if (!zero)
8430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
8431 if (needs_control_head)
8432 {
8433 stack--;
8434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8435 }
8436 }
8437else
8438 {
8439 stacksize = framesize + 1;
8440 if (!zero)
8441 stacksize++;
8442 if (needs_control_head)
8443 stacksize++;
8444 if (offset == 0)
8445 stacksize++;
8446 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8447
8448 allocate_stack(common, stacksize);
8449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8450 if (needs_control_head)
8451 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8452 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
8453
8454 stack = 0;
8455 if (!zero)
8456 {
8457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
8458 stack = 1;
8459 }
8460 if (needs_control_head)
8461 {
8462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8463 stack++;
8464 }
8465 if (offset == 0)
8466 {
8467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
8468 stack++;
8469 }
8470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
8471 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
8472 stack -= 1 + (offset == 0);
8473 }
8474
8475if (offset != 0)
8476 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8477
8478loop = LABEL();
8479while (*cc != OP_KETRPOS)
8480 {
8481 backtrack->top = NULL;
8482 backtrack->topbacktracks = NULL;
8483 cc += GET(cc, 1);
8484
8485 compile_matchingpath(common, ccbegin, cc, backtrack);
8486 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8487 return NULL;
8488
8489 if (framesize < 0)
8490 {
8491 if (framesize == no_frame)
8492 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8493
8494 if (offset != 0)
8495 {
8496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8498 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8499 if (common->capture_last_ptr != 0)
8500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8501 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8502 }
8503 else
8504 {
8505 if (opcode == OP_SBRAPOS)
8506 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8508 }
8509
8510 /* Even if the match is empty, we need to reset the control head. */
8511 if (needs_control_head)
8512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8513
8514 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8515 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8516
8517 if (!zero)
8518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8519 }
8520 else
8521 {
8522 if (offset != 0)
8523 {
8524 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8525 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8526 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8527 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8528 if (common->capture_last_ptr != 0)
8529 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8531 }
8532 else
8533 {
8534 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8535 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8536 if (opcode == OP_SBRAPOS)
8537 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
8538 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
8539 }
8540
8541 /* Even if the match is empty, we need to reset the control head. */
8542 if (needs_control_head)
8543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8544
8545 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8546 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8547
8548 if (!zero)
8549 {
8550 if (framesize < 0)
8551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8552 else
8553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8554 }
8555 }
8556
8557 JUMPTO(SLJIT_JUMP, loop);
8558 flush_stubs(common);
8559
8560 compile_backtrackingpath(common, backtrack->top);
8561 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8562 return NULL;
8563 set_jumps(backtrack->topbacktracks, LABEL());
8564
8565 if (framesize < 0)
8566 {
8567 if (offset != 0)
8568 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8569 else
8570 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8571 }
8572 else
8573 {
8574 if (offset != 0)
8575 {
8576 /* Last alternative. */
8577 if (*cc == OP_KETRPOS)
8578 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8579 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8580 }
8581 else
8582 {
8583 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8584 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
8585 }
8586 }
8587
8588 if (*cc == OP_KETRPOS)
8589 break;
8590 ccbegin = cc + 1 + LINK_SIZE;
8591 }
8592
8593/* We don't have to restore the control head in case of a failed match. */
8594
8595backtrack->topbacktracks = NULL;
8596if (!zero)
8597 {
8598 if (framesize < 0)
8599 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
8600 else /* TMP2 is set to [private_data_ptr] above. */
8601 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
8602 }
8603
8604/* None of them matched. */
8605set_jumps(emptymatch, LABEL());
8606count_match(common);
8607return cc + 1 + LINK_SIZE;
8608}
8609
8610static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8611{
8612int class_len;
8613
8614*opcode = *cc;
8615*exact = 0;
8616
8617if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8618 {
8619 cc++;
8620 *type = OP_CHAR;
8621 }
8622else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8623 {
8624 cc++;
8625 *type = OP_CHARI;
8626 *opcode -= OP_STARI - OP_STAR;
8627 }
8628else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8629 {
8630 cc++;
8631 *type = OP_NOT;
8632 *opcode -= OP_NOTSTAR - OP_STAR;
8633 }
8634else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8635 {
8636 cc++;
8637 *type = OP_NOTI;
8638 *opcode -= OP_NOTSTARI - OP_STAR;
8639 }
8640else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8641 {
8642 cc++;
8643 *opcode -= OP_TYPESTAR - OP_STAR;
8644 *type = OP_END;
8645 }
8646else
8647 {
8648 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8649 *type = *opcode;
8650 cc++;
8651 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8652 *opcode = cc[class_len - 1];
8653
8654 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8655 {
8656 *opcode -= OP_CRSTAR - OP_STAR;
8657 *end = cc + class_len;
8658
8659 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8660 {
8661 *exact = 1;
8662 *opcode -= OP_PLUS - OP_STAR;
8663 }
8664 }
8665 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8666 {
8667 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8668 *end = cc + class_len;
8669
8670 if (*opcode == OP_POSPLUS)
8671 {
8672 *exact = 1;
8673 *opcode = OP_POSSTAR;
8674 }
8675 }
8676 else
8677 {
8678 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8679 *max = GET2(cc, (class_len + IMM2_SIZE));
8680 *exact = GET2(cc, class_len);
8681
8682 if (*max == 0)
8683 {
8684 if (*opcode == OP_CRPOSRANGE)
8685 *opcode = OP_POSSTAR;
8686 else
8687 *opcode -= OP_CRRANGE - OP_STAR;
8688 }
8689 else
8690 {
8691 *max -= *exact;
8692 if (*max == 0)
8693 *opcode = OP_EXACT;
8694 else if (*max == 1)
8695 {
8696 if (*opcode == OP_CRPOSRANGE)
8697 *opcode = OP_POSQUERY;
8698 else
8699 *opcode -= OP_CRRANGE - OP_QUERY;
8700 }
8701 else
8702 {
8703 if (*opcode == OP_CRPOSRANGE)
8704 *opcode = OP_POSUPTO;
8705 else
8706 *opcode -= OP_CRRANGE - OP_UPTO;
8707 }
8708 }
8709 *end = cc + class_len + 2 * IMM2_SIZE;
8710 }
8711 return cc;
8712 }
8713
8714switch(*opcode)
8715 {
8716 case OP_EXACT:
8717 *exact = GET2(cc, 0);
8718 cc += IMM2_SIZE;
8719 break;
8720
8721 case OP_PLUS:
8722 case OP_MINPLUS:
8723 *exact = 1;
8724 *opcode -= OP_PLUS - OP_STAR;
8725 break;
8726
8727 case OP_POSPLUS:
8728 *exact = 1;
8729 *opcode = OP_POSSTAR;
8730 break;
8731
8732 case OP_UPTO:
8733 case OP_MINUPTO:
8734 case OP_POSUPTO:
8735 *max = GET2(cc, 0);
8736 cc += IMM2_SIZE;
8737 break;
8738 }
8739
8740if (*type == OP_END)
8741 {
8742 *type = *cc;
8743 *end = next_opcode(common, cc);
8744 cc++;
8745 return cc;
8746 }
8747
8748*end = cc + 1;
8749#ifdef SUPPORT_UTF
8750if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8751#endif
8752return cc;
8753}
8754
8755static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8756{
8757DEFINE_COMPILER;
8758backtrack_common *backtrack;
8759pcre_uchar opcode;
8760pcre_uchar type;
8761sljit_u32 max = 0, exact;
8762BOOL fast_fail;
8763sljit_s32 fast_str_ptr;
8764BOOL charpos_enabled;
8765pcre_uchar charpos_char;
8766unsigned int charpos_othercasebit;
8767pcre_uchar *end;
8768jump_list *no_match = NULL;
8769jump_list *no_char1_match = NULL;
8770struct sljit_jump *jump = NULL;
8771struct sljit_label *label;
8772int private_data_ptr = PRIVATE_DATA(cc);
8773int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8774int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8775int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8776int tmp_base, tmp_offset;
8777
8778PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
8779
8780fast_str_ptr = PRIVATE_DATA(cc + 1);
8781fast_fail = TRUE;
8782
8783SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
8784
8785if (cc == common->fast_forward_bc_ptr)
8786 fast_fail = FALSE;
8787else if (common->fast_fail_start_ptr == 0)
8788 fast_str_ptr = 0;
8789
8790SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
8791 || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
8792
8793cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
8794
8795if (type != OP_EXTUNI)
8796 {
8797 tmp_base = TMP3;
8798 tmp_offset = 0;
8799 }
8800else
8801 {
8802 tmp_base = SLJIT_MEM1(SLJIT_SP);
8803 tmp_offset = POSSESSIVE0;
8804 }
8805
8806if (fast_fail && fast_str_ptr != 0)
8807 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
8808
8809/* Handle fixed part first. */
8810if (exact > 1)
8811 {
8812 SLJIT_ASSERT(fast_str_ptr == 0);
8813 if (common->mode == JIT_COMPILE
8814#ifdef SUPPORT_UTF
8815 && !common->utf
8816#endif
8817 )
8818 {
8819 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
8820 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
8821 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8822 label = LABEL();
8823 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8824 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8825 JUMPTO(SLJIT_NOT_ZERO, label);
8826 }
8827 else
8828 {
8829 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
8830 label = LABEL();
8831 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8832 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8833 JUMPTO(SLJIT_NOT_ZERO, label);
8834 }
8835 }
8836else if (exact == 1)
8837 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
8838
8839switch(opcode)
8840 {
8841 case OP_STAR:
8842 case OP_UPTO:
8843 SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
8844
8845 if (type == OP_ANYNL || type == OP_EXTUNI)
8846 {
8847 SLJIT_ASSERT(private_data_ptr == 0);
8848 SLJIT_ASSERT(fast_str_ptr == 0);
8849
8850 allocate_stack(common, 2);
8851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8852 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8853
8854 if (opcode == OP_UPTO)
8855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
8856
8857 label = LABEL();
8858 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
8859 if (opcode == OP_UPTO)
8860 {
8861 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
8862 OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8863 jump = JUMP(SLJIT_ZERO);
8864 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
8865 }
8866
8867 /* We cannot use TMP3 because of this allocate_stack. */
8868 allocate_stack(common, 1);
8869 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8870 JUMPTO(SLJIT_JUMP, label);
8871 if (jump != NULL)
8872 JUMPHERE(jump);
8873 }
8874 else
8875 {
8876 charpos_enabled = FALSE;
8877 charpos_char = 0;
8878 charpos_othercasebit = 0;
8879
8880 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
8881 {
8882 charpos_enabled = TRUE;
8883#ifdef SUPPORT_UTF
8884 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
8885#endif
8886 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
8887 {
8888 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
8889 if (charpos_othercasebit == 0)
8890 charpos_enabled = FALSE;
8891 }
8892
8893 if (charpos_enabled)
8894 {
8895 charpos_char = end[1];
8896 /* Consumpe the OP_CHAR opcode. */
8897 end += 2;
8898#if defined COMPILE_PCRE8
8899 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
8900#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
8901 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
8902 if ((charpos_othercasebit & 0x100) != 0)
8903 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
8904#endif
8905 if (charpos_othercasebit != 0)
8906 charpos_char |= charpos_othercasebit;
8907
8908 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
8909 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
8910 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
8911 }
8912 }
8913
8914 if (charpos_enabled)
8915 {
8916 if (opcode == OP_UPTO)
8917 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
8918
8919 /* Search the first instance of charpos_char. */
8920 jump = JUMP(SLJIT_JUMP);
8921 label = LABEL();
8922 if (opcode == OP_UPTO)
8923 {
8924 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8925 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
8926 }
8927 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
8928 if (fast_str_ptr != 0)
8929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8930 JUMPHERE(jump);
8931
8932 detect_partial_match(common, &backtrack->topbacktracks);
8933 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8934 if (charpos_othercasebit != 0)
8935 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8936 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8937
8938 if (private_data_ptr == 0)
8939 allocate_stack(common, 2);
8940 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8941 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8942 if (opcode == OP_UPTO)
8943 {
8944 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8945 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
8946 }
8947
8948 /* Search the last instance of charpos_char. */
8949 label = LABEL();
8950 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
8951 if (fast_str_ptr != 0)
8952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
8953 detect_partial_match(common, &no_match);
8954 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8955 if (charpos_othercasebit != 0)
8956 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
8957 if (opcode == OP_STAR)
8958 {
8959 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
8960 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8961 }
8962 else
8963 {
8964 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
8965 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8966 JUMPHERE(jump);
8967 }
8968
8969 if (opcode == OP_UPTO)
8970 {
8971 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8972 JUMPTO(SLJIT_NOT_ZERO, label);
8973 }
8974 else
8975 JUMPTO(SLJIT_JUMP, label);
8976
8977 set_jumps(no_match, LABEL());
8978 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8980 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8981 }
8982#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
8983 else if (common->utf)
8984 {
8985 if (private_data_ptr == 0)
8986 allocate_stack(common, 2);
8987
8988 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8989 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8990
8991 if (opcode == OP_UPTO)
8992 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
8993
8994 label = LABEL();
8995 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
8996 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8997
8998 if (opcode == OP_UPTO)
8999 {
9000 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9001 JUMPTO(SLJIT_NOT_ZERO, label);
9002 }
9003 else
9004 JUMPTO(SLJIT_JUMP, label);
9005
9006 set_jumps(no_match, LABEL());
9007 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9008 if (fast_str_ptr != 0)
9009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9010 }
9011#endif
9012 else
9013 {
9014 if (private_data_ptr == 0)
9015 allocate_stack(common, 2);
9016
9017 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9018 if (opcode == OP_UPTO)
9019 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9020
9021 label = LABEL();
9022 detect_partial_match(common, &no_match);
9023 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9024 if (opcode == OP_UPTO)
9025 {
9026 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9027 JUMPTO(SLJIT_NOT_ZERO, label);
9028 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9029 }
9030 else
9031 JUMPTO(SLJIT_JUMP, label);
9032
9033 set_jumps(no_char1_match, LABEL());
9034 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9035 set_jumps(no_match, LABEL());
9036 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9037 if (fast_str_ptr != 0)
9038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9039 }
9040 }
9041 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9042 break;
9043
9044 case OP_MINSTAR:
9045 if (private_data_ptr == 0)
9046 allocate_stack(common, 1);
9047 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9048 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9049 if (fast_str_ptr != 0)
9050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9051 break;
9052
9053 case OP_MINUPTO:
9054 SLJIT_ASSERT(fast_str_ptr == 0);
9055 if (private_data_ptr == 0)
9056 allocate_stack(common, 2);
9057 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9058 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9059 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9060 break;
9061
9062 case OP_QUERY:
9063 case OP_MINQUERY:
9064 SLJIT_ASSERT(fast_str_ptr == 0);
9065 if (private_data_ptr == 0)
9066 allocate_stack(common, 1);
9067 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9068 if (opcode == OP_QUERY)
9069 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9070 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9071 break;
9072
9073 case OP_EXACT:
9074 break;
9075
9076 case OP_POSSTAR:
9077#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9078 if (common->utf)
9079 {
9080 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9081 label = LABEL();
9082 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9083 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9084 JUMPTO(SLJIT_JUMP, label);
9085 set_jumps(no_match, LABEL());
9086 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9087 if (fast_str_ptr != 0)
9088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9089 break;
9090 }
9091#endif
9092 label = LABEL();
9093 detect_partial_match(common, &no_match);
9094 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9095 JUMPTO(SLJIT_JUMP, label);
9096 set_jumps(no_char1_match, LABEL());
9097 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9098 set_jumps(no_match, LABEL());
9099 if (fast_str_ptr != 0)
9100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9101 break;
9102
9103 case OP_POSUPTO:
9104 SLJIT_ASSERT(fast_str_ptr == 0);
9105#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9106 if (common->utf)
9107 {
9108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9109 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9110 label = LABEL();
9111 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9113 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9114 JUMPTO(SLJIT_NOT_ZERO, label);
9115 set_jumps(no_match, LABEL());
9116 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9117 break;
9118 }
9119#endif
9120 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9121 label = LABEL();
9122 detect_partial_match(common, &no_match);
9123 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9124 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9125 JUMPTO(SLJIT_NOT_ZERO, label);
9126 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9127 set_jumps(no_char1_match, LABEL());
9128 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9129 set_jumps(no_match, LABEL());
9130 break;
9131
9132 case OP_POSQUERY:
9133 SLJIT_ASSERT(fast_str_ptr == 0);
9134 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9135 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9136 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9137 set_jumps(no_match, LABEL());
9138 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9139 break;
9140
9141 default:
9142 SLJIT_ASSERT_STOP();
9143 break;
9144 }
9145
9146count_match(common);
9147return end;
9148}
9149
9150static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9151{
9152DEFINE_COMPILER;
9153backtrack_common *backtrack;
9154
9155PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9156
9157if (*cc == OP_FAIL)
9158 {
9159 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9160 return cc + 1;
9161 }
9162
9163if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9164 {
9165 /* No need to check notempty conditions. */
9166 if (common->accept_label == NULL)
9167 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9168 else
9169 JUMPTO(SLJIT_JUMP, common->accept_label);
9170 return cc + 1;
9171 }
9172
9173if (common->accept_label == NULL)
9174 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9175else
9176 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
9177OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9178OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9179add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9180OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9181if (common->accept_label == NULL)
9182 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9183else
9184 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
9185OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9186if (common->accept_label == NULL)
9187 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9188else
9189 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9190add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9191return cc + 1;
9192}
9193
9194static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9195{
9196DEFINE_COMPILER;
9197int offset = GET2(cc, 1);
9198BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9199
9200/* Data will be discarded anyway... */
9201if (common->currententry != NULL)
9202 return cc + 1 + IMM2_SIZE;
9203
9204if (!optimized_cbracket)
9205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9206offset <<= 1;
9207OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9208if (!optimized_cbracket)
9209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9210return cc + 1 + IMM2_SIZE;
9211}
9212
9213static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9214{
9215DEFINE_COMPILER;
9216backtrack_common *backtrack;
9217pcre_uchar opcode = *cc;
9218pcre_uchar *ccend = cc + 1;
9219
9220if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9221 ccend += 2 + cc[1];
9222
9223PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9224
9225if (opcode == OP_SKIP)
9226 {
9227 allocate_stack(common, 1);
9228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9229 return ccend;
9230 }
9231
9232if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9233 {
9234 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9235 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9237 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9238 }
9239
9240return ccend;
9241}
9242
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
   records created in this file point their common.cc field at this buffer
   instead of at a position inside the compiled pattern. */
9243static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
9244
9245static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9246{
9247DEFINE_COMPILER;
9248backtrack_common *backtrack;
9249BOOL needs_control_head;
9250int size;
9251
9252PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9253common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9254BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9255BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9256BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9257
9258size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9259size = 3 + (size < 0 ? 0 : size);
9260
9261OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9262allocate_stack(common, size);
9263if (size > 3)
9264 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9265else
9266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9267OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9268OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9269OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9270
9271size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9272if (size >= 0)
9273 init_frame(common, cc, ccend, size - 1, 0, FALSE);
9274}
9275
/* Emits the matching-path code for the opcodes in [cc, ccend).

   The compiled pattern is walked one item at a time; each opcode is dispatched
   to its compile_*_matchingpath() helper, or handled inline for OP_SET_SOM,
   OP_BRAMINZERO and OP_MARK. Every helper returns the address of the next
   item; a NULL result makes this function return immediately.

   Helpers that take a jump list receive parent->top->nextbacktracks when a
   backtrack record already exists under parent, and parent->topbacktracks
   otherwise. Helpers that create their own backtrack record receive parent.

   When common->has_then is set and common->then_offsets is non-zero at the
   offset of cc, a then-trap is emitted before the loop and a second
   then_trap_backtrack record is pushed after it.

   PUSH_BACKTRACK_NOVALUE is defined outside this chunk; it presumably returns
   from this function when the record cannot be allocated - TODO confirm. */
9276static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9277{
9278DEFINE_COMPILER;
9279backtrack_common *backtrack;
9280BOOL has_then_trap = FALSE;
9281then_trap_backtrack *save_then_trap = NULL;
9282
/* The range must end at OP_END or at one of the opcodes OP_ALT..OP_KETRPOS. */
9283SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
9284
9285if (common->has_then && common->then_offsets[cc - common->start] != 0)
9286 {
9287 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
9288 has_then_trap = TRUE;
 /* compile_then_trap_matchingpath() overwrites common->then_trap; the old
 value is restored after the loop. */
9289 save_then_trap = common->then_trap;
9290 /* Tail item on backtrack. */
9291 compile_then_trap_matchingpath(common, cc, ccend, parent);
9292 }
9293
9294while (cc < ccend)
9295 {
9296 switch(*cc)
9297 {
 /* Simple assertions: the helper gets the opcode and the address after it. */
9298 case OP_SOD:
9299 case OP_SOM:
9300 case OP_NOT_WORD_BOUNDARY:
9301 case OP_WORD_BOUNDARY:
9302 case OP_EODN:
9303 case OP_EOD:
9304 case OP_DOLL:
9305 case OP_DOLLM:
9306 case OP_CIRC:
9307 case OP_CIRCM:
9308 case OP_REVERSE:
9309 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9310 break;
9311
 /* Single-item matchers handled by compile_char1_matchingpath(). */
9312 case OP_NOT_DIGIT:
9313 case OP_DIGIT:
9314 case OP_NOT_WHITESPACE:
9315 case OP_WHITESPACE:
9316 case OP_NOT_WORDCHAR:
9317 case OP_WORDCHAR:
9318 case OP_ANY:
9319 case OP_ALLANY:
9320 case OP_ANYBYTE:
9321 case OP_NOTPROP:
9322 case OP_PROP:
9323 case OP_ANYNL:
9324 case OP_NOT_HSPACE:
9325 case OP_HSPACE:
9326 case OP_NOT_VSPACE:
9327 case OP_VSPACE:
9328 case OP_EXTUNI:
9329 case OP_NOT:
9330 case OP_NOTI:
9331 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9332 break;
9333
 /* The current OVECTOR(0) value is saved in a new stack slot and OVECTOR(0)
 is overwritten with STR_PTR. */
9334 case OP_SET_SOM:
9335 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9336 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
9337 allocate_stack(common, 1);
9338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
9339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9340 cc++;
9341 break;
9342
 /* In JIT_COMPILE mode the helper also receives ccend (presumably so that it
 can handle several consecutive characters at once - TODO confirm); in any
 other mode exactly one character item is compiled. */
9343 case OP_CHAR:
9344 case OP_CHARI:
9345 if (common->mode == JIT_COMPILE)
9346 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9347 else
9348 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9349 break;
9350
 /* All repeat opcodes go to the iterator compiler. */
9351 case OP_STAR:
9352 case OP_MINSTAR:
9353 case OP_PLUS:
9354 case OP_MINPLUS:
9355 case OP_QUERY:
9356 case OP_MINQUERY:
9357 case OP_UPTO:
9358 case OP_MINUPTO:
9359 case OP_EXACT:
9360 case OP_POSSTAR:
9361 case OP_POSPLUS:
9362 case OP_POSQUERY:
9363 case OP_POSUPTO:
9364 case OP_STARI:
9365 case OP_MINSTARI:
9366 case OP_PLUSI:
9367 case OP_MINPLUSI:
9368 case OP_QUERYI:
9369 case OP_MINQUERYI:
9370 case OP_UPTOI:
9371 case OP_MINUPTOI:
9372 case OP_EXACTI:
9373 case OP_POSSTARI:
9374 case OP_POSPLUSI:
9375 case OP_POSQUERYI:
9376 case OP_POSUPTOI:
9377 case OP_NOTSTAR:
9378 case OP_NOTMINSTAR:
9379 case OP_NOTPLUS:
9380 case OP_NOTMINPLUS:
9381 case OP_NOTQUERY:
9382 case OP_NOTMINQUERY:
9383 case OP_NOTUPTO:
9384 case OP_NOTMINUPTO:
9385 case OP_NOTEXACT:
9386 case OP_NOTPOSSTAR:
9387 case OP_NOTPOSPLUS:
9388 case OP_NOTPOSQUERY:
9389 case OP_NOTPOSUPTO:
9390 case OP_NOTSTARI:
9391 case OP_NOTMINSTARI:
9392 case OP_NOTPLUSI:
9393 case OP_NOTMINPLUSI:
9394 case OP_NOTQUERYI:
9395 case OP_NOTMINQUERYI:
9396 case OP_NOTUPTOI:
9397 case OP_NOTMINUPTOI:
9398 case OP_NOTEXACTI:
9399 case OP_NOTPOSSTARI:
9400 case OP_NOTPOSPLUSI:
9401 case OP_NOTPOSQUERYI:
9402 case OP_NOTPOSUPTOI:
9403 case OP_TYPESTAR:
9404 case OP_TYPEMINSTAR:
9405 case OP_TYPEPLUS:
9406 case OP_TYPEMINPLUS:
9407 case OP_TYPEQUERY:
9408 case OP_TYPEMINQUERY:
9409 case OP_TYPEUPTO:
9410 case OP_TYPEMINUPTO:
9411 case OP_TYPEEXACT:
9412 case OP_TYPEPOSSTAR:
9413 case OP_TYPEPOSPLUS:
9414 case OP_TYPEPOSQUERY:
9415 case OP_TYPEPOSUPTO:
9416 cc = compile_iterator_matchingpath(common, cc, parent);
9417 break;
9418
 /* The item after the opcode and its 32 bytes of class data decides: an
 opcode in OP_CRSTAR..OP_CRPOSRANGE makes the class an iterator. */
9419 case OP_CLASS:
9420 case OP_NCLASS:
9421 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
9422 cc = compile_iterator_matchingpath(common, cc, parent);
9423 else
9424 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9425 break;
9426
9427#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 /* Same test as above; GET(cc, 1) gives the distance to the following item. */
9428 case OP_XCLASS:
9429 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
9430 cc = compile_iterator_matchingpath(common, cc, parent);
9431 else
9432 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9433 break;
9434#endif
9435
 /* Back references: repeated ones use the ref iterator; a plain one is
 compiled in place and cc is advanced here by opcode + operand. */
9436 case OP_REF:
9437 case OP_REFI:
9438 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
9439 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9440 else
9441 {
9442 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9443 cc += 1 + IMM2_SIZE;
9444 }
9445 break;
9446
 /* Same as OP_REF, but with two operands and a compile_dnref_search() call
 emitted before the reference match. */
9447 case OP_DNREF:
9448 case OP_DNREFI:
9449 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
9450 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9451 else
9452 {
9453 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9454 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9455 cc += 1 + 2 * IMM2_SIZE;
9456 }
9457 break;
9458
9459 case OP_RECURSE:
9460 cc = compile_recurse_matchingpath(common, cc, parent);
9461 break;
9462
9463 case OP_CALLOUT:
9464 cc = compile_callout_matchingpath(common, cc, parent);
9465 break;
9466
 /* Assertions get an assert_backtrack record which is handed to the helper. */
9467 case OP_ASSERT:
9468 case OP_ASSERT_NOT:
9469 case OP_ASSERTBACK:
9470 case OP_ASSERTBACK_NOT:
9471 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9472 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9473 break;
9474
 /* The bracket body is not compiled here: cc jumps to the end of the
 bracket. The opcode 1 + LINK_SIZE units before that end selects the stack
 layout: one slot holding STR_PTR, or (for OP_KETRMIN) a zero slot plus a
 STR_PTR slot. The label emitted afterwards is recorded as matchingpath. */
9475 case OP_BRAMINZERO:
9476 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
9477 cc = bracketend(cc + 1);
9478 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
9479 {
9480 allocate_stack(common, 1);
9481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9482 }
9483 else
9484 {
9485 allocate_stack(common, 2);
9486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
9488 }
9489 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
9490 count_match(common);
9491 break;
9492
9493 case OP_ONCE:
9494 case OP_ONCE_NC:
9495 case OP_BRA:
9496 case OP_CBRA:
9497 case OP_COND:
9498 case OP_SBRA:
9499 case OP_SCBRA:
9500 case OP_SCOND:
9501 cc = compile_bracket_matchingpath(common, cc, parent);
9502 break;
9503
 /* OP_BRAZERO prefixes either a bracket or an assertion; opcodes up to
 OP_ASSERTBACK_NOT are treated as assertions. */
9504 case OP_BRAZERO:
9505 if (cc[1] > OP_ASSERTBACK_NOT)
9506 cc = compile_bracket_matchingpath(common, cc, parent);
9507 else
9508 {
9509 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9510 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9511 }
9512 break;
9513
9514 case OP_BRAPOS:
9515 case OP_CBRAPOS:
9516 case OP_SBRAPOS:
9517 case OP_SCBRAPOS:
9518 case OP_BRAPOSZERO:
9519 cc = compile_bracketpos_matchingpath(common, cc, parent);
9520 break;
9521
 /* The previous mark value is saved on the stack (slot 4 when has_skip_arg
 is set, slot 0 otherwise) and cc + 2 becomes the new mark, stored both in
 the local mark slot and in jit_arguments.mark_ptr. With has_skip_arg, four
 more slots are filled (previous control head, type_mark, cc + 2, STR_PTR)
 and the control head slot is pointed at STACK_TOP. */
9522 case OP_MARK:
9523 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9524 SLJIT_ASSERT(common->mark_ptr != 0);
9525 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
9526 allocate_stack(common, common->has_skip_arg ? 5 : 1);
9527 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
9529 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9531 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9532 if (common->has_skip_arg)
9533 {
9534 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
9537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
9538 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
9539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9540 }
 /* Skip the opcode, two further units and cc[1] argument units. */
9541 cc += 1 + 2 + cc[1];
9542 break;
9543
9544 case OP_PRUNE:
9545 case OP_PRUNE_ARG:
9546 case OP_SKIP:
9547 case OP_SKIP_ARG:
9548 case OP_THEN:
9549 case OP_THEN_ARG:
9550 case OP_COMMIT:
9551 cc = compile_control_verb_matchingpath(common, cc, parent);
9552 break;
9553
9554 case OP_FAIL:
9555 case OP_ACCEPT:
9556 case OP_ASSERT_ACCEPT:
9557 cc = compile_fail_accept_matchingpath(common, cc, parent);
9558 break;
9559
9560 case OP_CLOSE:
9561 cc = compile_close_matchingpath(common, cc);
9562 break;
9563
 /* No code is emitted: the bracket that follows is stepped over. */
9564 case OP_SKIPZERO:
9565 cc = bracketend(cc + 1);
9566 break;
9567
9568 default:
9569 SLJIT_ASSERT_STOP();
9570 return;
9571 }
 /* A helper signalled failure by returning NULL. */
9572 if (cc == NULL)
9573 return;
9574 }
9575
9576if (has_then_trap)
9577 {
9578 /* Head item on backtrack. */
9579 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9580 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9581 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
9582 common->then_trap = save_then_trap;
9583 }
9584SLJIT_ASSERT(cc == ccend);
9585}
9586
/* The matching-path helper macros are not used below this point. */
9587#undef PUSH_BACKTRACK
9588#undef PUSH_BACKTRACK_NOVALUE
9589#undef BACKTRACK_AS
9590
/* Calls compile_backtrackingpath() for `current` and returns from the
   enclosing (void) function when the sljit compiler reports an error.
   Requires `common` and `compiler` to be in scope at the point of use. */
9591#define COMPILE_BACKTRACKINGPATH(current) \
9592 do \
9593 { \
9594 compile_backtrackingpath(common, (current)); \
9595 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9596 return; \
9597 } \
9598 while (0)
9599
/* Casts the local variable `current` to a pointer to the given backtrack
   record type. */
9600#define CURRENT_AS(type) ((type *)current)
9601
/* Emits the backtracking-path code for a single-item iterator described by the
   char_iterator_backtrack record `current`.

   The iterator state is kept either on the backtrack stack (STACK(0) and
   STACK(1), when PRIVATE_DATA(cc) is zero) or in two consecutive words of the
   local frame starting at private_data_ptr. get_iterator_parameters() decodes
   the opcode, the item type and the limits, and returns the address of the
   item to be matched.

   Each case either resumes the matching path with a different amount of
   input (jumping to current->matchingpath) or releases its stack slots and
   falls through to the common exit, where current->topbacktracks is bound. */
9602static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9603{
9604DEFINE_COMPILER;
9605pcre_uchar *cc = current->cc;
9606pcre_uchar opcode;
9607pcre_uchar type;
9608sljit_u32 max = 0, exact;
9609struct sljit_label *label = NULL;
9610struct sljit_jump *jump = NULL;
9611jump_list *jumplist = NULL;
9612pcre_uchar *end;
9613int private_data_ptr = PRIVATE_DATA(cc);
9614int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9615int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9616int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9617
9618cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9619
9620switch(opcode)
9621 {
9622 case OP_STAR:
9623 case OP_UPTO:
9624 if (type == OP_ANYNL || type == OP_EXTUNI)
9625 {
 /* These types always use the stack: pop one saved STR_PTR and retry the
 matching path with it unless the popped value is zero. */
9626 SLJIT_ASSERT(private_data_ptr == 0);
9627 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9628 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9629 free_stack(common, 1);
9630 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9631 }
9632 else
9633 {
9634 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
9635 {
 /* Scan backwards from the saved position (offset0) down to the limit
 kept in offset1, resuming the matching path only where the unit just
 before STR_PTR equals u.charpos.chr (after OR-ing in othercasebit when
 that is non-zero). */
9636 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9637 OP1(SLJIT_MOV, TMP2, 0, base, offset1);
9638 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9639
9640 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
9641 label = LABEL();
9642 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
9643 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9644 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
9645 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
9646 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9647 skip_char_back(common);
9648 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
9649 }
9650 else
9651 {
 /* Give back one character: if the saved position is still above the
 limit in offset1, step back, store the new position and retry. */
9652 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9653 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
9654 skip_char_back(common);
9655 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9656 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9657 }
 /* Nothing left to give back: release the two stack slots, if any. */
9658 JUMPHERE(jump);
9659 if (private_data_ptr == 0)
9660 free_stack(common, 2);
9661 }
9662 break;
9663
 /* Try to match one more item from the saved position; on success store the
 new position and retry, on failure release the slot. */
9664 case OP_MINSTAR:
9665 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9666 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9667 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9668 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9669 set_jumps(jumplist, LABEL());
9670 if (private_data_ptr == 0)
9671 free_stack(common, 1);
9672 break;
9673
 /* Like OP_MINSTAR, but the counter in offset1 is decremented first and the
 attempt is abandoned when it reaches zero. */
9674 case OP_MINUPTO:
9675 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
9676 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9677 OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9678 add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
9679
9680 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
9681 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9682 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9683 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9684
9685 set_jumps(jumplist, LABEL());
9686 if (private_data_ptr == 0)
9687 free_stack(common, 2);
9688 break;
9689
 /* The slot holds a position or zero. It is read and cleared; a non-zero
 value resumes the matching path at that position. The jumps collected in
 u.backtracks land on the second copy of the load/clear sequence, which
 always resumes the matching path. */
9690 case OP_QUERY:
9691 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9692 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9693 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9694 jump = JUMP(SLJIT_JUMP);
9695 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9696 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9697 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9698 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9699 JUMPHERE(jump);
9700 if (private_data_ptr == 0)
9701 free_stack(common, 1);
9702 break;
9703
 /* The slot is read and cleared; when it held a non-zero position, one item
 is matched from there and the matching path is resumed. */
9704 case OP_MINQUERY:
9705 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9706 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9707 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9708 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9709 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9710 set_jumps(jumplist, LABEL());
9711 JUMPHERE(jump);
9712 if (private_data_ptr == 0)
9713 free_stack(common, 1);
9714 break;
9715
 /* These iterators emit no backtracking code of their own. */
9716 case OP_EXACT:
9717 case OP_POSSTAR:
9718 case OP_POSQUERY:
9719 case OP_POSUPTO:
9720 break;
9721
9722 default:
9723 SLJIT_ASSERT_STOP();
9724 break;
9725 }
9726
/* Common exit: failures recorded directly on this record end up here. */
9727set_jumps(current->topbacktracks, LABEL());
9728}
9729
9730static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9731{
9732DEFINE_COMPILER;
9733pcre_uchar *cc = current->cc;
9734BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9735pcre_uchar type;
9736
9737type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9738
9739if ((type & 0x1) == 0)
9740 {
9741 /* Maximize case. */
9742 set_jumps(current->topbacktracks, LABEL());
9743 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9744 free_stack(common, 1);
9745 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9746 return;
9747 }
9748
9749OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9750CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9751set_jumps(current->topbacktracks, LABEL());
9752free_stack(common, ref ? 2 : 3);
9753}
9754
9755static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9756{
9757DEFINE_COMPILER;
9758
9759if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9760 compile_backtrackingpath(common, current->top);
9761set_jumps(current->topbacktracks, LABEL());
9762if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9763 return;
9764
9765if (common->has_set_som && common->mark_ptr != 0)
9766 {
9767 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9768 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9769 free_stack(common, 2);
9770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9772 }
9773else if (common->has_set_som || common->mark_ptr != 0)
9774 {
9775 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9776 free_stack(common, 1);
9777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9778 }
9779}
9780
/* Emits the backtracking-path code for an assertion described by the
   assert_backtrack record `current`, optionally prefixed by OP_BRAZERO
   (OP_BRAMINZERO is never expected here).

   With an OP_BRAZERO prefix the top stack slot holds a saved STR_PTR or zero.
   A non-zero value means the matching path is retried once more, with the
   slot set to zero; a zero value means there is nothing left to retry.

   When the assertion has a frame (framesize >= 0) and is a positive one
   (OP_ASSERT / OP_ASSERTBACK), the frame is reverted through
   common->revertframes before the failures in current->topbacktracks are
   bound. */
9781static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9782{
9783DEFINE_COMPILER;
9784pcre_uchar *cc = current->cc;
9785pcre_uchar bra = OP_BRA;
9786struct sljit_jump *brajump = NULL;
9787
9788SLJIT_ASSERT(*cc != OP_BRAMINZERO);
9789if (*cc == OP_BRAZERO)
9790 {
9791 bra = *cc;
9792 cc++;
9793 }
9794
9795if (bra == OP_BRAZERO)
9796 {
9797 SLJIT_ASSERT(current->topbacktracks == NULL);
9798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9799 }
9800
/* No frame to revert. */
9801if (CURRENT_AS(assert_backtrack)->framesize < 0)
9802 {
9803 set_jumps(current->topbacktracks, LABEL());
9804
9805 if (bra == OP_BRAZERO)
9806 {
 /* Clear the slot, retry when the loaded value was non-zero, otherwise
 release the slot. */
9807 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9808 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
9809 free_stack(common, 1);
9810 }
9811 return;
9812 }
9813
9814if (bra == OP_BRAZERO)
9815 {
 /* Negative assertions need no frame handling here: same sequence as in
 the frameless case above. */
9816 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
9817 {
9818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9819 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
9820 free_stack(common, 1);
9821 return;
9822 }
 /* Positive assertion: drop the slot and skip everything below when the
 loaded value was zero. */
9823 free_stack(common, 1);
9824 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9825 }
9826
9827if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
9828 {
 /* Reset STACK_TOP from the assertion's private slot, call the shared
 revertframes routine, then reload the private slot from the word at
 framesize * sizeof(sljit_sw) above STACK_TOP. */
9829 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
9830 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
9832
9833 set_jumps(current->topbacktracks, LABEL());
9834 }
9835else
9836 set_jumps(current->topbacktracks, LABEL());
9837
9838if (bra == OP_BRAZERO)
9839 {
9840 /* We know there is enough place on the stack. */
 /* STACK_TOP is adjusted directly instead of calling allocate_stack(); the
 slot is set to zero and the matching path is retried. */
9841 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9843 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
9844 JUMPHERE(brajump);
9845 }
9846}
9847
9848static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9849{
9850DEFINE_COMPILER;
9851int opcode, stacksize, alt_count, alt_max;
9852int offset = 0;
9853int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
9854int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
9855pcre_uchar *cc = current->cc;
9856pcre_uchar *ccbegin;
9857pcre_uchar *ccprev;
9858pcre_uchar bra = OP_BRA;
9859pcre_uchar ket;
9860assert_backtrack *assert;
9861sljit_uw *next_update_addr = NULL;
9862BOOL has_alternatives;
9863BOOL needs_control_head = FALSE;
9864struct sljit_jump *brazero = NULL;
9865struct sljit_jump *alt1 = NULL;
9866struct sljit_jump *alt2 = NULL;
9867struct sljit_jump *once = NULL;
9868struct sljit_jump *cond = NULL;
9869struct sljit_label *rmin_label = NULL;
9870struct sljit_label *exact_label = NULL;
9871
9872if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9873 {
9874 bra = *cc;
9875 cc++;
9876 }
9877
9878opcode = *cc;
9879ccbegin = bracketend(cc) - 1 - LINK_SIZE;
9880ket = *ccbegin;
9881if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
9882 {
9883 repeat_ptr = PRIVATE_DATA(ccbegin);
9884 repeat_type = PRIVATE_DATA(ccbegin + 2);
9885 repeat_count = PRIVATE_DATA(ccbegin + 3);
9886 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
9887 if (repeat_type == OP_UPTO)
9888 ket = OP_KETRMAX;
9889 if (repeat_type == OP_MINUPTO)
9890 ket = OP_KETRMIN;
9891 }
9892ccbegin = cc;
9893cc += GET(cc, 1);
9894has_alternatives = *cc == OP_ALT;
9895if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9896 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
9897if (opcode == OP_CBRA || opcode == OP_SCBRA)
9898 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
9899if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
9900 opcode = OP_SCOND;
9901if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
9902 opcode = OP_ONCE;
9903
9904alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
9905
9906/* Decoding the needs_control_head in framesize. */
9907if (opcode == OP_ONCE)
9908 {
9909 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
9910 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
9911 }
9912
9913if (ket != OP_KET && repeat_type != 0)
9914 {
9915 /* TMP1 is used in OP_KETRMIN below. */
9916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9917 free_stack(common, 1);
9918 if (repeat_type == OP_UPTO)
9919 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
9920 else
9921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9922 }
9923
9924if (ket == OP_KETRMAX)
9925 {
9926 if (bra == OP_BRAZERO)
9927 {
9928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9929 free_stack(common, 1);
9930 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
9931 }
9932 }
9933else if (ket == OP_KETRMIN)
9934 {
9935 if (bra != OP_BRAMINZERO)
9936 {
9937 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9938 if (repeat_type != 0)
9939 {
9940 /* TMP1 was set a few lines above. */
9941 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9942 /* Drop STR_PTR for non-greedy plus quantifier. */
9943 if (opcode != OP_ONCE)
9944 free_stack(common, 1);
9945 }
9946 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
9947 {
9948 /* Checking zero-length iteration. */
9949 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
9950 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9951 else
9952 {
9953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9954 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9955 }
9956 /* Drop STR_PTR for non-greedy plus quantifier. */
9957 if (opcode != OP_ONCE)
9958 free_stack(common, 1);
9959 }
9960 else
9961 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9962 }
9963 rmin_label = LABEL();
9964 if (repeat_type != 0)
9965 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9966 }
9967else if (bra == OP_BRAZERO)
9968 {
9969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9970 free_stack(common, 1);
9971 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
9972 }
9973else if (repeat_type == OP_EXACT)
9974 {
9975 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9976 exact_label = LABEL();
9977 }
9978
9979if (offset != 0)
9980 {
9981 if (common->capture_last_ptr != 0)
9982 {
9983 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
9984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9985 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9987 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9988 free_stack(common, 3);
9989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
9990 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9991 }
9992 else if (common->optimized_cbracket[offset >> 1] == 0)
9993 {
9994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9995 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9996 free_stack(common, 2);
9997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9999 }
10000 }
10001
10002if (SLJIT_UNLIKELY(opcode == OP_ONCE))
10003 {
10004 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10005 {
10006 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10007 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10008 }
10009 once = JUMP(SLJIT_JUMP);
10010 }
10011else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10012 {
10013 if (has_alternatives)
10014 {
10015 /* Always exactly one alternative. */
10016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10017 free_stack(common, 1);
10018
10019 alt_max = 2;
10020 alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
10021 }
10022 }
10023else if (has_alternatives)
10024 {
10025 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10026 free_stack(common, 1);
10027
10028 if (alt_max > 4)
10029 {
10030 /* Table jump if alt_max is greater than 4. */
10031 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
10032 if (SLJIT_UNLIKELY(next_update_addr == NULL))
10033 return;
10034 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
10035 add_label_addr(common, next_update_addr++);
10036 }
10037 else
10038 {
10039 if (alt_max == 4)
10040 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
10041 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
10042 }
10043 }
10044
10045COMPILE_BACKTRACKINGPATH(current->top);
10046if (current->topbacktracks)
10047 set_jumps(current->topbacktracks, LABEL());
10048
10049if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10050 {
10051 /* Conditional block always has at most one alternative. */
10052 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
10053 {
10054 SLJIT_ASSERT(has_alternatives);
10055 assert = CURRENT_AS(bracket_backtrack)->u.assert;
10056 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
10057 {
10058 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
10059 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
10061 }
10062 cond = JUMP(SLJIT_JUMP);
10063 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
10064 }
10065 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
10066 {
10067 SLJIT_ASSERT(has_alternatives);
10068 cond = JUMP(SLJIT_JUMP);
10069 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
10070 }
10071 else
10072 SLJIT_ASSERT(!has_alternatives);
10073 }
10074
10075if (has_alternatives)
10076 {
10077 alt_count = sizeof(sljit_uw);
10078 do
10079 {
10080 current->top = NULL;
10081 current->topbacktracks = NULL;
10082 current->nextbacktracks = NULL;
10083 /* Conditional blocks always have an additional alternative, even if it is empty. */
10084 if (*cc == OP_ALT)
10085 {
10086 ccprev = cc + 1 + LINK_SIZE;
10087 cc += GET(cc, 1);
10088 if (opcode != OP_COND && opcode != OP_SCOND)
10089 {
10090 if (opcode != OP_ONCE)
10091 {
10092 if (private_data_ptr != 0)
10093 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10094 else
10095 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10096 }
10097 else
10098 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
10099 }
10100 compile_matchingpath(common, ccprev, cc, current);
10101 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10102 return;
10103 }
10104
10105 /* Instructions after the current alternative is successfully matched. */
10106 /* There is a similar code in compile_bracket_matchingpath. */
10107 if (opcode == OP_ONCE)
10108 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10109
10110 stacksize = 0;
10111 if (repeat_type == OP_MINUPTO)
10112 {
10113 /* We need to preserve the counter. TMP2 will be used below. */
10114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10115 stacksize++;
10116 }
10117 if (ket != OP_KET || bra != OP_BRA)
10118 stacksize++;
10119 if (offset != 0)
10120 {
10121 if (common->capture_last_ptr != 0)
10122 stacksize++;
10123 if (common->optimized_cbracket[offset >> 1] == 0)
10124 stacksize += 2;
10125 }
10126 if (opcode != OP_ONCE)
10127 stacksize++;
10128
10129 if (stacksize > 0)
10130 allocate_stack(common, stacksize);
10131
10132 stacksize = 0;
10133 if (repeat_type == OP_MINUPTO)
10134 {
10135 /* TMP2 was set above. */
10136 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10137 stacksize++;
10138 }
10139
10140 if (ket != OP_KET || bra != OP_BRA)
10141 {
10142 if (ket != OP_KET)
10143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10144 else
10145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10146 stacksize++;
10147 }
10148
10149 if (offset != 0)
10150 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10151
10152 if (opcode != OP_ONCE)
10153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
10154
10155 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
10156 {
10157 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
10158 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10159 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10160 }
10161
10162 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
10163
10164 if (opcode != OP_ONCE)
10165 {
10166 if (alt_max > 4)
10167 add_label_addr(common, next_update_addr++);
10168 else
10169 {
10170 if (alt_count != 2 * sizeof(sljit_uw))
10171 {
10172 JUMPHERE(alt1);
10173 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
10174 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
10175 }
10176 else
10177 {
10178 JUMPHERE(alt2);
10179 if (alt_max == 4)
10180 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
10181 }
10182 }
10183 alt_count += sizeof(sljit_uw);
10184 }
10185
10186 COMPILE_BACKTRACKINGPATH(current->top);
10187 if (current->topbacktracks)
10188 set_jumps(current->topbacktracks, LABEL());
10189 SLJIT_ASSERT(!current->nextbacktracks);
10190 }
10191 while (*cc == OP_ALT);
10192
10193 if (cond != NULL)
10194 {
10195 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
10196 assert = CURRENT_AS(bracket_backtrack)->u.assert;
10197 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
10198 {
10199 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
10200 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10201 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
10202 }
10203 JUMPHERE(cond);
10204 }
10205
10206 /* Free the STR_PTR. */
10207 if (private_data_ptr == 0)
10208 free_stack(common, 1);
10209 }
10210
10211if (offset != 0)
10212 {
10213 /* Using both tmp register is better for instruction scheduling. */
10214 if (common->optimized_cbracket[offset >> 1] != 0)
10215 {
10216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10218 free_stack(common, 2);
10219 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10221 }
10222 else
10223 {
10224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10225 free_stack(common, 1);
10226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10227 }
10228 }
10229else if (opcode == OP_SBRA || opcode == OP_SCOND)
10230 {
10231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
10232 free_stack(common, 1);
10233 }
10234else if (opcode == OP_ONCE)
10235 {
10236 cc = ccbegin + GET(ccbegin, 1);
10237 stacksize = needs_control_head ? 1 : 0;
10238
10239 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10240 {
10241 /* Reset head and drop saved frame. */
10242 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
10243 }
10244 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
10245 {
10246 /* The STR_PTR must be released. */
10247 stacksize++;
10248 }
10249
10250 if (stacksize > 0)
10251 free_stack(common, stacksize);
10252
10253 JUMPHERE(once);
10254 /* Restore previous private_data_ptr */
10255 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
10257 else if (ket == OP_KETRMIN)
10258 {
10259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10260 /* See the comment below. */
10261 free_stack(common, 2);
10262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10263 }
10264 }
10265
10266if (repeat_type == OP_EXACT)
10267 {
10268 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
10270 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
10271 }
10272else if (ket == OP_KETRMAX)
10273 {
10274 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10275 if (bra != OP_BRAZERO)
10276 free_stack(common, 1);
10277
10278 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10279 if (bra == OP_BRAZERO)
10280 {
10281 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10282 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
10283 JUMPHERE(brazero);
10284 free_stack(common, 1);
10285 }
10286 }
10287else if (ket == OP_KETRMIN)
10288 {
10289 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10290
10291 /* OP_ONCE removes everything in case of a backtrack, so we don't
10292 need to explicitly release the STR_PTR. The extra release would
10293 affect badly the free_stack(2) above. */
10294 if (opcode != OP_ONCE)
10295 free_stack(common, 1);
10296 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
10297 if (opcode == OP_ONCE)
10298 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
10299 else if (bra == OP_BRAMINZERO)
10300 free_stack(common, 1);
10301 }
10302else if (bra == OP_BRAZERO)
10303 {
10304 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10305 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
10306 JUMPHERE(brazero);
10307 }
10308}
10309
10310static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10311{
10312DEFINE_COMPILER;
10313int offset;
10314struct sljit_jump *jump;
10315
10316if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10317 {
10318 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10319 {
10320 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10322 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10324 if (common->capture_last_ptr != 0)
10325 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10327 if (common->capture_last_ptr != 0)
10328 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10329 }
10330 set_jumps(current->topbacktracks, LABEL());
10331 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10332 return;
10333 }
10334
10335OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10336add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10337
10338if (current->topbacktracks)
10339 {
10340 jump = JUMP(SLJIT_JUMP);
10341 set_jumps(current->topbacktracks, LABEL());
10342 /* Drop the stack frame. */
10343 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10344 JUMPHERE(jump);
10345 }
10346OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
10347}
10348
10349static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10350{
10351assert_backtrack backtrack;
10352
10353current->top = NULL;
10354current->topbacktracks = NULL;
10355current->nextbacktracks = NULL;
10356if (current->cc[1] > OP_ASSERTBACK_NOT)
10357 {
10358 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10359 compile_bracket_matchingpath(common, current->cc, current);
10360 compile_bracket_backtrackingpath(common, current->top);
10361 }
10362else
10363 {
10364 memset(&backtrack, 0, sizeof(backtrack));
10365 backtrack.common.cc = current->cc;
10366 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10367 /* Manual call of compile_assert_matchingpath. */
10368 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10369 }
10370SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10371}
10372
10373static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10374{
10375DEFINE_COMPILER;
10376pcre_uchar opcode = *current->cc;
10377struct sljit_label *loop;
10378struct sljit_jump *jump;
10379
10380if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10381 {
10382 if (common->then_trap != NULL)
10383 {
10384 SLJIT_ASSERT(common->control_head_ptr != 0);
10385
10386 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10388 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10389 jump = JUMP(SLJIT_JUMP);
10390
10391 loop = LABEL();
10392 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
10393 JUMPHERE(jump);
10394 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
10395 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
10396 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10397 return;
10398 }
10399 else if (common->positive_assert)
10400 {
10401 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10402 return;
10403 }
10404 }
10405
10406if (common->local_exit)
10407 {
10408 if (common->quit_label == NULL)
10409 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10410 else
10411 JUMPTO(SLJIT_JUMP, common->quit_label);
10412 return;
10413 }
10414
10415if (opcode == OP_SKIP_ARG)
10416 {
10417 SLJIT_ASSERT(common->control_head_ptr != 0);
10418 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10420 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10421 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10422 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10423
10424 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10425 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
10426 return;
10427 }
10428
10429if (opcode == OP_SKIP)
10430 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10431else
10432 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10433add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10434}
10435
10436static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10437{
10438DEFINE_COMPILER;
10439struct sljit_jump *jump;
10440int size;
10441
10442if (CURRENT_AS(then_trap_backtrack)->then_trap)
10443 {
10444 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10445 return;
10446 }
10447
10448size = CURRENT_AS(then_trap_backtrack)->framesize;
10449size = 3 + (size < 0 ? 0 : size);
10450
10451OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10452free_stack(common, size);
10453jump = JUMP(SLJIT_JUMP);
10454
10455set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10456/* STACK_TOP is set by THEN. */
10457if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10458 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10459OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10460free_stack(common, 3);
10461
10462JUMPHERE(jump);
10463OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10464}
10465
10466static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10467{
10468DEFINE_COMPILER;
10469then_trap_backtrack *save_then_trap = common->then_trap;
10470
10471while (current)
10472 {
10473 if (current->nextbacktracks != NULL)
10474 set_jumps(current->nextbacktracks, LABEL());
10475 switch(*current->cc)
10476 {
10477 case OP_SET_SOM:
10478 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10479 free_stack(common, 1);
10480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
10481 break;
10482
10483 case OP_STAR:
10484 case OP_MINSTAR:
10485 case OP_PLUS:
10486 case OP_MINPLUS:
10487 case OP_QUERY:
10488 case OP_MINQUERY:
10489 case OP_UPTO:
10490 case OP_MINUPTO:
10491 case OP_EXACT:
10492 case OP_POSSTAR:
10493 case OP_POSPLUS:
10494 case OP_POSQUERY:
10495 case OP_POSUPTO:
10496 case OP_STARI:
10497 case OP_MINSTARI:
10498 case OP_PLUSI:
10499 case OP_MINPLUSI:
10500 case OP_QUERYI:
10501 case OP_MINQUERYI:
10502 case OP_UPTOI:
10503 case OP_MINUPTOI:
10504 case OP_EXACTI:
10505 case OP_POSSTARI:
10506 case OP_POSPLUSI:
10507 case OP_POSQUERYI:
10508 case OP_POSUPTOI:
10509 case OP_NOTSTAR:
10510 case OP_NOTMINSTAR:
10511 case OP_NOTPLUS:
10512 case OP_NOTMINPLUS:
10513 case OP_NOTQUERY:
10514 case OP_NOTMINQUERY:
10515 case OP_NOTUPTO:
10516 case OP_NOTMINUPTO:
10517 case OP_NOTEXACT:
10518 case OP_NOTPOSSTAR:
10519 case OP_NOTPOSPLUS:
10520 case OP_NOTPOSQUERY:
10521 case OP_NOTPOSUPTO:
10522 case OP_NOTSTARI:
10523 case OP_NOTMINSTARI:
10524 case OP_NOTPLUSI:
10525 case OP_NOTMINPLUSI:
10526 case OP_NOTQUERYI:
10527 case OP_NOTMINQUERYI:
10528 case OP_NOTUPTOI:
10529 case OP_NOTMINUPTOI:
10530 case OP_NOTEXACTI:
10531 case OP_NOTPOSSTARI:
10532 case OP_NOTPOSPLUSI:
10533 case OP_NOTPOSQUERYI:
10534 case OP_NOTPOSUPTOI:
10535 case OP_TYPESTAR:
10536 case OP_TYPEMINSTAR:
10537 case OP_TYPEPLUS:
10538 case OP_TYPEMINPLUS:
10539 case OP_TYPEQUERY:
10540 case OP_TYPEMINQUERY:
10541 case OP_TYPEUPTO:
10542 case OP_TYPEMINUPTO:
10543 case OP_TYPEEXACT:
10544 case OP_TYPEPOSSTAR:
10545 case OP_TYPEPOSPLUS:
10546 case OP_TYPEPOSQUERY:
10547 case OP_TYPEPOSUPTO:
10548 case OP_CLASS:
10549 case OP_NCLASS:
10550#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
10551 case OP_XCLASS:
10552#endif
10553 compile_iterator_backtrackingpath(common, current);
10554 break;
10555
10556 case OP_REF:
10557 case OP_REFI:
10558 case OP_DNREF:
10559 case OP_DNREFI:
10560 compile_ref_iterator_backtrackingpath(common, current);
10561 break;
10562
10563 case OP_RECURSE:
10564 compile_recurse_backtrackingpath(common, current);
10565 break;
10566
10567 case OP_ASSERT:
10568 case OP_ASSERT_NOT:
10569 case OP_ASSERTBACK:
10570 case OP_ASSERTBACK_NOT:
10571 compile_assert_backtrackingpath(common, current);
10572 break;
10573
10574 case OP_ONCE:
10575 case OP_ONCE_NC:
10576 case OP_BRA:
10577 case OP_CBRA:
10578 case OP_COND:
10579 case OP_SBRA:
10580 case OP_SCBRA:
10581 case OP_SCOND:
10582 compile_bracket_backtrackingpath(common, current);
10583 break;
10584
10585 case OP_BRAZERO:
10586 if (current->cc[1] > OP_ASSERTBACK_NOT)
10587 compile_bracket_backtrackingpath(common, current);
10588 else
10589 compile_assert_backtrackingpath(common, current);
10590 break;
10591
10592 case OP_BRAPOS:
10593 case OP_CBRAPOS:
10594 case OP_SBRAPOS:
10595 case OP_SCBRAPOS:
10596 case OP_BRAPOSZERO:
10597 compile_bracketpos_backtrackingpath(common, current);
10598 break;
10599
10600 case OP_BRAMINZERO:
10601 compile_braminzero_backtrackingpath(common, current);
10602 break;
10603
10604 case OP_MARK:
10605 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
10606 if (common->has_skip_arg)
10607 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10608 free_stack(common, common->has_skip_arg ? 5 : 1);
10609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
10610 if (common->has_skip_arg)
10611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10612 break;
10613
10614 case OP_THEN:
10615 case OP_THEN_ARG:
10616 case OP_PRUNE:
10617 case OP_PRUNE_ARG:
10618 case OP_SKIP:
10619 case OP_SKIP_ARG:
10620 compile_control_verb_backtrackingpath(common, current);
10621 break;
10622
10623 case OP_COMMIT:
10624 if (!common->local_exit)
10625 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10626 if (common->quit_label == NULL)
10627 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10628 else
10629 JUMPTO(SLJIT_JUMP, common->quit_label);
10630 break;
10631
10632 case OP_CALLOUT:
10633 case OP_FAIL:
10634 case OP_ACCEPT:
10635 case OP_ASSERT_ACCEPT:
10636 set_jumps(current->topbacktracks, LABEL());
10637 break;
10638
10639 case OP_THEN_TRAP:
10640 /* A virtual opcode for then traps. */
10641 compile_then_trap_backtrackingpath(common, current);
10642 break;
10643
10644 default:
10645 SLJIT_ASSERT_STOP();
10646 break;
10647 }
10648 current = current->prev;
10649 }
10650common->then_trap = save_then_trap;
10651}
10652
10653static SLJIT_INLINE void compile_recurse(compiler_common *common)
10654{
10655DEFINE_COMPILER;
10656pcre_uchar *cc = common->start + common->currententry->start;
10657pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10658pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10659BOOL needs_control_head;
10660int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10661int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10662int alternativesize;
10663BOOL needs_frame;
10664backtrack_common altbacktrack;
10665struct sljit_jump *jump;
10666
10667/* Recurse captures then. */
10668common->then_trap = NULL;
10669
10670SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10671needs_frame = framesize >= 0;
10672if (!needs_frame)
10673 framesize = 0;
10674alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10675
10676SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10677common->currententry->entry = LABEL();
10678set_jumps(common->currententry->calls, common->currententry->entry);
10679
10680sljit_emit_fast_enter(compiler, TMP2, 0);
10681count_match(common);
10682allocate_stack(common, private_data_size + framesize + alternativesize);
10683OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10684copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
10685if (needs_control_head)
10686 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10687OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10688if (needs_frame)
10689 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10690
10691if (alternativesize > 0)
10692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10693
10694memset(&altbacktrack, 0, sizeof(backtrack_common));
10695common->quit_label = NULL;
10696common->accept_label = NULL;
10697common->quit = NULL;
10698common->accept = NULL;
10699altbacktrack.cc = ccbegin;
10700cc += GET(cc, 1);
10701while (1)
10702 {
10703 altbacktrack.top = NULL;
10704 altbacktrack.topbacktracks = NULL;
10705
10706 if (altbacktrack.cc != ccbegin)
10707 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10708
10709 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10710 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10711 return;
10712
10713 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10714
10715 compile_backtrackingpath(common, altbacktrack.top);
10716 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10717 return;
10718 set_jumps(altbacktrack.topbacktracks, LABEL());
10719
10720 if (*cc != OP_ALT)
10721 break;
10722
10723 altbacktrack.cc = cc + 1 + LINK_SIZE;
10724 cc += GET(cc, 1);
10725 }
10726
10727/* None of them matched. */
10728OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10729jump = JUMP(SLJIT_JUMP);
10730
10731if (common->quit != NULL)
10732 {
10733 set_jumps(common->quit, LABEL());
10734 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10735 if (needs_frame)
10736 {
10737 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10738 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10739 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10740 }
10741 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10742 common->quit = NULL;
10743 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10744 }
10745
10746set_jumps(common->accept, LABEL());
10747OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10748if (needs_frame)
10749 {
10750 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10751 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10752 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10753 }
10754OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10755
10756JUMPHERE(jump);
10757if (common->quit != NULL)
10758 set_jumps(common->quit, LABEL());
10759copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
10760free_stack(common, private_data_size + framesize + alternativesize);
10761if (needs_control_head)
10762 {
10763 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
10764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
10765 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10766 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10767 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10768 }
10769else
10770 {
10771 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
10772 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10773 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10774 }
10775sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
10776}
10777
10778#undef COMPILE_BACKTRACKINGPATH
10779#undef CURRENT_AS
10780
10781void
10782PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
10783{
10784struct sljit_compiler *compiler;
10785backtrack_common rootbacktrack;
10786compiler_common common_data;
10787compiler_common *common = &common_data;
10788const sljit_u8 *tables = re->tables;
10789pcre_study_data *study;
10790int private_data_size;
10791pcre_uchar *ccend;
10792executable_functions *functions;
10793void *executable_func;
10794sljit_uw executable_size;
10795sljit_uw total_length;
10796label_addr_list *label_addr;
10797struct sljit_label *mainloop_label = NULL;
10798struct sljit_label *continue_match_label;
10799struct sljit_label *empty_match_found_label = NULL;
10800struct sljit_label *empty_match_backtrack_label = NULL;
10801struct sljit_label *reset_match_label;
10802struct sljit_label *quit_label;
10803struct sljit_jump *jump;
10804struct sljit_jump *minlength_check_failed = NULL;
10805struct sljit_jump *reqbyte_notfound = NULL;
10806struct sljit_jump *empty_match = NULL;
10807
10808SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
10809study = extra->study_data;
10810
10811if (!tables)
10812 tables = PRIV(default_tables);
10813
10814memset(&rootbacktrack, 0, sizeof(backtrack_common));
10815memset(common, 0, sizeof(compiler_common));
10816rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
10817
10818common->start = rootbacktrack.cc;
10819common->read_only_data_head = NULL;
10820common->fcc = tables + fcc_offset;
10821common->lcc = (sljit_sw)(tables + lcc_offset);
10822common->mode = mode;
10823common->might_be_empty = study->minlength == 0;
10824common->nltype = NLTYPE_FIXED;
10825switch(re->options & PCRE_NEWLINE_BITS)
10826 {
10827 case 0:
10828 /* Compile-time default */
10829 switch(NEWLINE)
10830 {
10831 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10832 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10833 default: common->newline = NEWLINE; break;
10834 }
10835 break;
10836 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
10837 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
10838 case PCRE_NEWLINE_CR+
10839 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
10840 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
10841 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
10842 default: return;
10843 }
10844common->nlmax = READ_CHAR_MAX;
10845common->nlmin = 0;
10846if ((re->options & PCRE_BSR_ANYCRLF) != 0)
10847 common->bsr_nltype = NLTYPE_ANYCRLF;
10848else if ((re->options & PCRE_BSR_UNICODE) != 0)
10849 common->bsr_nltype = NLTYPE_ANY;
10850else
10851 {
10852#ifdef BSR_ANYCRLF
10853 common->bsr_nltype = NLTYPE_ANYCRLF;
10854#else
10855 common->bsr_nltype = NLTYPE_ANY;
10856#endif
10857 }
10858common->bsr_nlmax = READ_CHAR_MAX;
10859common->bsr_nlmin = 0;
10860common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
10861common->ctypes = (sljit_sw)(tables + ctypes_offset);
10862common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
10863common->name_count = re->name_count;
10864common->name_entry_size = re->name_entry_size;
10865common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
10866#ifdef SUPPORT_UTF
10867/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
10868common->utf = (re->options & PCRE_UTF8) != 0;
10869#ifdef SUPPORT_UCP
10870common->use_ucp = (re->options & PCRE_UCP) != 0;
10871#endif
10872if (common->utf)
10873 {
10874 if (common->nltype == NLTYPE_ANY)
10875 common->nlmax = 0x2029;
10876 else if (common->nltype == NLTYPE_ANYCRLF)
10877 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10878 else
10879 {
10880 /* We only care about the first newline character. */
10881 common->nlmax = common->newline & 0xff;
10882 }
10883
10884 if (common->nltype == NLTYPE_FIXED)
10885 common->nlmin = common->newline & 0xff;
10886 else
10887 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10888
10889 if (common->bsr_nltype == NLTYPE_ANY)
10890 common->bsr_nlmax = 0x2029;
10891 else
10892 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
10893 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
10894 }
10895#endif /* SUPPORT_UTF */
10896ccend = bracketend(common->start);
10897
10898/* Calculate the local space size on the stack. */
10899common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
10900common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
10901if (!common->optimized_cbracket)
10902 return;
10903#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
10904memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10905#else
10906memset(common->optimized_cbracket, 1, re->top_bracket + 1);
10907#endif
10908
10909SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
10910#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
10911common->capture_last_ptr = common->ovector_start;
10912common->ovector_start += sizeof(sljit_sw);
10913#endif
10914if (!check_opcode_types(common, common->start, ccend))
10915 {
10916 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10917 return;
10918 }
10919
10920/* Checking flags and updating ovector_start. */
10921if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10922 {
10923 common->req_char_ptr = common->ovector_start;
10924 common->ovector_start += sizeof(sljit_sw);
10925 }
10926if (mode != JIT_COMPILE)
10927 {
10928 common->start_used_ptr = common->ovector_start;
10929 common->ovector_start += sizeof(sljit_sw);
10930 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10931 {
10932 common->hit_start = common->ovector_start;
10933 common->ovector_start += 2 * sizeof(sljit_sw);
10934 }
10935 }
10936if ((re->options & PCRE_FIRSTLINE) != 0)
10937 {
10938 common->match_end_ptr = common->ovector_start;
10939 common->ovector_start += sizeof(sljit_sw);
10940 }
10941#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
10942common->control_head_ptr = 1;
10943#endif
10944if (common->control_head_ptr != 0)
10945 {
10946 common->control_head_ptr = common->ovector_start;
10947 common->ovector_start += sizeof(sljit_sw);
10948 }
10949if (common->has_set_som)
10950 {
10951 /* Saving the real start pointer is necessary. */
10952 common->start_ptr = common->ovector_start;
10953 common->ovector_start += sizeof(sljit_sw);
10954 }
10955
10956/* Aligning ovector to even number of sljit words. */
10957if ((common->ovector_start & sizeof(sljit_sw)) != 0)
10958 common->ovector_start += sizeof(sljit_sw);
10959
10960if (common->start_ptr == 0)
10961 common->start_ptr = OVECTOR(0);
10962
10963/* Capturing brackets cannot be optimized if callouts are allowed. */
10964if (common->capture_last_ptr != 0)
10965 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
10966
10967SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
10968common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
10969
10970total_length = ccend - common->start;
10971common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
10972if (!common->private_data_ptrs)
10973 {
10974 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10975 return;
10976 }
10977memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
10978
10979private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
10980set_private_data_ptrs(common, &private_data_size, ccend);
10981if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10982 {
10983 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
10984 detect_fast_fail(common, common->start, &private_data_size, 4);
10985 }
10986
10987SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
10988
10989if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
10990 {
10991 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10992 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10993 return;
10994 }
10995
10996if (common->has_then)
10997 {
10998 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
10999 memset(common->then_offsets, 0, total_length);
11000 set_then_offsets(common, common->start, NULL);
11001 }
11002
11003compiler = sljit_create_compiler(NULL);
11004if (!compiler)
11005 {
11006 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11007 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11008 return;
11009 }
11010common->compiler = compiler;
11011
11012/* Main pcre_jit_exec entry. */
11013sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
11014
11015/* Register init. */
11016reset_ovector(common, (re->top_bracket + 1) * 2);
11017if (common->req_char_ptr != 0)
11018 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11019
11020OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11021OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11022OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11023OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11024OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11025OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11026OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
11027OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
11028OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11029OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11030
11031if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11032 reset_fast_fail(common);
11033
11034if (mode == JIT_PARTIAL_SOFT_COMPILE)
11035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11036if (common->mark_ptr != 0)
11037 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11038if (common->control_head_ptr != 0)
11039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11040
11041/* Main part of the matching */
11042if ((re->options & PCRE_ANCHORED) == 0)
11043 {
11044 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11045 continue_match_label = LABEL();
11046 /* Forward search if possible. */
11047 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11048 {
11049 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11050 ;
11051 else if ((re->flags & PCRE_FIRSTSET) != 0)
11052 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11053 else if ((re->flags & PCRE_STARTLINE) != 0)
11054 fast_forward_newline(common);
11055 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11056 fast_forward_start_bits(common, study->start_bits);
11057 }
11058 }
11059else
11060 continue_match_label = LABEL();
11061
11062if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11063 {
11064 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11065 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11066 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11067 }
11068if (common->req_char_ptr != 0)
11069 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11070
11071/* Store the current STR_PTR in OVECTOR(0). */
11072OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11073/* Copy the limit of allowed recursions. */
11074OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11075if (common->capture_last_ptr != 0)
11076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11077if (common->fast_forward_bc_ptr != NULL)
11078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11079
11080if (common->start_ptr != OVECTOR(0))
11081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11082
11083/* Copy the beginning of the string. */
11084if (mode == JIT_PARTIAL_SOFT_COMPILE)
11085 {
11086 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11089 JUMPHERE(jump);
11090 }
11091else if (mode == JIT_PARTIAL_HARD_COMPILE)
11092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11093
11094compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11095if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11096 {
11097 sljit_free_compiler(compiler);
11098 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11099 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11100 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11101 return;
11102 }
11103
11104if (common->might_be_empty)
11105 {
11106 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11107 empty_match_found_label = LABEL();
11108 }
11109
11110common->accept_label = LABEL();
11111if (common->accept != NULL)
11112 set_jumps(common->accept, common->accept_label);
11113
11114/* This means we have a match. Update the ovector. */
11115copy_ovector(common, re->top_bracket + 1);
11116common->quit_label = common->forced_quit_label = LABEL();
11117if (common->quit != NULL)
11118 set_jumps(common->quit, common->quit_label);
11119if (common->forced_quit != NULL)
11120 set_jumps(common->forced_quit, common->forced_quit_label);
11121if (minlength_check_failed != NULL)
11122 SET_LABEL(minlength_check_failed, common->forced_quit_label);
11123sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11124
11125if (mode != JIT_COMPILE)
11126 {
11127 common->partialmatchlabel = LABEL();
11128 set_jumps(common->partialmatch, common->partialmatchlabel);
11129 return_with_partial_match(common, common->quit_label);
11130 }
11131
11132if (common->might_be_empty)
11133 empty_match_backtrack_label = LABEL();
11134compile_backtrackingpath(common, rootbacktrack.top);
11135if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11136 {
11137 sljit_free_compiler(compiler);
11138 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11139 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11140 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11141 return;
11142 }
11143
11144SLJIT_ASSERT(rootbacktrack.prev == NULL);
11145reset_match_label = LABEL();
11146
11147if (mode == JIT_PARTIAL_SOFT_COMPILE)
11148 {
11149 /* Update hit_start only in the first time. */
11150 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11151 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11154 JUMPHERE(jump);
11155 }
11156
11157/* Check we have remaining characters. */
11158if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11159 {
11160 SLJIT_ASSERT(common->match_end_ptr != 0);
11161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11162 }
11163
11164OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11165 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11166
11167if ((re->options & PCRE_ANCHORED) == 0)
11168 {
11169 if (common->ff_newline_shortcut != NULL)
11170 {
11171 if ((re->options & PCRE_FIRSTLINE) == 0)
11172 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11173 /* There cannot be more newlines here. */
11174 }
11175 else
11176 CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11177 }
11178
11179/* No more remaining characters. */
11180if (reqbyte_notfound != NULL)
11181 JUMPHERE(reqbyte_notfound);
11182
11183if (mode == JIT_PARTIAL_SOFT_COMPILE)
11184 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11185
11186OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11187JUMPTO(SLJIT_JUMP, common->quit_label);
11188
11189flush_stubs(common);
11190
11191if (common->might_be_empty)
11192 {
11193 JUMPHERE(empty_match);
11194 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11195 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11196 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11197 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11198 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11200 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11201 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11202 }
11203
11204common->fast_forward_bc_ptr = NULL;
11205common->fast_fail_start_ptr = 0;
11206common->fast_fail_end_ptr = 0;
11207common->currententry = common->entries;
11208common->local_exit = TRUE;
11209quit_label = common->quit_label;
11210while (common->currententry != NULL)
11211 {
11212 /* Might add new entries. */
11213 compile_recurse(common);
11214 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11215 {
11216 sljit_free_compiler(compiler);
11217 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11218 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11219 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11220 return;
11221 }
11222 flush_stubs(common);
11223 common->currententry = common->currententry->next;
11224 }
11225common->local_exit = FALSE;
11226common->quit_label = quit_label;
11227
11228/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11229/* This is a (really) rare case. */
11230set_jumps(common->stackalloc, LABEL());
11231/* RETURN_ADDR is not a saved register. */
11232sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11233OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
11234OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11235OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11236OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
11237OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
11238
11239sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11240jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11241OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11242OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11243OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
11244OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
11245OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11246sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11247
11248/* Allocation failed. */
11249JUMPHERE(jump);
11250/* We break the return address cache here, but this is a really rare case. */
11251OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11252JUMPTO(SLJIT_JUMP, common->quit_label);
11253
11254/* Call limit reached. */
11255set_jumps(common->calllimit, LABEL());
11256OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11257JUMPTO(SLJIT_JUMP, common->quit_label);
11258
11259if (common->revertframes != NULL)
11260 {
11261 set_jumps(common->revertframes, LABEL());
11262 do_revertframes(common);
11263 }
11264if (common->wordboundary != NULL)
11265 {
11266 set_jumps(common->wordboundary, LABEL());
11267 check_wordboundary(common);
11268 }
11269if (common->anynewline != NULL)
11270 {
11271 set_jumps(common->anynewline, LABEL());
11272 check_anynewline(common);
11273 }
11274if (common->hspace != NULL)
11275 {
11276 set_jumps(common->hspace, LABEL());
11277 check_hspace(common);
11278 }
11279if (common->vspace != NULL)
11280 {
11281 set_jumps(common->vspace, LABEL());
11282 check_vspace(common);
11283 }
11284if (common->casefulcmp != NULL)
11285 {
11286 set_jumps(common->casefulcmp, LABEL());
11287 do_casefulcmp(common);
11288 }
11289if (common->caselesscmp != NULL)
11290 {
11291 set_jumps(common->caselesscmp, LABEL());
11292 do_caselesscmp(common);
11293 }
11294if (common->reset_match != NULL)
11295 {
11296 set_jumps(common->reset_match, LABEL());
11297 do_reset_match(common, (re->top_bracket + 1) * 2);
11298 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11299 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11300 JUMPTO(SLJIT_JUMP, reset_match_label);
11301 }
11302#ifdef SUPPORT_UTF
11303#ifdef COMPILE_PCRE8
11304if (common->utfreadchar != NULL)
11305 {
11306 set_jumps(common->utfreadchar, LABEL());
11307 do_utfreadchar(common);
11308 }
11309if (common->utfreadchar16 != NULL)
11310 {
11311 set_jumps(common->utfreadchar16, LABEL());
11312 do_utfreadchar16(common);
11313 }
11314if (common->utfreadtype8 != NULL)
11315 {
11316 set_jumps(common->utfreadtype8, LABEL());
11317 do_utfreadtype8(common);
11318 }
11319#endif /* COMPILE_PCRE8 */
11320#endif /* SUPPORT_UTF */
11321#ifdef SUPPORT_UCP
11322if (common->getucd != NULL)
11323 {
11324 set_jumps(common->getucd, LABEL());
11325 do_getucd(common);
11326 }
11327#endif
11328
11329SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11330SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11331
11332executable_func = sljit_generate_code(compiler);
11333executable_size = sljit_get_generated_code_size(compiler);
11334label_addr = common->label_addrs;
11335while (label_addr != NULL)
11336 {
11337 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11338 label_addr = label_addr->next;
11339 }
11340sljit_free_compiler(compiler);
11341if (executable_func == NULL)
11342 {
11343 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11344 return;
11345 }
11346
11347/* Reuse the function descriptor if possible. */
11348if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11349 functions = (executable_functions *)extra->executable_jit;
11350else
11351 {
11352 /* Note: If your memory-checker has flagged the allocation below as a
11353 * memory leak, it is probably because you either forgot to call
11354 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11355 * pcre16_extra) object, or you called said function after having
11356 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11357 * of the object. (The function will only free the JIT data if the
11358 * bit remains set, as the bit indicates that the pointer to the data
11359 * is valid.)
11360 */
11361 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11362 if (functions == NULL)
11363 {
11364 /* This case is highly unlikely since we just recently
11365 freed a lot of memory. Not impossible though. */
11366 sljit_free_code(executable_func);
11367 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11368 return;
11369 }
11370 memset(functions, 0, sizeof(executable_functions));
11371 functions->top_bracket = (re->top_bracket + 1) * 2;
11372 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11373 extra->executable_jit = functions;
11374 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11375 }
11376
11377functions->executable_funcs[mode] = executable_func;
11378functions->read_only_data_heads[mode] = common->read_only_data_head;
11379functions->executable_sizes[mode] = executable_size;
11380}
11381
11382static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11383{
11384union {
11385 void *executable_func;
11386 jit_function call_executable_func;
11387} convert_executable_func;
11388sljit_u8 local_space[MACHINE_STACK_SIZE];
11389struct sljit_stack local_stack;
11390
11391local_stack.top = (sljit_sw)&local_space;
11392local_stack.base = local_stack.top;
11393local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
11394local_stack.max_limit = local_stack.limit;
11395arguments->stack = &local_stack;
11396convert_executable_func.executable_func = executable_func;
11397return convert_executable_func.call_executable_func(arguments);
11398}
11399
11400int
11401PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11402 int length, int start_offset, int options, int *offsets, int offset_count)
11403{
11404executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11405union {
11406 void *executable_func;
11407 jit_function call_executable_func;
11408} convert_executable_func;
11409jit_arguments arguments;
11410int max_offset_count;
11411int retval;
11412int mode = JIT_COMPILE;
11413
11414if ((options & PCRE_PARTIAL_HARD) != 0)
11415 mode = JIT_PARTIAL_HARD_COMPILE;
11416else if ((options & PCRE_PARTIAL_SOFT) != 0)
11417 mode = JIT_PARTIAL_SOFT_COMPILE;
11418
11419if (functions->executable_funcs[mode] == NULL)
11420 return PCRE_ERROR_JIT_BADOPTION;
11421
11422/* Sanity checks should be handled by pcre_exec. */
11423arguments.str = subject + start_offset;
11424arguments.begin = subject;
11425arguments.end = subject + length;
11426arguments.mark_ptr = NULL;
11427/* JIT decreases this value less frequently than the interpreter. */
11428arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11429if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11430 arguments.limit_match = functions->limit_match;
11431arguments.notbol = (options & PCRE_NOTBOL) != 0;
11432arguments.noteol = (options & PCRE_NOTEOL) != 0;
11433arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11434arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11435arguments.offsets = offsets;
11436arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11437arguments.real_offset_count = offset_count;
11438
11439/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11440the output vector for storing captured strings, with the remainder used as
11441workspace. We don't need the workspace here. For compatibility, we limit the
11442number of captured strings in the same way as pcre_exec(), so that the user
11443gets the same result with and without JIT. */
11444
11445if (offset_count != 2)
11446 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11447max_offset_count = functions->top_bracket;
11448if (offset_count > max_offset_count)
11449 offset_count = max_offset_count;
11450arguments.offset_count = offset_count;
11451
11452if (functions->callback)
11453 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11454else
11455 arguments.stack = (struct sljit_stack *)functions->userdata;
11456
11457if (arguments.stack == NULL)
11458 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11459else
11460 {
11461 convert_executable_func.executable_func = functions->executable_funcs[mode];
11462 retval = convert_executable_func.call_executable_func(&arguments);
11463 }
11464
11465if (retval * 2 > offset_count)
11466 retval = 0;
11467if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11468 *(extra_data->mark) = arguments.mark_ptr;
11469
11470return retval;
11471}
11472
11473#if defined COMPILE_PCRE8
11474PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11475pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11476 PCRE_SPTR subject, int length, int start_offset, int options,
11477 int *offsets, int offset_count, pcre_jit_stack *stack)
11478#elif defined COMPILE_PCRE16
11479PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11480pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11481 PCRE_SPTR16 subject, int length, int start_offset, int options,
11482 int *offsets, int offset_count, pcre16_jit_stack *stack)
11483#elif defined COMPILE_PCRE32
11484PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11485pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11486 PCRE_SPTR32 subject, int length, int start_offset, int options,
11487 int *offsets, int offset_count, pcre32_jit_stack *stack)
11488#endif
11489{
11490pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11491executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11492union {
11493 void *executable_func;
11494 jit_function call_executable_func;
11495} convert_executable_func;
11496jit_arguments arguments;
11497int max_offset_count;
11498int retval;
11499int mode = JIT_COMPILE;
11500
11501SLJIT_UNUSED_ARG(argument_re);
11502
11503/* Plausibility checks */
11504if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11505
11506if ((options & PCRE_PARTIAL_HARD) != 0)
11507 mode = JIT_PARTIAL_HARD_COMPILE;
11508else if ((options & PCRE_PARTIAL_SOFT) != 0)
11509 mode = JIT_PARTIAL_SOFT_COMPILE;
11510
11511if (functions->executable_funcs[mode] == NULL)
11512 return PCRE_ERROR_JIT_BADOPTION;
11513
11514/* Sanity checks should be handled by pcre_exec. */
11515arguments.stack = (struct sljit_stack *)stack;
11516arguments.str = subject_ptr + start_offset;
11517arguments.begin = subject_ptr;
11518arguments.end = subject_ptr + length;
11519arguments.mark_ptr = NULL;
11520/* JIT decreases this value less frequently than the interpreter. */
11521arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11522if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11523 arguments.limit_match = functions->limit_match;
11524arguments.notbol = (options & PCRE_NOTBOL) != 0;
11525arguments.noteol = (options & PCRE_NOTEOL) != 0;
11526arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11527arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11528arguments.offsets = offsets;
11529arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11530arguments.real_offset_count = offset_count;
11531
11532/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11533the output vector for storing captured strings, with the remainder used as
11534workspace. We don't need the workspace here. For compatibility, we limit the
11535number of captured strings in the same way as pcre_exec(), so that the user
11536gets the same result with and without JIT. */
11537
11538if (offset_count != 2)
11539 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11540max_offset_count = functions->top_bracket;
11541if (offset_count > max_offset_count)
11542 offset_count = max_offset_count;
11543arguments.offset_count = offset_count;
11544
11545convert_executable_func.executable_func = functions->executable_funcs[mode];
11546retval = convert_executable_func.call_executable_func(&arguments);
11547
11548if (retval * 2 > offset_count)
11549 retval = 0;
11550if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11551 *(extra_data->mark) = arguments.mark_ptr;
11552
11553return retval;
11554}
11555
11556void
11557PRIV(jit_free)(void *executable_funcs)
11558{
11559int i;
11560executable_functions *functions = (executable_functions *)executable_funcs;
11561for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11562 {
11563 if (functions->executable_funcs[i] != NULL)
11564 sljit_free_code(functions->executable_funcs[i]);
11565 free_read_only_data(functions->read_only_data_heads[i], NULL);
11566 }
11567SLJIT_FREE(functions, compiler->allocator_data);
11568}
11569
11570int
11571PRIV(jit_get_size)(void *executable_funcs)
11572{
11573int i;
11574sljit_uw size = 0;
11575sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11576for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11577 size += executable_sizes[i];
11578return (int)size;
11579}
11580
11581const char*
11582PRIV(jit_get_target)(void)
11583{
11584return sljit_get_platform_name();
11585}
11586
11587#if defined COMPILE_PCRE8
11588PCRE_EXP_DECL pcre_jit_stack *
11589pcre_jit_stack_alloc(int startsize, int maxsize)
11590#elif defined COMPILE_PCRE16
11591PCRE_EXP_DECL pcre16_jit_stack *
11592pcre16_jit_stack_alloc(int startsize, int maxsize)
11593#elif defined COMPILE_PCRE32
11594PCRE_EXP_DECL pcre32_jit_stack *
11595pcre32_jit_stack_alloc(int startsize, int maxsize)
11596#endif
11597{
11598if (startsize < 1 || maxsize < 1)
11599 return NULL;
11600if (startsize > maxsize)
11601 startsize = maxsize;
11602startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11603maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11604return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11605}
11606
11607#if defined COMPILE_PCRE8
11608PCRE_EXP_DECL void
11609pcre_jit_stack_free(pcre_jit_stack *stack)
11610#elif defined COMPILE_PCRE16
11611PCRE_EXP_DECL void
11612pcre16_jit_stack_free(pcre16_jit_stack *stack)
11613#elif defined COMPILE_PCRE32
11614PCRE_EXP_DECL void
11615pcre32_jit_stack_free(pcre32_jit_stack *stack)
11616#endif
11617{
11618sljit_free_stack((struct sljit_stack *)stack, NULL);
11619}
11620
11621#if defined COMPILE_PCRE8
11622PCRE_EXP_DECL void
11623pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11624#elif defined COMPILE_PCRE16
11625PCRE_EXP_DECL void
11626pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11627#elif defined COMPILE_PCRE32
11628PCRE_EXP_DECL void
11629pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11630#endif
11631{
11632executable_functions *functions;
11633if (extra != NULL &&
11634 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11635 extra->executable_jit != NULL)
11636 {
11637 functions = (executable_functions *)extra->executable_jit;
11638 functions->callback = callback;
11639 functions->userdata = userdata;
11640 }
11641}
11642
11643#if defined COMPILE_PCRE8
11644PCRE_EXP_DECL void
11645pcre_jit_free_unused_memory(void)
11646#elif defined COMPILE_PCRE16
11647PCRE_EXP_DECL void
11648pcre16_jit_free_unused_memory(void)
11649#elif defined COMPILE_PCRE32
11650PCRE_EXP_DECL void
11651pcre32_jit_free_unused_memory(void)
11652#endif
11653{
11654sljit_free_unused_memory_exec();
11655}
11656
11657#else /* SUPPORT_JIT */
11658
11659/* These are dummy functions to avoid linking errors when JIT support is not
11660being compiled. */
11661
11662#if defined COMPILE_PCRE8
11663PCRE_EXP_DECL pcre_jit_stack *
11664pcre_jit_stack_alloc(int startsize, int maxsize)
11665#elif defined COMPILE_PCRE16
11666PCRE_EXP_DECL pcre16_jit_stack *
11667pcre16_jit_stack_alloc(int startsize, int maxsize)
11668#elif defined COMPILE_PCRE32
11669PCRE_EXP_DECL pcre32_jit_stack *
11670pcre32_jit_stack_alloc(int startsize, int maxsize)
11671#endif
11672{
11673(void)startsize;
11674(void)maxsize;
11675return NULL;
11676}
11677
11678#if defined COMPILE_PCRE8
11679PCRE_EXP_DECL void
11680pcre_jit_stack_free(pcre_jit_stack *stack)
11681#elif defined COMPILE_PCRE16
11682PCRE_EXP_DECL void
11683pcre16_jit_stack_free(pcre16_jit_stack *stack)
11684#elif defined COMPILE_PCRE32
11685PCRE_EXP_DECL void
11686pcre32_jit_stack_free(pcre32_jit_stack *stack)
11687#endif
11688{
11689(void)stack;
11690}
11691
11692#if defined COMPILE_PCRE8
11693PCRE_EXP_DECL void
11694pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11695#elif defined COMPILE_PCRE16
11696PCRE_EXP_DECL void
11697pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11698#elif defined COMPILE_PCRE32
11699PCRE_EXP_DECL void
11700pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11701#endif
11702{
11703(void)extra;
11704(void)callback;
11705(void)userdata;
11706}
11707
11708#if defined COMPILE_PCRE8
11709PCRE_EXP_DECL void
11710pcre_jit_free_unused_memory(void)
11711#elif defined COMPILE_PCRE16
11712PCRE_EXP_DECL void
11713pcre16_jit_free_unused_memory(void)
11714#elif defined COMPILE_PCRE32
11715PCRE_EXP_DECL void
11716pcre32_jit_free_unused_memory(void)
11717#endif
11718{
11719}
11720
11721#endif
11722
11723/* End of pcre_jit_compile.c */
11724