1/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14-----------------------------------------------------------------------------
15Redistribution and use in source and binary forms, with or without
16modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39POSSIBILITY OF SUCH DAMAGE.
40-----------------------------------------------------------------------------
41*/
42
43#include "pcre_config.h"
44
45
46#include "pcre_internal.h"
47
48#if defined SUPPORT_JIT
49
50/* All-in-one: Since we use the JIT compiler only from here,
51we just include it. This way we don't need to touch the build
52system files. */
53
54#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
55#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
56#define SLJIT_CONFIG_AUTO 1
57#define SLJIT_CONFIG_STATIC 1
58#define SLJIT_VERBOSE 0
59#define SLJIT_DEBUG 0
60
61#include "sljit/sljitLir.c"
62
63#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
64#error Unsupported architecture
65#endif
66
67/* Defines for debugging purposes. */
68
69/* 1 - Use unoptimized capturing brackets.
70 2 - Enable capture_last_ptr (includes option 1). */
71/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
72
73/* 1 - Always have a control head. */
74/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
75
/* Size of the regex stack that is allocated on the real machine stack.
Fast, but the size is limited. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Only when sljit debugging is switched on: enable the check that an
allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
88
89/*
Short summary about the backtracking mechanism employed by the jit code generator:
91
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
97
98 'ab' - 'a' and 'b' regexps are concatenated
99 'a+' - 'a' is the sub-expression of the '+' operator
100
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
106
107 Greedy star operator (*) :
108 Matching path: match happens.
109 Backtrack path: match failed.
110 Non-greedy star operator (*?) :
111 Matching path: no need to perform a match.
112 Backtrack path: match is required.
113
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
assume we have the following regular expression:
117
118 A(B|C)D
119
120The generated code will be the following:
121
122 A matching path
123 '(' matching path (pushing arguments to the stack)
124 B matching path
125 ')' matching path (pushing arguments to the stack)
126 D matching path
127 return with successful match
128
129 D backtrack path
130 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
131 B backtrack path
132 C expected path
133 jump to D matching path
134 C backtrack path
135 A backtrack path
136
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
143*/
144
145/*
146Saved stack frames:
147
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
152
153The stack frames are stored in a chain list, and have the following format:
154([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
155
156Thus we can restore the private data to a particular point in the stack.
157*/
158
159typedef struct jit_arguments {
160 /* Pointers first. */
161 struct sljit_stack *stack;
162 const pcre_uchar *str;
163 const pcre_uchar *begin;
164 const pcre_uchar *end;
165 int *offsets;
166 pcre_uchar *mark_ptr;
167 void *callout_data;
168 /* Everything else after. */
169 sljit_u32 limit_match;
170 int real_offset_count;
171 int offset_count;
172 sljit_u8 notbol;
173 sljit_u8 noteol;
174 sljit_u8 notempty;
175 sljit_u8 notempty_atstart;
176} jit_arguments;
177
178typedef struct executable_functions {
179 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
180 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
181 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 sljit_u32 top_bracket;
185 sljit_u32 limit_match;
186} executable_functions;
187
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  /* The jump stored in this node. */
  struct sljit_jump *jump;
  /* Following node of the list. */
  struct jump_list *next;
} jump_list;
192
/* Node of a singly linked list of stubs; each node pairs a jump
(start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  /* Following node of the list. */
  struct stub_list *next;
} stub_list;
198
199typedef struct label_addr_list {
200 struct sljit_label *label;
201 sljit_uw *update_addr;
202 struct label_addr_list *next;
203} label_addr_list;
204
/* Negative sentinel constants.
NOTE(review): presumably stored in the framesize members of the
backtrack structures, which are documented as negative when no frame
is needed - confirm against the users. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
209
/* Kinds of control entries: a mark or a then-trap. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
214
215typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
216
217/* The following structure is the key data type for the recursive
218code generator. It is allocated by compile_matchingpath, and contains
219the arguments for compile_backtrackingpath. Must be the first member
220of its descendants. */
221typedef struct backtrack_common {
222 /* Concatenation stack. */
223 struct backtrack_common *prev;
224 jump_list *nextbacktracks;
225 /* Internal stack (for component operators). */
226 struct backtrack_common *top;
227 jump_list *topbacktracks;
228 /* Opcode pointer. */
229 pcre_uchar *cc;
230} backtrack_common;
231
232typedef struct assert_backtrack {
233 backtrack_common common;
234 jump_list *condfailed;
235 /* Less than 0 if a frame is not needed. */
236 int framesize;
237 /* Points to our private memory word on the stack. */
238 int private_data_ptr;
239 /* For iterators. */
240 struct sljit_label *matchingpath;
241} assert_backtrack;
242
243typedef struct bracket_backtrack {
244 backtrack_common common;
245 /* Where to coninue if an alternative is successfully matched. */
246 struct sljit_label *alternative_matchingpath;
247 /* For rmin and rmax iterators. */
248 struct sljit_label *recursive_matchingpath;
249 /* For greedy ? operator. */
250 struct sljit_label *zero_matchingpath;
251 /* Contains the branches of a failed condition. */
252 union {
253 /* Both for OP_COND, OP_SCOND. */
254 jump_list *condfailed;
255 assert_backtrack *assert;
256 /* For OP_ONCE. Less than 0 if not needed. */
257 int framesize;
258 } u;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261} bracket_backtrack;
262
263typedef struct bracketpos_backtrack {
264 backtrack_common common;
265 /* Points to our private memory word on the stack. */
266 int private_data_ptr;
267 /* Reverting stack is needed. */
268 int framesize;
269 /* Allocated stack size. */
270 int stacksize;
271} bracketpos_backtrack;
272
273typedef struct braminzero_backtrack {
274 backtrack_common common;
275 struct sljit_label *matchingpath;
276} braminzero_backtrack;
277
278typedef struct char_iterator_backtrack {
279 backtrack_common common;
280 /* Next iteration. */
281 struct sljit_label *matchingpath;
282 union {
283 jump_list *backtracks;
284 struct {
285 unsigned int othercasebit;
286 pcre_uchar chr;
287 BOOL enabled;
288 } charpos;
289 } u;
290} char_iterator_backtrack;
291
292typedef struct ref_iterator_backtrack {
293 backtrack_common common;
294 /* Next iteration. */
295 struct sljit_label *matchingpath;
296} ref_iterator_backtrack;
297
298typedef struct recurse_entry {
299 struct recurse_entry *next;
300 /* Contains the function entry. */
301 struct sljit_label *entry;
302 /* Collects the calls until the function is not created. */
303 jump_list *calls;
304 /* Points to the starting opcode. */
305 sljit_sw start;
306} recurse_entry;
307
308typedef struct recurse_backtrack {
309 backtrack_common common;
310 BOOL inlined_pattern;
311} recurse_backtrack;
312
313#define OP_THEN_TRAP OP_TABLE_LENGTH
314
315typedef struct then_trap_backtrack {
316 backtrack_common common;
317 /* If then_trap is not NULL, this structure contains the real
318 then_trap for the backtracking path. */
319 struct then_trap_backtrack *then_trap;
320 /* Points to the starting opcode. */
321 sljit_sw start;
322 /* Exit point for the then opcodes of this alternative. */
323 jump_list *quit;
324 /* Frame size of the current alternative. */
325 int framesize;
326} then_trap_backtrack;
327
/* Upper bound on the size of a range. */
#define MAX_RANGE_SIZE 4
329
330typedef struct compiler_common {
331 /* The sljit ceneric compiler. */
332 struct sljit_compiler *compiler;
333 /* First byte code. */
334 pcre_uchar *start;
335 /* Maps private data offset to each opcode. */
336 sljit_s32 *private_data_ptrs;
337 /* Chain list of read-only data ptrs. */
338 void *read_only_data_head;
339 /* Tells whether the capturing bracket is optimized. */
340 sljit_u8 *optimized_cbracket;
341 /* Tells whether the starting offset is a target of then. */
342 sljit_u8 *then_offsets;
343 /* Current position where a THEN must jump. */
344 then_trap_backtrack *then_trap;
345 /* Starting offset of private data for capturing brackets. */
346 sljit_s32 cbra_ptr;
347 /* Output vector starting point. Must be divisible by 2. */
348 sljit_s32 ovector_start;
349 /* Points to the starting character of the current match. */
350 sljit_s32 start_ptr;
351 /* Last known position of the requested byte. */
352 sljit_s32 req_char_ptr;
353 /* Head of the last recursion. */
354 sljit_s32 recursive_head_ptr;
355 /* First inspected character for partial matching.
356 (Needed for avoiding zero length partial matches.) */
357 sljit_s32 start_used_ptr;
358 /* Starting pointer for partial soft matches. */
359 sljit_s32 hit_start;
360 /* Pointer of the match end position. */
361 sljit_s32 match_end_ptr;
362 /* Points to the marked string. */
363 sljit_s32 mark_ptr;
364 /* Recursive control verb management chain. */
365 sljit_s32 control_head_ptr;
366 /* Points to the last matched capture block index. */
367 sljit_s32 capture_last_ptr;
368 /* Fast forward skipping byte code pointer. */
369 pcre_uchar *fast_forward_bc_ptr;
370 /* Locals used by fast fail optimization. */
371 sljit_s32 fast_fail_start_ptr;
372 sljit_s32 fast_fail_end_ptr;
373
374 /* Flipped and lower case tables. */
375 const sljit_u8 *fcc;
376 sljit_sw lcc;
377 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
378 int mode;
379 /* TRUE, when minlength is greater than 0. */
380 BOOL might_be_empty;
381 /* \K is found in the pattern. */
382 BOOL has_set_som;
383 /* (*SKIP:arg) is found in the pattern. */
384 BOOL has_skip_arg;
385 /* (*THEN) is found in the pattern. */
386 BOOL has_then;
387 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
388 BOOL has_skip_in_assert_back;
389 /* Currently in recurse or negative assert. */
390 BOOL local_exit;
391 /* Currently in a positive assert. */
392 BOOL positive_assert;
393 /* Newline control. */
394 int nltype;
395 sljit_u32 nlmax;
396 sljit_u32 nlmin;
397 int newline;
398 int bsr_nltype;
399 sljit_u32 bsr_nlmax;
400 sljit_u32 bsr_nlmin;
401 /* Dollar endonly. */
402 int endonly;
403 /* Tables. */
404 sljit_sw ctypes;
405 /* Named capturing brackets. */
406 pcre_uchar *name_table;
407 sljit_sw name_count;
408 sljit_sw name_entry_size;
409
410 /* Labels and jump lists. */
411 struct sljit_label *partialmatchlabel;
412 struct sljit_label *quit_label;
413 struct sljit_label *forced_quit_label;
414 struct sljit_label *accept_label;
415 struct sljit_label *ff_newline_shortcut;
416 stub_list *stubs;
417 label_addr_list *label_addrs;
418 recurse_entry *entries;
419 recurse_entry *currententry;
420 jump_list *partialmatch;
421 jump_list *quit;
422 jump_list *positive_assert_quit;
423 jump_list *forced_quit;
424 jump_list *accept;
425 jump_list *calllimit;
426 jump_list *stackalloc;
427 jump_list *revertframes;
428 jump_list *wordboundary;
429 jump_list *anynewline;
430 jump_list *hspace;
431 jump_list *vspace;
432 jump_list *casefulcmp;
433 jump_list *caselesscmp;
434 jump_list *reset_match;
435 BOOL jscript_compat;
436#ifdef SUPPORT_UTF
437 BOOL utf;
438#ifdef SUPPORT_UCP
439 BOOL use_ucp;
440 jump_list *getucd;
441#endif
442#ifdef COMPILE_PCRE8
443 jump_list *utfreadchar;
444 jump_list *utfreadchar16;
445 jump_list *utfreadtype8;
446#endif
447#endif /* SUPPORT_UTF */
448} compiler_common;
449
/* Context used by byte_sequence_compare. The character buffers are
present only when SLJIT_UNALIGNED is enabled; their element type
follows the code unit width selected by COMPILE_PCRE8/16/32. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* Buffer "c": the same storage viewed at several widths. */
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  /* Buffer "oc": laid out exactly like "c". */
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
483
/* Drop the sljit CMP macro; this file defines its own CMP shortcut. */
#undef CMP

/* Byte offset of the i-th sljit_sw sized element of the stack. */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))

/* If sljit names a preferred shift register, accept only R2 or R3 and
remember the R3 case in SHIFT_REG_IS_R3. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing to do. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif
499
/* Symbolic names for the sljit registers used by this file. TMP2 and
TMP3 trade places when SHIFT_REG_IS_R3 is defined. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
515
/* Layout of the local space. */

/* Two locals that the current opcode is free to use. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))

/* The output vector lives on the stack and contains pointers to
characters. Its data is split into two groups: the first group holds
the start / end character pointers, the second holds the start
pointers for capturing groups whose end has not been reached yet. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
533
534#if defined COMPILE_PCRE8
535#define MOV_UCHAR SLJIT_MOV_U8
536#elif defined COMPILE_PCRE16
537#define MOV_UCHAR SLJIT_MOV_U16
538#elif defined COMPILE_PCRE32
539#define MOV_UCHAR SLJIT_MOV_U32
540#else
541#error Unsupported compiling mode
542#endif
543
/* Shortcuts for the sljit emitter calls. Except for DEFINE_COMPILER and
SET_LABEL, each of them expects a local variable named "compiler" to be
in scope; DEFINE_COMPILER declares it from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* One- and two-operand instructions. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. JUMPTO binds a new jump to an existing label,
JUMPHERE binds an existing jump to a label emitted right now. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Compare-and-jump; CMPTO also binds the jump to a label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Flag handling and local base address. */
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
569
/* Largest value accepted as a read-character limit. */
#define READ_CHAR_MAX 0x7fffffff

/* Value standing in for an invalid UTF character. */
#define INVALID_UTF_CHAR 888
573
574static pcre_uchar *bracketend(pcre_uchar *cc)
575{
576SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
577do cc += GET(cc, 1); while (*cc == OP_ALT);
578SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
579cc += 1 + LINK_SIZE;
580return cc;
581}
582
583static int no_alternatives(pcre_uchar *cc)
584{
585int count = 0;
586SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
587do
588 {
589 cc += GET(cc, 1);
590 count++;
591 }
592while (*cc == OP_ALT);
593SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
594return count;
595}
596
/* Functions which might need modification whenever a new opcode is supported:
598 next_opcode
599 check_opcode_types
600 set_private_data_ptrs
601 get_framesize
602 init_frame
603 get_private_data_copy_length
604 copy_private_data
605 compile_matchingpath
606 compile_backtrackingpath
607*/
608
609static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
610{
611SLJIT_UNUSED_ARG(common);
612switch(*cc)
613 {
614 case OP_SOD:
615 case OP_SOM:
616 case OP_SET_SOM:
617 case OP_NOT_WORD_BOUNDARY:
618 case OP_WORD_BOUNDARY:
619 case OP_NOT_DIGIT:
620 case OP_DIGIT:
621 case OP_NOT_WHITESPACE:
622 case OP_WHITESPACE:
623 case OP_NOT_WORDCHAR:
624 case OP_WORDCHAR:
625 case OP_ANY:
626 case OP_ALLANY:
627 case OP_NOTPROP:
628 case OP_PROP:
629 case OP_ANYNL:
630 case OP_NOT_HSPACE:
631 case OP_HSPACE:
632 case OP_NOT_VSPACE:
633 case OP_VSPACE:
634 case OP_EXTUNI:
635 case OP_EODN:
636 case OP_EOD:
637 case OP_CIRC:
638 case OP_CIRCM:
639 case OP_DOLL:
640 case OP_DOLLM:
641 case OP_CRSTAR:
642 case OP_CRMINSTAR:
643 case OP_CRPLUS:
644 case OP_CRMINPLUS:
645 case OP_CRQUERY:
646 case OP_CRMINQUERY:
647 case OP_CRRANGE:
648 case OP_CRMINRANGE:
649 case OP_CRPOSSTAR:
650 case OP_CRPOSPLUS:
651 case OP_CRPOSQUERY:
652 case OP_CRPOSRANGE:
653 case OP_CLASS:
654 case OP_NCLASS:
655 case OP_REF:
656 case OP_REFI:
657 case OP_DNREF:
658 case OP_DNREFI:
659 case OP_RECURSE:
660 case OP_CALLOUT:
661 case OP_ALT:
662 case OP_KET:
663 case OP_KETRMAX:
664 case OP_KETRMIN:
665 case OP_KETRPOS:
666 case OP_REVERSE:
667 case OP_ASSERT:
668 case OP_ASSERT_NOT:
669 case OP_ASSERTBACK:
670 case OP_ASSERTBACK_NOT:
671 case OP_ONCE:
672 case OP_ONCE_NC:
673 case OP_BRA:
674 case OP_BRAPOS:
675 case OP_CBRA:
676 case OP_CBRAPOS:
677 case OP_COND:
678 case OP_SBRA:
679 case OP_SBRAPOS:
680 case OP_SCBRA:
681 case OP_SCBRAPOS:
682 case OP_SCOND:
683 case OP_CREF:
684 case OP_DNCREF:
685 case OP_RREF:
686 case OP_DNRREF:
687 case OP_DEF:
688 case OP_BRAZERO:
689 case OP_BRAMINZERO:
690 case OP_BRAPOSZERO:
691 case OP_PRUNE:
692 case OP_SKIP:
693 case OP_THEN:
694 case OP_COMMIT:
695 case OP_FAIL:
696 case OP_ACCEPT:
697 case OP_ASSERT_ACCEPT:
698 case OP_CLOSE:
699 case OP_SKIPZERO:
700 return cc + PRIV(OP_lengths)[*cc];
701
702 case OP_CHAR:
703 case OP_CHARI:
704 case OP_NOT:
705 case OP_NOTI:
706 case OP_STAR:
707 case OP_MINSTAR:
708 case OP_PLUS:
709 case OP_MINPLUS:
710 case OP_QUERY:
711 case OP_MINQUERY:
712 case OP_UPTO:
713 case OP_MINUPTO:
714 case OP_EXACT:
715 case OP_POSSTAR:
716 case OP_POSPLUS:
717 case OP_POSQUERY:
718 case OP_POSUPTO:
719 case OP_STARI:
720 case OP_MINSTARI:
721 case OP_PLUSI:
722 case OP_MINPLUSI:
723 case OP_QUERYI:
724 case OP_MINQUERYI:
725 case OP_UPTOI:
726 case OP_MINUPTOI:
727 case OP_EXACTI:
728 case OP_POSSTARI:
729 case OP_POSPLUSI:
730 case OP_POSQUERYI:
731 case OP_POSUPTOI:
732 case OP_NOTSTAR:
733 case OP_NOTMINSTAR:
734 case OP_NOTPLUS:
735 case OP_NOTMINPLUS:
736 case OP_NOTQUERY:
737 case OP_NOTMINQUERY:
738 case OP_NOTUPTO:
739 case OP_NOTMINUPTO:
740 case OP_NOTEXACT:
741 case OP_NOTPOSSTAR:
742 case OP_NOTPOSPLUS:
743 case OP_NOTPOSQUERY:
744 case OP_NOTPOSUPTO:
745 case OP_NOTSTARI:
746 case OP_NOTMINSTARI:
747 case OP_NOTPLUSI:
748 case OP_NOTMINPLUSI:
749 case OP_NOTQUERYI:
750 case OP_NOTMINQUERYI:
751 case OP_NOTUPTOI:
752 case OP_NOTMINUPTOI:
753 case OP_NOTEXACTI:
754 case OP_NOTPOSSTARI:
755 case OP_NOTPOSPLUSI:
756 case OP_NOTPOSQUERYI:
757 case OP_NOTPOSUPTOI:
758 cc += PRIV(OP_lengths)[*cc];
759#ifdef SUPPORT_UTF
760 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
761#endif
762 return cc;
763
764 /* Special cases. */
765 case OP_TYPESTAR:
766 case OP_TYPEMINSTAR:
767 case OP_TYPEPLUS:
768 case OP_TYPEMINPLUS:
769 case OP_TYPEQUERY:
770 case OP_TYPEMINQUERY:
771 case OP_TYPEUPTO:
772 case OP_TYPEMINUPTO:
773 case OP_TYPEEXACT:
774 case OP_TYPEPOSSTAR:
775 case OP_TYPEPOSPLUS:
776 case OP_TYPEPOSQUERY:
777 case OP_TYPEPOSUPTO:
778 return cc + PRIV(OP_lengths)[*cc] - 1;
779
780 case OP_ANYBYTE:
781#ifdef SUPPORT_UTF
782 if (common->utf) return NULL;
783#endif
784 return cc + 1;
785
786#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
787 case OP_XCLASS:
788 return cc + GET(cc, 1);
789#endif
790
791 case OP_MARK:
792 case OP_PRUNE_ARG:
793 case OP_SKIP_ARG:
794 case OP_THEN_ARG:
795 return cc + 1 + 2 + cc[1];
796
797 default:
798 /* All opcodes are supported now! */
799 SLJIT_UNREACHABLE();
800 return NULL;
801 }
802}
803
804static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
805{
806int count;
807pcre_uchar *slot;
808pcre_uchar *assert_back_end = cc - 1;
809
810/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
811while (cc < ccend)
812 {
813 switch(*cc)
814 {
815 case OP_SET_SOM:
816 common->has_set_som = TRUE;
817 common->might_be_empty = TRUE;
818 cc += 1;
819 break;
820
821 case OP_REF:
822 case OP_REFI:
823 common->optimized_cbracket[GET2(cc, 1)] = 0;
824 cc += 1 + IMM2_SIZE;
825 break;
826
827 case OP_CBRAPOS:
828 case OP_SCBRAPOS:
829 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
830 cc += 1 + LINK_SIZE + IMM2_SIZE;
831 break;
832
833 case OP_COND:
834 case OP_SCOND:
835 /* Only AUTO_CALLOUT can insert this opcode. We do
836 not intend to support this case. */
837 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
838 return FALSE;
839 cc += 1 + LINK_SIZE;
840 break;
841
842 case OP_CREF:
843 common->optimized_cbracket[GET2(cc, 1)] = 0;
844 cc += 1 + IMM2_SIZE;
845 break;
846
847 case OP_DNREF:
848 case OP_DNREFI:
849 case OP_DNCREF:
850 count = GET2(cc, 1 + IMM2_SIZE);
851 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
852 while (count-- > 0)
853 {
854 common->optimized_cbracket[GET2(slot, 0)] = 0;
855 slot += common->name_entry_size;
856 }
857 cc += 1 + 2 * IMM2_SIZE;
858 break;
859
860 case OP_RECURSE:
861 /* Set its value only once. */
862 if (common->recursive_head_ptr == 0)
863 {
864 common->recursive_head_ptr = common->ovector_start;
865 common->ovector_start += sizeof(sljit_sw);
866 }
867 cc += 1 + LINK_SIZE;
868 break;
869
870 case OP_CALLOUT:
871 if (common->capture_last_ptr == 0)
872 {
873 common->capture_last_ptr = common->ovector_start;
874 common->ovector_start += sizeof(sljit_sw);
875 }
876 cc += 2 + 2 * LINK_SIZE;
877 break;
878
879 case OP_ASSERTBACK:
880 slot = bracketend(cc);
881 if (slot > assert_back_end)
882 assert_back_end = slot;
883 cc += 1 + LINK_SIZE;
884 break;
885
886 case OP_THEN_ARG:
887 common->has_then = TRUE;
888 common->control_head_ptr = 1;
889 /* Fall through. */
890
891 case OP_PRUNE_ARG:
892 case OP_MARK:
893 if (common->mark_ptr == 0)
894 {
895 common->mark_ptr = common->ovector_start;
896 common->ovector_start += sizeof(sljit_sw);
897 }
898 cc += 1 + 2 + cc[1];
899 break;
900
901 case OP_THEN:
902 common->has_then = TRUE;
903 common->control_head_ptr = 1;
904 cc += 1;
905 break;
906
907 case OP_SKIP:
908 if (cc < assert_back_end)
909 common->has_skip_in_assert_back = TRUE;
910 cc += 1;
911 break;
912
913 case OP_SKIP_ARG:
914 common->control_head_ptr = 1;
915 common->has_skip_arg = TRUE;
916 if (cc < assert_back_end)
917 common->has_skip_in_assert_back = TRUE;
918 cc += 1 + 2 + cc[1];
919 break;
920
921 default:
922 cc = next_opcode(common, cc);
923 if (cc == NULL)
924 return FALSE;
925 break;
926 }
927 }
928return TRUE;
929}
930
931static BOOL is_accelerated_repeat(pcre_uchar *cc)
932{
933switch(*cc)
934 {
935 case OP_TYPESTAR:
936 case OP_TYPEMINSTAR:
937 case OP_TYPEPLUS:
938 case OP_TYPEMINPLUS:
939 case OP_TYPEPOSSTAR:
940 case OP_TYPEPOSPLUS:
941 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
942
943 case OP_STAR:
944 case OP_MINSTAR:
945 case OP_PLUS:
946 case OP_MINPLUS:
947 case OP_POSSTAR:
948 case OP_POSPLUS:
949
950 case OP_STARI:
951 case OP_MINSTARI:
952 case OP_PLUSI:
953 case OP_MINPLUSI:
954 case OP_POSSTARI:
955 case OP_POSPLUSI:
956
957 case OP_NOTSTAR:
958 case OP_NOTMINSTAR:
959 case OP_NOTPLUS:
960 case OP_NOTMINPLUS:
961 case OP_NOTPOSSTAR:
962 case OP_NOTPOSPLUS:
963
964 case OP_NOTSTARI:
965 case OP_NOTMINSTARI:
966 case OP_NOTPLUSI:
967 case OP_NOTMINPLUSI:
968 case OP_NOTPOSSTARI:
969 case OP_NOTPOSPLUSI:
970 return TRUE;
971
972 case OP_CLASS:
973 case OP_NCLASS:
974#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
975 case OP_XCLASS:
976 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
977#else
978 cc += (1 + (32 / sizeof(pcre_uchar)));
979#endif
980
981 switch(*cc)
982 {
983 case OP_CRSTAR:
984 case OP_CRMINSTAR:
985 case OP_CRPLUS:
986 case OP_CRMINPLUS:
987 case OP_CRPOSSTAR:
988 case OP_CRPOSPLUS:
989 return TRUE;
990 }
991 break;
992 }
993return FALSE;
994}
995
996static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
997{
998pcre_uchar *cc = common->start;
999pcre_uchar *end;
1000
1001/* Skip not repeated brackets. */
1002while (TRUE)
1003 {
1004 switch(*cc)
1005 {
1006 case OP_SOD:
1007 case OP_SOM:
1008 case OP_SET_SOM:
1009 case OP_NOT_WORD_BOUNDARY:
1010 case OP_WORD_BOUNDARY:
1011 case OP_EODN:
1012 case OP_EOD:
1013 case OP_CIRC:
1014 case OP_CIRCM:
1015 case OP_DOLL:
1016 case OP_DOLLM:
1017 /* Zero width assertions. */
1018 cc++;
1019 continue;
1020 }
1021
1022 if (*cc != OP_BRA && *cc != OP_CBRA)
1023 break;
1024
1025 end = cc + GET(cc, 1);
1026 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1027 return FALSE;
1028 if (*cc == OP_CBRA)
1029 {
1030 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1031 return FALSE;
1032 cc += IMM2_SIZE;
1033 }
1034 cc += 1 + LINK_SIZE;
1035 }
1036
1037if (is_accelerated_repeat(cc))
1038 {
1039 common->fast_forward_bc_ptr = cc;
1040 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1041 *private_data_start += sizeof(sljit_sw);
1042 return TRUE;
1043 }
1044return FALSE;
1045}
1046
1047static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1048{
1049 pcre_uchar *next_alt;
1050
1051 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1052
1053 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1054 return;
1055
1056 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1057 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1058 return;
1059
1060 do
1061 {
1062 next_alt = cc + GET(cc, 1);
1063
1064 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1065
1066 while (TRUE)
1067 {
1068 switch(*cc)
1069 {
1070 case OP_SOD:
1071 case OP_SOM:
1072 case OP_SET_SOM:
1073 case OP_NOT_WORD_BOUNDARY:
1074 case OP_WORD_BOUNDARY:
1075 case OP_EODN:
1076 case OP_EOD:
1077 case OP_CIRC:
1078 case OP_CIRCM:
1079 case OP_DOLL:
1080 case OP_DOLLM:
1081 /* Zero width assertions. */
1082 cc++;
1083 continue;
1084 }
1085 break;
1086 }
1087
1088 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1089 detect_fast_fail(common, cc, private_data_start, depth - 1);
1090
1091 if (is_accelerated_repeat(cc))
1092 {
1093 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1094
1095 if (common->fast_fail_start_ptr == 0)
1096 common->fast_fail_start_ptr = *private_data_start;
1097
1098 *private_data_start += sizeof(sljit_sw);
1099 common->fast_fail_end_ptr = *private_data_start;
1100
1101 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1102 return;
1103 }
1104
1105 cc = next_alt;
1106 }
1107 while (*cc == OP_ALT);
1108}
1109
1110static int get_class_iterator_size(pcre_uchar *cc)
1111{
1112sljit_u32 min;
1113sljit_u32 max;
1114switch(*cc)
1115 {
1116 case OP_CRSTAR:
1117 case OP_CRPLUS:
1118 return 2;
1119
1120 case OP_CRMINSTAR:
1121 case OP_CRMINPLUS:
1122 case OP_CRQUERY:
1123 case OP_CRMINQUERY:
1124 return 1;
1125
1126 case OP_CRRANGE:
1127 case OP_CRMINRANGE:
1128 min = GET2(cc, 1);
1129 max = GET2(cc, 1 + IMM2_SIZE);
1130 if (max == 0)
1131 return (*cc == OP_CRRANGE) ? 2 : 1;
1132 max -= min;
1133 if (max > 2)
1134 max = 2;
1135 return max;
1136
1137 default:
1138 return 0;
1139 }
1140}
1141
1142static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1143{
1144pcre_uchar *end = bracketend(begin);
1145pcre_uchar *next;
1146pcre_uchar *next_end;
1147pcre_uchar *max_end;
1148pcre_uchar type;
1149sljit_sw length = end - begin;
1150int min, max, i;
1151
1152/* Detect fixed iterations first. */
1153if (end[-(1 + LINK_SIZE)] != OP_KET)
1154 return FALSE;
1155
1156/* Already detected repeat. */
1157if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1158 return TRUE;
1159
1160next = end;
1161min = 1;
1162while (1)
1163 {
1164 if (*next != *begin)
1165 break;
1166 next_end = bracketend(next);
1167 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1168 break;
1169 next = next_end;
1170 min++;
1171 }
1172
1173if (min == 2)
1174 return FALSE;
1175
1176max = 0;
1177max_end = next;
1178if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1179 {
1180 type = *next;
1181 while (1)
1182 {
1183 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1184 break;
1185 next_end = bracketend(next + 2 + LINK_SIZE);
1186 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1187 break;
1188 next = next_end;
1189 max++;
1190 }
1191
1192 if (next[0] == type && next[1] == *begin && max >= 1)
1193 {
1194 next_end = bracketend(next + 1);
1195 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1196 {
1197 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1198 if (*next_end != OP_KET)
1199 break;
1200
1201 if (i == max)
1202 {
1203 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1204 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1205 /* +2 the original and the last. */
1206 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1207 if (min == 1)
1208 return TRUE;
1209 min--;
1210 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1211 }
1212 }
1213 }
1214 }
1215
1216if (min >= 3)
1217 {
1218 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1219 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1220 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1221 return TRUE;
1222 }
1223
1224return FALSE;
1225}
1226
/* Case-label lists shared by the switch statements of set_private_data_ptrs(),
get_private_data_copy_length() and copy_private_data(). Each macro expands to
a run of "case" labels and must be followed by the statements of that case.
The numeric suffix matches the number of private words that
set_private_data_ptrs() reserves for the listed opcodes. */

/* Single character iterators: one private word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators: two private words, opcode followed directly
by the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators: two private words, opcode followed by an
IMM2_SIZE immediate and then the character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: one private word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: two private words (set_private_data_ptrs()
reserves none when the type is OP_ANYNL or OP_EXTUNI). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators with an IMM2_SIZE immediate: two private words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1278
1279static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1280{
1281pcre_uchar *cc = common->start;
1282pcre_uchar *alternative;
1283pcre_uchar *end = NULL;
1284int private_data_ptr = *private_data_start;
1285int space, size, bracketlen;
1286BOOL repeat_check = TRUE;
1287
1288while (cc < ccend)
1289 {
1290 space = 0;
1291 size = 0;
1292 bracketlen = 0;
1293 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1294 break;
1295
1296 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1297 {
1298 if (detect_repeat(common, cc))
1299 {
1300 /* These brackets are converted to repeats, so no global
1301 based single character repeat is allowed. */
1302 if (cc >= end)
1303 end = bracketend(cc);
1304 }
1305 }
1306 repeat_check = TRUE;
1307
1308 switch(*cc)
1309 {
1310 case OP_KET:
1311 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1312 {
1313 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1314 private_data_ptr += sizeof(sljit_sw);
1315 cc += common->private_data_ptrs[cc + 1 - common->start];
1316 }
1317 cc += 1 + LINK_SIZE;
1318 break;
1319
1320 case OP_ASSERT:
1321 case OP_ASSERT_NOT:
1322 case OP_ASSERTBACK:
1323 case OP_ASSERTBACK_NOT:
1324 case OP_ONCE:
1325 case OP_ONCE_NC:
1326 case OP_BRAPOS:
1327 case OP_SBRA:
1328 case OP_SBRAPOS:
1329 case OP_SCOND:
1330 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1331 private_data_ptr += sizeof(sljit_sw);
1332 bracketlen = 1 + LINK_SIZE;
1333 break;
1334
1335 case OP_CBRAPOS:
1336 case OP_SCBRAPOS:
1337 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1338 private_data_ptr += sizeof(sljit_sw);
1339 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1340 break;
1341
1342 case OP_COND:
1343 /* Might be a hidden SCOND. */
1344 alternative = cc + GET(cc, 1);
1345 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1346 {
1347 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1348 private_data_ptr += sizeof(sljit_sw);
1349 }
1350 bracketlen = 1 + LINK_SIZE;
1351 break;
1352
1353 case OP_BRA:
1354 bracketlen = 1 + LINK_SIZE;
1355 break;
1356
1357 case OP_CBRA:
1358 case OP_SCBRA:
1359 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1360 break;
1361
1362 case OP_BRAZERO:
1363 case OP_BRAMINZERO:
1364 case OP_BRAPOSZERO:
1365 repeat_check = FALSE;
1366 size = 1;
1367 break;
1368
1369 CASE_ITERATOR_PRIVATE_DATA_1
1370 space = 1;
1371 size = -2;
1372 break;
1373
1374 CASE_ITERATOR_PRIVATE_DATA_2A
1375 space = 2;
1376 size = -2;
1377 break;
1378
1379 CASE_ITERATOR_PRIVATE_DATA_2B
1380 space = 2;
1381 size = -(2 + IMM2_SIZE);
1382 break;
1383
1384 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1385 space = 1;
1386 size = 1;
1387 break;
1388
1389 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1390 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1391 space = 2;
1392 size = 1;
1393 break;
1394
1395 case OP_TYPEUPTO:
1396 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1397 space = 2;
1398 size = 1 + IMM2_SIZE;
1399 break;
1400
1401 case OP_TYPEMINUPTO:
1402 space = 2;
1403 size = 1 + IMM2_SIZE;
1404 break;
1405
1406 case OP_CLASS:
1407 case OP_NCLASS:
1408 space = get_class_iterator_size(cc + size);
1409 size = 1 + 32 / sizeof(pcre_uchar);
1410 break;
1411
1412#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1413 case OP_XCLASS:
1414 space = get_class_iterator_size(cc + size);
1415 size = GET(cc, 1);
1416 break;
1417#endif
1418
1419 default:
1420 cc = next_opcode(common, cc);
1421 SLJIT_ASSERT(cc != NULL);
1422 break;
1423 }
1424
1425 /* Character iterators, which are not inside a repeated bracket,
1426 gets a private slot instead of allocating it on the stack. */
1427 if (space > 0 && cc >= end)
1428 {
1429 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1430 private_data_ptr += sizeof(sljit_sw) * space;
1431 }
1432
1433 if (size != 0)
1434 {
1435 if (size < 0)
1436 {
1437 cc += -size;
1438#ifdef SUPPORT_UTF
1439 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1440#endif
1441 }
1442 else
1443 cc += size;
1444 }
1445
1446 if (bracketlen > 0)
1447 {
1448 if (cc >= end)
1449 {
1450 end = bracketend(cc);
1451 if (end[-1 - LINK_SIZE] == OP_KET)
1452 end = NULL;
1453 }
1454 cc += bracketlen;
1455 }
1456 }
1457*private_data_start = private_data_ptr;
1458}
1459
1460/* Returns with a frame_types (always < 0) if no need for frame. */
1461static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1462{
1463int length = 0;
1464int possessive = 0;
1465BOOL stack_restore = FALSE;
1466BOOL setsom_found = recursive;
1467BOOL setmark_found = recursive;
1468/* The last capture is a local variable even for recursions. */
1469BOOL capture_last_found = FALSE;
1470
1471#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1472SLJIT_ASSERT(common->control_head_ptr != 0);
1473*needs_control_head = TRUE;
1474#else
1475*needs_control_head = FALSE;
1476#endif
1477
1478if (ccend == NULL)
1479 {
1480 ccend = bracketend(cc) - (1 + LINK_SIZE);
1481 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1482 {
1483 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1484 /* This is correct regardless of common->capture_last_ptr. */
1485 capture_last_found = TRUE;
1486 }
1487 cc = next_opcode(common, cc);
1488 }
1489
1490SLJIT_ASSERT(cc != NULL);
1491while (cc < ccend)
1492 switch(*cc)
1493 {
1494 case OP_SET_SOM:
1495 SLJIT_ASSERT(common->has_set_som);
1496 stack_restore = TRUE;
1497 if (!setsom_found)
1498 {
1499 length += 2;
1500 setsom_found = TRUE;
1501 }
1502 cc += 1;
1503 break;
1504
1505 case OP_MARK:
1506 case OP_PRUNE_ARG:
1507 case OP_THEN_ARG:
1508 SLJIT_ASSERT(common->mark_ptr != 0);
1509 stack_restore = TRUE;
1510 if (!setmark_found)
1511 {
1512 length += 2;
1513 setmark_found = TRUE;
1514 }
1515 if (common->control_head_ptr != 0)
1516 *needs_control_head = TRUE;
1517 cc += 1 + 2 + cc[1];
1518 break;
1519
1520 case OP_RECURSE:
1521 stack_restore = TRUE;
1522 if (common->has_set_som && !setsom_found)
1523 {
1524 length += 2;
1525 setsom_found = TRUE;
1526 }
1527 if (common->mark_ptr != 0 && !setmark_found)
1528 {
1529 length += 2;
1530 setmark_found = TRUE;
1531 }
1532 if (common->capture_last_ptr != 0 && !capture_last_found)
1533 {
1534 length += 2;
1535 capture_last_found = TRUE;
1536 }
1537 cc += 1 + LINK_SIZE;
1538 break;
1539
1540 case OP_CBRA:
1541 case OP_CBRAPOS:
1542 case OP_SCBRA:
1543 case OP_SCBRAPOS:
1544 stack_restore = TRUE;
1545 if (common->capture_last_ptr != 0 && !capture_last_found)
1546 {
1547 length += 2;
1548 capture_last_found = TRUE;
1549 }
1550 length += 3;
1551 cc += 1 + LINK_SIZE + IMM2_SIZE;
1552 break;
1553
1554 case OP_THEN:
1555 stack_restore = TRUE;
1556 if (common->control_head_ptr != 0)
1557 *needs_control_head = TRUE;
1558 cc ++;
1559 break;
1560
1561 default:
1562 stack_restore = TRUE;
1563 /* Fall through. */
1564
1565 case OP_NOT_WORD_BOUNDARY:
1566 case OP_WORD_BOUNDARY:
1567 case OP_NOT_DIGIT:
1568 case OP_DIGIT:
1569 case OP_NOT_WHITESPACE:
1570 case OP_WHITESPACE:
1571 case OP_NOT_WORDCHAR:
1572 case OP_WORDCHAR:
1573 case OP_ANY:
1574 case OP_ALLANY:
1575 case OP_ANYBYTE:
1576 case OP_NOTPROP:
1577 case OP_PROP:
1578 case OP_ANYNL:
1579 case OP_NOT_HSPACE:
1580 case OP_HSPACE:
1581 case OP_NOT_VSPACE:
1582 case OP_VSPACE:
1583 case OP_EXTUNI:
1584 case OP_EODN:
1585 case OP_EOD:
1586 case OP_CIRC:
1587 case OP_CIRCM:
1588 case OP_DOLL:
1589 case OP_DOLLM:
1590 case OP_CHAR:
1591 case OP_CHARI:
1592 case OP_NOT:
1593 case OP_NOTI:
1594
1595 case OP_EXACT:
1596 case OP_POSSTAR:
1597 case OP_POSPLUS:
1598 case OP_POSQUERY:
1599 case OP_POSUPTO:
1600
1601 case OP_EXACTI:
1602 case OP_POSSTARI:
1603 case OP_POSPLUSI:
1604 case OP_POSQUERYI:
1605 case OP_POSUPTOI:
1606
1607 case OP_NOTEXACT:
1608 case OP_NOTPOSSTAR:
1609 case OP_NOTPOSPLUS:
1610 case OP_NOTPOSQUERY:
1611 case OP_NOTPOSUPTO:
1612
1613 case OP_NOTEXACTI:
1614 case OP_NOTPOSSTARI:
1615 case OP_NOTPOSPLUSI:
1616 case OP_NOTPOSQUERYI:
1617 case OP_NOTPOSUPTOI:
1618
1619 case OP_TYPEEXACT:
1620 case OP_TYPEPOSSTAR:
1621 case OP_TYPEPOSPLUS:
1622 case OP_TYPEPOSQUERY:
1623 case OP_TYPEPOSUPTO:
1624
1625 case OP_CLASS:
1626 case OP_NCLASS:
1627 case OP_XCLASS:
1628 case OP_CALLOUT:
1629
1630 cc = next_opcode(common, cc);
1631 SLJIT_ASSERT(cc != NULL);
1632 break;
1633 }
1634
1635/* Possessive quantifiers can use a special case. */
1636if (SLJIT_UNLIKELY(possessive == length))
1637 return stack_restore ? no_frame : no_stack;
1638
1639if (length > 0)
1640 return length + 1;
1641return stack_restore ? no_frame : no_stack;
1642}
1643
1644static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1645{
1646DEFINE_COMPILER;
1647BOOL setsom_found = recursive;
1648BOOL setmark_found = recursive;
1649/* The last capture is a local variable even for recursions. */
1650BOOL capture_last_found = FALSE;
1651int offset;
1652
1653/* >= 1 + shortest item size (2) */
1654SLJIT_UNUSED_ARG(stacktop);
1655SLJIT_ASSERT(stackpos >= stacktop + 2);
1656
1657stackpos = STACK(stackpos);
1658if (ccend == NULL)
1659 {
1660 ccend = bracketend(cc) - (1 + LINK_SIZE);
1661 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1662 cc = next_opcode(common, cc);
1663 }
1664
1665SLJIT_ASSERT(cc != NULL);
1666while (cc < ccend)
1667 switch(*cc)
1668 {
1669 case OP_SET_SOM:
1670 SLJIT_ASSERT(common->has_set_som);
1671 if (!setsom_found)
1672 {
1673 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1675 stackpos -= (int)sizeof(sljit_sw);
1676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1677 stackpos -= (int)sizeof(sljit_sw);
1678 setsom_found = TRUE;
1679 }
1680 cc += 1;
1681 break;
1682
1683 case OP_MARK:
1684 case OP_PRUNE_ARG:
1685 case OP_THEN_ARG:
1686 SLJIT_ASSERT(common->mark_ptr != 0);
1687 if (!setmark_found)
1688 {
1689 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1690 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1691 stackpos -= (int)sizeof(sljit_sw);
1692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1693 stackpos -= (int)sizeof(sljit_sw);
1694 setmark_found = TRUE;
1695 }
1696 cc += 1 + 2 + cc[1];
1697 break;
1698
1699 case OP_RECURSE:
1700 if (common->has_set_som && !setsom_found)
1701 {
1702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1703 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1704 stackpos -= (int)sizeof(sljit_sw);
1705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1706 stackpos -= (int)sizeof(sljit_sw);
1707 setsom_found = TRUE;
1708 }
1709 if (common->mark_ptr != 0 && !setmark_found)
1710 {
1711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1713 stackpos -= (int)sizeof(sljit_sw);
1714 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1715 stackpos -= (int)sizeof(sljit_sw);
1716 setmark_found = TRUE;
1717 }
1718 if (common->capture_last_ptr != 0 && !capture_last_found)
1719 {
1720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1722 stackpos -= (int)sizeof(sljit_sw);
1723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1724 stackpos -= (int)sizeof(sljit_sw);
1725 capture_last_found = TRUE;
1726 }
1727 cc += 1 + LINK_SIZE;
1728 break;
1729
1730 case OP_CBRA:
1731 case OP_CBRAPOS:
1732 case OP_SCBRA:
1733 case OP_SCBRAPOS:
1734 if (common->capture_last_ptr != 0 && !capture_last_found)
1735 {
1736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1738 stackpos -= (int)sizeof(sljit_sw);
1739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1740 stackpos -= (int)sizeof(sljit_sw);
1741 capture_last_found = TRUE;
1742 }
1743 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1745 stackpos -= (int)sizeof(sljit_sw);
1746 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1747 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1749 stackpos -= (int)sizeof(sljit_sw);
1750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1751 stackpos -= (int)sizeof(sljit_sw);
1752
1753 cc += 1 + LINK_SIZE + IMM2_SIZE;
1754 break;
1755
1756 default:
1757 cc = next_opcode(common, cc);
1758 SLJIT_ASSERT(cc != NULL);
1759 break;
1760 }
1761
1762OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1763SLJIT_ASSERT(stackpos == STACK(stacktop));
1764}
1765
1766static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1767{
1768int private_data_length = needs_control_head ? 3 : 2;
1769int size;
1770pcre_uchar *alternative;
1771/* Calculate the sum of the private machine words. */
1772while (cc < ccend)
1773 {
1774 size = 0;
1775 switch(*cc)
1776 {
1777 case OP_KET:
1778 if (PRIVATE_DATA(cc) != 0)
1779 {
1780 private_data_length++;
1781 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1782 cc += PRIVATE_DATA(cc + 1);
1783 }
1784 cc += 1 + LINK_SIZE;
1785 break;
1786
1787 case OP_ASSERT:
1788 case OP_ASSERT_NOT:
1789 case OP_ASSERTBACK:
1790 case OP_ASSERTBACK_NOT:
1791 case OP_ONCE:
1792 case OP_ONCE_NC:
1793 case OP_BRAPOS:
1794 case OP_SBRA:
1795 case OP_SBRAPOS:
1796 case OP_SCOND:
1797 private_data_length++;
1798 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1799 cc += 1 + LINK_SIZE;
1800 break;
1801
1802 case OP_CBRA:
1803 case OP_SCBRA:
1804 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1805 private_data_length++;
1806 cc += 1 + LINK_SIZE + IMM2_SIZE;
1807 break;
1808
1809 case OP_CBRAPOS:
1810 case OP_SCBRAPOS:
1811 private_data_length += 2;
1812 cc += 1 + LINK_SIZE + IMM2_SIZE;
1813 break;
1814
1815 case OP_COND:
1816 /* Might be a hidden SCOND. */
1817 alternative = cc + GET(cc, 1);
1818 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1819 private_data_length++;
1820 cc += 1 + LINK_SIZE;
1821 break;
1822
1823 CASE_ITERATOR_PRIVATE_DATA_1
1824 if (PRIVATE_DATA(cc))
1825 private_data_length++;
1826 cc += 2;
1827#ifdef SUPPORT_UTF
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1829#endif
1830 break;
1831
1832 CASE_ITERATOR_PRIVATE_DATA_2A
1833 if (PRIVATE_DATA(cc))
1834 private_data_length += 2;
1835 cc += 2;
1836#ifdef SUPPORT_UTF
1837 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1838#endif
1839 break;
1840
1841 CASE_ITERATOR_PRIVATE_DATA_2B
1842 if (PRIVATE_DATA(cc))
1843 private_data_length += 2;
1844 cc += 2 + IMM2_SIZE;
1845#ifdef SUPPORT_UTF
1846 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1847#endif
1848 break;
1849
1850 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1851 if (PRIVATE_DATA(cc))
1852 private_data_length++;
1853 cc += 1;
1854 break;
1855
1856 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1857 if (PRIVATE_DATA(cc))
1858 private_data_length += 2;
1859 cc += 1;
1860 break;
1861
1862 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1863 if (PRIVATE_DATA(cc))
1864 private_data_length += 2;
1865 cc += 1 + IMM2_SIZE;
1866 break;
1867
1868 case OP_CLASS:
1869 case OP_NCLASS:
1870#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1871 case OP_XCLASS:
1872 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1873#else
1874 size = 1 + 32 / (int)sizeof(pcre_uchar);
1875#endif
1876 if (PRIVATE_DATA(cc))
1877 private_data_length += get_class_iterator_size(cc + size);
1878 cc += size;
1879 break;
1880
1881 default:
1882 cc = next_opcode(common, cc);
1883 SLJIT_ASSERT(cc != NULL);
1884 break;
1885 }
1886 }
1887SLJIT_ASSERT(cc == ccend);
1888return private_data_length;
1889}
1890
1891static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1892 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1893{
1894DEFINE_COMPILER;
1895int srcw[2];
1896int count, size;
1897BOOL tmp1next = TRUE;
1898BOOL tmp1empty = TRUE;
1899BOOL tmp2empty = TRUE;
1900pcre_uchar *alternative;
1901enum {
1902 loop,
1903 end
1904} status;
1905
1906status = loop;
1907stackptr = STACK(stackptr);
1908stacktop = STACK(stacktop - 1);
1909
1910if (!save)
1911 {
1912 stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1913 if (stackptr < stacktop)
1914 {
1915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1916 stackptr += sizeof(sljit_sw);
1917 tmp1empty = FALSE;
1918 }
1919 if (stackptr < stacktop)
1920 {
1921 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1922 stackptr += sizeof(sljit_sw);
1923 tmp2empty = FALSE;
1924 }
1925 /* The tmp1next must be TRUE in either way. */
1926 }
1927
1928SLJIT_ASSERT(common->recursive_head_ptr != 0);
1929
1930do
1931 {
1932 count = 0;
1933 if (cc >= ccend)
1934 {
1935 if (!save)
1936 break;
1937
1938 count = 1;
1939 srcw[0] = common->recursive_head_ptr;
1940 if (needs_control_head)
1941 {
1942 SLJIT_ASSERT(common->control_head_ptr != 0);
1943 count = 2;
1944 srcw[0] = common->control_head_ptr;
1945 srcw[1] = common->recursive_head_ptr;
1946 }
1947 status = end;
1948 }
1949 else switch(*cc)
1950 {
1951 case OP_KET:
1952 if (PRIVATE_DATA(cc) != 0)
1953 {
1954 count = 1;
1955 srcw[0] = PRIVATE_DATA(cc);
1956 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1957 cc += PRIVATE_DATA(cc + 1);
1958 }
1959 cc += 1 + LINK_SIZE;
1960 break;
1961
1962 case OP_ASSERT:
1963 case OP_ASSERT_NOT:
1964 case OP_ASSERTBACK:
1965 case OP_ASSERTBACK_NOT:
1966 case OP_ONCE:
1967 case OP_ONCE_NC:
1968 case OP_BRAPOS:
1969 case OP_SBRA:
1970 case OP_SBRAPOS:
1971 case OP_SCOND:
1972 count = 1;
1973 srcw[0] = PRIVATE_DATA(cc);
1974 SLJIT_ASSERT(srcw[0] != 0);
1975 cc += 1 + LINK_SIZE;
1976 break;
1977
1978 case OP_CBRA:
1979 case OP_SCBRA:
1980 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1981 {
1982 count = 1;
1983 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1984 }
1985 cc += 1 + LINK_SIZE + IMM2_SIZE;
1986 break;
1987
1988 case OP_CBRAPOS:
1989 case OP_SCBRAPOS:
1990 count = 2;
1991 srcw[0] = PRIVATE_DATA(cc);
1992 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1993 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1994 cc += 1 + LINK_SIZE + IMM2_SIZE;
1995 break;
1996
1997 case OP_COND:
1998 /* Might be a hidden SCOND. */
1999 alternative = cc + GET(cc, 1);
2000 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2001 {
2002 count = 1;
2003 srcw[0] = PRIVATE_DATA(cc);
2004 SLJIT_ASSERT(srcw[0] != 0);
2005 }
2006 cc += 1 + LINK_SIZE;
2007 break;
2008
2009 CASE_ITERATOR_PRIVATE_DATA_1
2010 if (PRIVATE_DATA(cc))
2011 {
2012 count = 1;
2013 srcw[0] = PRIVATE_DATA(cc);
2014 }
2015 cc += 2;
2016#ifdef SUPPORT_UTF
2017 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2018#endif
2019 break;
2020
2021 CASE_ITERATOR_PRIVATE_DATA_2A
2022 if (PRIVATE_DATA(cc))
2023 {
2024 count = 2;
2025 srcw[0] = PRIVATE_DATA(cc);
2026 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2027 }
2028 cc += 2;
2029#ifdef SUPPORT_UTF
2030 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2031#endif
2032 break;
2033
2034 CASE_ITERATOR_PRIVATE_DATA_2B
2035 if (PRIVATE_DATA(cc))
2036 {
2037 count = 2;
2038 srcw[0] = PRIVATE_DATA(cc);
2039 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2040 }
2041 cc += 2 + IMM2_SIZE;
2042#ifdef SUPPORT_UTF
2043 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2044#endif
2045 break;
2046
2047 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2048 if (PRIVATE_DATA(cc))
2049 {
2050 count = 1;
2051 srcw[0] = PRIVATE_DATA(cc);
2052 }
2053 cc += 1;
2054 break;
2055
2056 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2057 if (PRIVATE_DATA(cc))
2058 {
2059 count = 2;
2060 srcw[0] = PRIVATE_DATA(cc);
2061 srcw[1] = srcw[0] + sizeof(sljit_sw);
2062 }
2063 cc += 1;
2064 break;
2065
2066 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2067 if (PRIVATE_DATA(cc))
2068 {
2069 count = 2;
2070 srcw[0] = PRIVATE_DATA(cc);
2071 srcw[1] = srcw[0] + sizeof(sljit_sw);
2072 }
2073 cc += 1 + IMM2_SIZE;
2074 break;
2075
2076 case OP_CLASS:
2077 case OP_NCLASS:
2078#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2079 case OP_XCLASS:
2080 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2081#else
2082 size = 1 + 32 / (int)sizeof(pcre_uchar);
2083#endif
2084 if (PRIVATE_DATA(cc))
2085 switch(get_class_iterator_size(cc + size))
2086 {
2087 case 1:
2088 count = 1;
2089 srcw[0] = PRIVATE_DATA(cc);
2090 break;
2091
2092 case 2:
2093 count = 2;
2094 srcw[0] = PRIVATE_DATA(cc);
2095 srcw[1] = srcw[0] + sizeof(sljit_sw);
2096 break;
2097
2098 default:
2099 SLJIT_UNREACHABLE();
2100 break;
2101 }
2102 cc += size;
2103 break;
2104
2105 default:
2106 cc = next_opcode(common, cc);
2107 SLJIT_ASSERT(cc != NULL);
2108 break;
2109 }
2110
2111 while (count > 0)
2112 {
2113 count--;
2114 if (save)
2115 {
2116 if (tmp1next)
2117 {
2118 if (!tmp1empty)
2119 {
2120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2121 stackptr += sizeof(sljit_sw);
2122 }
2123 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2124 tmp1empty = FALSE;
2125 tmp1next = FALSE;
2126 }
2127 else
2128 {
2129 if (!tmp2empty)
2130 {
2131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2132 stackptr += sizeof(sljit_sw);
2133 }
2134 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2135 tmp2empty = FALSE;
2136 tmp1next = TRUE;
2137 }
2138 }
2139 else
2140 {
2141 if (tmp1next)
2142 {
2143 SLJIT_ASSERT(!tmp1empty);
2144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2145 tmp1empty = stackptr >= stacktop;
2146 if (!tmp1empty)
2147 {
2148 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2149 stackptr += sizeof(sljit_sw);
2150 }
2151 tmp1next = FALSE;
2152 }
2153 else
2154 {
2155 SLJIT_ASSERT(!tmp2empty);
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2157 tmp2empty = stackptr >= stacktop;
2158 if (!tmp2empty)
2159 {
2160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2161 stackptr += sizeof(sljit_sw);
2162 }
2163 tmp1next = TRUE;
2164 }
2165 }
2166 }
2167 }
2168while (status != end);
2169
2170if (save)
2171 {
2172 if (tmp1next)
2173 {
2174 if (!tmp1empty)
2175 {
2176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2177 stackptr += sizeof(sljit_sw);
2178 }
2179 if (!tmp2empty)
2180 {
2181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2182 stackptr += sizeof(sljit_sw);
2183 }
2184 }
2185 else
2186 {
2187 if (!tmp2empty)
2188 {
2189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2190 stackptr += sizeof(sljit_sw);
2191 }
2192 if (!tmp1empty)
2193 {
2194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2195 stackptr += sizeof(sljit_sw);
2196 }
2197 }
2198 }
2199SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2200}
2201
2202static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2203{
2204pcre_uchar *end = bracketend(cc);
2205BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2206
2207/* Assert captures then. */
2208if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2209 current_offset = NULL;
2210/* Conditional block does not. */
2211if (*cc == OP_COND || *cc == OP_SCOND)
2212 has_alternatives = FALSE;
2213
2214cc = next_opcode(common, cc);
2215if (has_alternatives)
2216 current_offset = common->then_offsets + (cc - common->start);
2217
2218while (cc < end)
2219 {
2220 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2221 cc = set_then_offsets(common, cc, current_offset);
2222 else
2223 {
2224 if (*cc == OP_ALT && has_alternatives)
2225 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2226 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2227 *current_offset = 1;
2228 cc = next_opcode(common, cc);
2229 }
2230 }
2231
2232return end;
2233}
2234
/* Retire the case-label helper macros defined earlier in this file; from
this point on the names are undefined. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2241
2242static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2243{
2244return (value & (value - 1)) == 0;
2245}
2246
2247static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2248{
2249while (list)
2250 {
2251 /* sljit_set_label is clever enough to do nothing
2252 if either the jump or the label is NULL. */
2253 SET_LABEL(list->jump, label);
2254 list = list->next;
2255 }
2256}
2257
2258static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2259{
2260jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2261if (list_item)
2262 {
2263 list_item->next = *list;
2264 list_item->jump = jump;
2265 *list = list_item;
2266 }
2267}
2268
2269static void add_stub(compiler_common *common, struct sljit_jump *start)
2270{
2271DEFINE_COMPILER;
2272stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2273
2274if (list_item)
2275 {
2276 list_item->start = start;
2277 list_item->quit = LABEL();
2278 list_item->next = common->stubs;
2279 common->stubs = list_item;
2280 }
2281}
2282
2283static void flush_stubs(compiler_common *common)
2284{
2285DEFINE_COMPILER;
2286stub_list *list_item = common->stubs;
2287
2288while (list_item)
2289 {
2290 JUMPHERE(list_item->start);
2291 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2292 JUMPTO(SLJIT_JUMP, list_item->quit);
2293 list_item = list_item->next;
2294 }
2295common->stubs = NULL;
2296}
2297
2298static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2299{
2300DEFINE_COMPILER;
2301label_addr_list *label_addr;
2302
2303label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2304if (label_addr == NULL)
2305 return;
2306label_addr->label = LABEL();
2307label_addr->update_addr = update_addr;
2308label_addr->next = common->label_addrs;
2309common->label_addrs = label_addr;
2310}
2311
2312static SLJIT_INLINE void count_match(compiler_common *common)
2313{
2314DEFINE_COMPILER;
2315
2316OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2317add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2318}
2319
2320static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2321{
2322/* May destroy all locals and registers except TMP2. */
2323DEFINE_COMPILER;
2324
2325SLJIT_ASSERT(size > 0);
2326OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2327#ifdef DESTROY_REGISTERS
2328OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2329OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2330OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2331OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2332OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2333#endif
2334add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2335}
2336
2337static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2338{
2339DEFINE_COMPILER;
2340
2341SLJIT_ASSERT(size > 0);
2342OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2343}
2344
2345static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2346{
2347DEFINE_COMPILER;
2348sljit_uw *result;
2349
2350if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2351 return NULL;
2352
2353result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2354if (SLJIT_UNLIKELY(result == NULL))
2355 {
2356 sljit_set_compiler_memory_error(compiler);
2357 return NULL;
2358 }
2359
2360*(void**)result = common->read_only_data_head;
2361common->read_only_data_head = (void *)result;
2362return result + 1;
2363}
2364
2365static void free_read_only_data(void *current, void *allocator_data)
2366{
2367void *next;
2368
2369SLJIT_UNUSED_ARG(allocator_data);
2370
2371while (current != NULL)
2372 {
2373 next = *(void**)current;
2374 SLJIT_FREE(current, allocator_data);
2375 current = next;
2376 }
2377}
2378
2379static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2380{
2381DEFINE_COMPILER;
2382struct sljit_label *loop;
2383int i;
2384
2385/* At this point we can freely use all temporary registers. */
2386SLJIT_ASSERT(length > 1);
2387/* TMP1 returns with begin - 1. */
2388OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2389if (length < 8)
2390 {
2391 for (i = 1; i < length; i++)
2392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2393 }
2394else
2395 {
2396 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2397 {
2398 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2399 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2400 loop = LABEL();
2401 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2402 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2403 JUMPTO(SLJIT_NOT_ZERO, loop);
2404 }
2405 else
2406 {
2407 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2408 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2409 loop = LABEL();
2410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2411 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2412 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2413 JUMPTO(SLJIT_NOT_ZERO, loop);
2414 }
2415 }
2416}
2417
2418static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2419{
2420DEFINE_COMPILER;
2421sljit_s32 i;
2422
2423SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2424
2425OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2426for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2427 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2428}
2429
2430static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2431{
2432DEFINE_COMPILER;
2433struct sljit_label *loop;
2434int i;
2435
2436SLJIT_ASSERT(length > 1);
2437/* OVECTOR(1) contains the "string begin - 1" constant. */
2438if (length > 2)
2439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2440if (length < 8)
2441 {
2442 for (i = 2; i < length; i++)
2443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2444 }
2445else
2446 {
2447 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2448 {
2449 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2450 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2451 loop = LABEL();
2452 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2453 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2454 JUMPTO(SLJIT_NOT_ZERO, loop);
2455 }
2456 else
2457 {
2458 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2459 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2460 loop = LABEL();
2461 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2462 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2463 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2464 JUMPTO(SLJIT_NOT_ZERO, loop);
2465 }
2466 }
2467
2468OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2469if (common->mark_ptr != 0)
2470 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2471if (common->control_head_ptr != 0)
2472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2473OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2474OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2475OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2476}
2477
2478static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2479{
2480while (current != NULL)
2481 {
2482 switch (current[1])
2483 {
2484 case type_then_trap:
2485 break;
2486
2487 case type_mark:
2488 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2489 return current[3];
2490 break;
2491
2492 default:
2493 SLJIT_UNREACHABLE();
2494 break;
2495 }
2496 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2497 current = (sljit_sw*)current[0];
2498 }
2499return 0;
2500}
2501
2502static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2503{
2504DEFINE_COMPILER;
2505struct sljit_label *loop;
2506struct sljit_jump *early_quit;
2507BOOL has_pre;
2508
2509/* At this point we can freely use all registers. */
2510OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2511OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2512
2513OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2514if (common->mark_ptr != 0)
2515 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2516OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2517if (common->mark_ptr != 0)
2518 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2519OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2520OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2521
2522has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
2523GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
2524
2525/* Unlikely, but possible */
2526early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2527loop = LABEL();
2528
2529if (has_pre)
2530 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
2531else
2532 {
2533 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
2534 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2535 }
2536
2537OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
2538OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
2539/* Copy the integer value to the output buffer */
2540#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2541OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2542#endif
2543
2544OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
2545OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2546JUMPTO(SLJIT_NOT_ZERO, loop);
2547JUMPHERE(early_quit);
2548
2549/* Calculate the return value, which is the maximum ovector value. */
2550if (topbracket > 1)
2551 {
2552 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
2553 {
2554 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2555 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2556
2557 /* OVECTOR(0) is never equal to SLJIT_S2. */
2558 loop = LABEL();
2559 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2560 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2561 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2562 }
2563 else
2564 {
2565 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
2566 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2567
2568 /* OVECTOR(0) is never equal to SLJIT_S2. */
2569 loop = LABEL();
2570 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
2571 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
2572 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2573 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2574 }
2575 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2576 }
2577else
2578 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2579}
2580
2581static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2582{
2583DEFINE_COMPILER;
2584struct sljit_jump *jump;
2585
2586SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2587SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2588 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2589
2590OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2591OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2592OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2593CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2594
2595/* Store match begin and end. */
2596OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2597OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2598
2599jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2600OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2601#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2602OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2603#endif
2604OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2605JUMPHERE(jump);
2606
2607OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2608OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2609#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2610OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2611#endif
2612OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2613
2614OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2615#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2616OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2617#endif
2618OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2619
2620JUMPTO(SLJIT_JUMP, quit);
2621}
2622
2623static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2624{
2625/* May destroy TMP1. */
2626DEFINE_COMPILER;
2627struct sljit_jump *jump;
2628
2629if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2630 {
2631 /* The value of -1 must be kept for start_used_ptr! */
2632 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2633 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2634 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2635 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2636 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2637 JUMPHERE(jump);
2638 }
2639else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2640 {
2641 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2643 JUMPHERE(jump);
2644 }
2645}
2646
2647static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2648{
2649/* Detects if the character has an othercase. */
2650unsigned int c;
2651
2652#ifdef SUPPORT_UTF
2653if (common->utf)
2654 {
2655 GETCHAR(c, cc);
2656 if (c > 127)
2657 {
2658#ifdef SUPPORT_UCP
2659 return c != UCD_OTHERCASE(c);
2660#else
2661 return FALSE;
2662#endif
2663 }
2664#ifndef COMPILE_PCRE8
2665 return common->fcc[c] != c;
2666#endif
2667 }
2668else
2669#endif
2670 c = *cc;
2671return MAX_255(c) ? common->fcc[c] != c : FALSE;
2672}
2673
2674static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2675{
2676/* Returns with the othercase. */
2677#ifdef SUPPORT_UTF
2678if (common->utf && c > 127)
2679 {
2680#ifdef SUPPORT_UCP
2681 return UCD_OTHERCASE(c);
2682#else
2683 return c;
2684#endif
2685 }
2686#endif
2687return TABLE_GET(c, common->fcc, c);
2688}
2689
2690static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2691{
2692/* Detects if the character and its othercase has only 1 bit difference. */
2693unsigned int c, oc, bit;
2694#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2695int n;
2696#endif
2697
2698#ifdef SUPPORT_UTF
2699if (common->utf)
2700 {
2701 GETCHAR(c, cc);
2702 if (c <= 127)
2703 oc = common->fcc[c];
2704 else
2705 {
2706#ifdef SUPPORT_UCP
2707 oc = UCD_OTHERCASE(c);
2708#else
2709 oc = c;
2710#endif
2711 }
2712 }
2713else
2714 {
2715 c = *cc;
2716 oc = TABLE_GET(c, common->fcc, c);
2717 }
2718#else
2719c = *cc;
2720oc = TABLE_GET(c, common->fcc, c);
2721#endif
2722
2723SLJIT_ASSERT(c != oc);
2724
2725bit = c ^ oc;
2726/* Optimized for English alphabet. */
2727if (c <= 127 && bit == 0x20)
2728 return (0 << 8) | 0x20;
2729
2730/* Since c != oc, they must have at least 1 bit difference. */
2731if (!is_powerof2(bit))
2732 return 0;
2733
2734#if defined COMPILE_PCRE8
2735
2736#ifdef SUPPORT_UTF
2737if (common->utf && c > 127)
2738 {
2739 n = GET_EXTRALEN(*cc);
2740 while ((bit & 0x3f) == 0)
2741 {
2742 n--;
2743 bit >>= 6;
2744 }
2745 return (n << 8) | bit;
2746 }
2747#endif /* SUPPORT_UTF */
2748return (0 << 8) | bit;
2749
2750#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2751
2752#ifdef SUPPORT_UTF
2753if (common->utf && c > 65535)
2754 {
2755 if (bit >= (1 << 10))
2756 bit >>= 10;
2757 else
2758 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2759 }
2760#endif /* SUPPORT_UTF */
2761return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2762
2763#endif /* COMPILE_PCRE[8|16|32] */
2764}
2765
2766static void check_partial(compiler_common *common, BOOL force)
2767{
2768/* Checks whether a partial matching is occurred. Does not modify registers. */
2769DEFINE_COMPILER;
2770struct sljit_jump *jump = NULL;
2771
2772SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2773
2774if (common->mode == JIT_COMPILE)
2775 return;
2776
2777if (!force)
2778 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2779else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2780 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2781
2782if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2784else
2785 {
2786 if (common->partialmatchlabel != NULL)
2787 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2788 else
2789 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2790 }
2791
2792if (jump != NULL)
2793 JUMPHERE(jump);
2794}
2795
2796static void check_str_end(compiler_common *common, jump_list **end_reached)
2797{
2798/* Does not affect registers. Usually used in a tight spot. */
2799DEFINE_COMPILER;
2800struct sljit_jump *jump;
2801
2802if (common->mode == JIT_COMPILE)
2803 {
2804 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2805 return;
2806 }
2807
2808jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2809if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2810 {
2811 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2812 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2813 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2814 }
2815else
2816 {
2817 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2818 if (common->partialmatchlabel != NULL)
2819 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2820 else
2821 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2822 }
2823JUMPHERE(jump);
2824}
2825
2826static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2827{
2828DEFINE_COMPILER;
2829struct sljit_jump *jump;
2830
2831if (common->mode == JIT_COMPILE)
2832 {
2833 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2834 return;
2835 }
2836
2837/* Partial matching mode. */
2838jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2839add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2840if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2841 {
2842 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2843 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2844 }
2845else
2846 {
2847 if (common->partialmatchlabel != NULL)
2848 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2849 else
2850 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2851 }
2852JUMPHERE(jump);
2853}
2854
2855static void peek_char(compiler_common *common, sljit_u32 max)
2856{
2857/* Reads the character into TMP1, keeps STR_PTR.
2858Does not check STR_END. TMP2 Destroyed. */
2859DEFINE_COMPILER;
2860#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2861struct sljit_jump *jump;
2862#endif
2863
2864SLJIT_UNUSED_ARG(max);
2865
2866OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2867#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2868if (common->utf)
2869 {
2870 if (max < 128) return;
2871
2872 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2874 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2875 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2876 JUMPHERE(jump);
2877 }
2878#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2879
2880#if defined SUPPORT_UTF && defined COMPILE_PCRE16
2881if (common->utf)
2882 {
2883 if (max < 0xd800) return;
2884
2885 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2886 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2887 /* TMP2 contains the high surrogate. */
2888 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2889 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2890 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2891 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2892 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2893 JUMPHERE(jump);
2894 }
2895#endif
2896}
2897
2898#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2899
2900static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2901{
2902/* Tells whether the character codes below 128 are enough
2903to determine a match. */
2904const sljit_u8 value = nclass ? 0xff : 0;
2905const sljit_u8 *end = bitset + 32;
2906
2907bitset += 16;
2908do
2909 {
2910 if (*bitset++ != value)
2911 return FALSE;
2912 }
2913while (bitset < end);
2914return TRUE;
2915}
2916
2917static void read_char7_type(compiler_common *common, BOOL full_read)
2918{
2919/* Reads the precise character type of a character into TMP1, if the character
2920is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2921full_read argument tells whether characters above max are accepted or not. */
2922DEFINE_COMPILER;
2923struct sljit_jump *jump;
2924
2925SLJIT_ASSERT(common->utf);
2926
2927OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2928OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2929
2930OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2931
2932if (full_read)
2933 {
2934 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2935 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2937 JUMPHERE(jump);
2938 }
2939}
2940
2941#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2942
2943static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2944{
2945/* Reads the precise value of a character into TMP1, if the character is
2946between min and max (c >= min && c <= max). Otherwise it returns with a value
2947outside the range. Does not check STR_END. */
2948DEFINE_COMPILER;
2949#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2950struct sljit_jump *jump;
2951#endif
2952#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2953struct sljit_jump *jump2;
2954#endif
2955
2956SLJIT_UNUSED_ARG(update_str_ptr);
2957SLJIT_UNUSED_ARG(min);
2958SLJIT_UNUSED_ARG(max);
2959SLJIT_ASSERT(min <= max);
2960
2961OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2962OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2963
2964#if defined SUPPORT_UTF && defined COMPILE_PCRE8
2965if (common->utf)
2966 {
2967 if (max < 128 && !update_str_ptr) return;
2968
2969 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2970 if (min >= 0x10000)
2971 {
2972 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2973 if (update_str_ptr)
2974 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2975 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2976 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2977 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2978 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2979 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2980 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2981 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2982 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2983 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2984 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2985 if (!update_str_ptr)
2986 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2987 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2988 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2989 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2990 JUMPHERE(jump2);
2991 if (update_str_ptr)
2992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2993 }
2994 else if (min >= 0x800 && max <= 0xffff)
2995 {
2996 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2997 if (update_str_ptr)
2998 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2999 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3000 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3001 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3002 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3003 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3004 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3005 if (!update_str_ptr)
3006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3007 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3008 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3009 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3010 JUMPHERE(jump2);
3011 if (update_str_ptr)
3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3013 }
3014 else if (max >= 0x800)
3015 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3016 else if (max < 128)
3017 {
3018 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3019 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3020 }
3021 else
3022 {
3023 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3024 if (!update_str_ptr)
3025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3026 else
3027 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3028 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3029 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3030 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3031 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3032 if (update_str_ptr)
3033 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3034 }
3035 JUMPHERE(jump);
3036 }
3037#endif
3038
3039#if defined SUPPORT_UTF && defined COMPILE_PCRE16
3040if (common->utf)
3041 {
3042 if (max >= 0x10000)
3043 {
3044 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3045 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3046 /* TMP2 contains the high surrogate. */
3047 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3048 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
3049 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3050 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3051 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
3052 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3053 JUMPHERE(jump);
3054 return;
3055 }
3056
3057 if (max < 0xd800 && !update_str_ptr) return;
3058
3059 /* Skip low surrogate if necessary. */
3060 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3061 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3062 if (update_str_ptr)
3063 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3064 if (max >= 0xd800)
3065 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3066 JUMPHERE(jump);
3067 }
3068#endif
3069}
3070
3071static SLJIT_INLINE void read_char(compiler_common *common)
3072{
3073read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3074}
3075
3076static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3077{
3078/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3079DEFINE_COMPILER;
3080#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3081struct sljit_jump *jump;
3082#endif
3083#if defined SUPPORT_UTF && defined COMPILE_PCRE8
3084struct sljit_jump *jump2;
3085#endif
3086
3087SLJIT_UNUSED_ARG(update_str_ptr);
3088
3089OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3090OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3091
3092#if defined SUPPORT_UTF && defined COMPILE_PCRE8
3093if (common->utf)
3094 {
3095 /* This can be an extra read in some situations, but hopefully
3096 it is needed in most cases. */
3097 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3098 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3099 if (!update_str_ptr)
3100 {
3101 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3102 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3103 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3104 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3105 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3106 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3108 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3109 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3110 JUMPHERE(jump2);
3111 }
3112 else
3113 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3114 JUMPHERE(jump);
3115 return;
3116 }
3117#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3118
3119#if !defined COMPILE_PCRE8
3120/* The ctypes array contains only 256 values. */
3121OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3122jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3123#endif
3124OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3125#if !defined COMPILE_PCRE8
3126JUMPHERE(jump);
3127#endif
3128
3129#if defined SUPPORT_UTF && defined COMPILE_PCRE16
3130if (common->utf && update_str_ptr)
3131 {
3132 /* Skip low surrogate if necessary. */
3133 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3134 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3135 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3136 JUMPHERE(jump);
3137 }
3138#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3139}
3140
3141static void skip_char_back(compiler_common *common)
3142{
3143/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3144DEFINE_COMPILER;
3145#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3146#if defined COMPILE_PCRE8
3147struct sljit_label *label;
3148
3149if (common->utf)
3150 {
3151 label = LABEL();
3152 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3153 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3154 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3155 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3156 return;
3157 }
3158#elif defined COMPILE_PCRE16
3159if (common->utf)
3160 {
3161 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3162 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3163 /* Skip low surrogate if necessary. */
3164 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3165 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3166 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3167 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3168 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3169 return;
3170 }
3171#endif /* COMPILE_PCRE[8|16] */
3172#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3173OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3174}
3175
3176static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3177{
3178/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3179DEFINE_COMPILER;
3180struct sljit_jump *jump;
3181
3182if (nltype == NLTYPE_ANY)
3183 {
3184 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3185 sljit_set_current_flags(compiler, SLJIT_SET_Z);
3186 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3187 }
3188else if (nltype == NLTYPE_ANYCRLF)
3189 {
3190 if (jumpifmatch)
3191 {
3192 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3193 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3194 }
3195 else
3196 {
3197 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3198 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3199 JUMPHERE(jump);
3200 }
3201 }
3202else
3203 {
3204 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3205 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3206 }
3207}
3208
3209#ifdef SUPPORT_UTF
3210
3211#if defined COMPILE_PCRE8
3212static void do_utfreadchar(compiler_common *common)
3213{
3214/* Fast decoding a UTF-8 character. TMP1 contains the first byte
3215of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3216DEFINE_COMPILER;
3217struct sljit_jump *jump;
3218
3219sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3220OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3221OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3222OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3223OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3224OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3225
3226/* Searching for the first zero. */
3227OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3228jump = JUMP(SLJIT_NOT_ZERO);
3229/* Two byte sequence. */
3230OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3231OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3232sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3233
3234JUMPHERE(jump);
3235OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3236OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3237OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3238OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3239OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3240
3241OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3242jump = JUMP(SLJIT_NOT_ZERO);
3243/* Three byte sequence. */
3244OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3245OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3246sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3247
3248/* Four byte sequence. */
3249JUMPHERE(jump);
3250OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3251OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3252OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3253OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3254OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3255OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3256OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3257sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3258}
3259
static void do_utfreadchar16(compiler_common *common)
{
/* Emits a fast-call helper that decodes a multi-byte UTF-8 character.
On entry TMP1 contains the first byte of the character (>= 0xc0) and the
code unit at STR_PTR is treated as its first continuation byte. The
decoded value is returned in TMP1, TMP2 is clobbered, and STR_PTR is
advanced past the continuation units that were consumed.

Two and three byte sequences are decoded exactly. When bit 0x400 is set
after the first merge (lead byte bit 0x10), STR_PTR is advanced by one
extra unit and the three byte arithmetic is applied anyway.
NOTE(review): presumably callers select this variant only when an exact
value is not required for such longer sequences - confirm at call sites. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((lead & 0x3f) << 6) | (first continuation & 0x3f). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. After the shift, bit 0x800 of TMP1 is
bit 0x20 of the lead byte: clear means a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* TMP2 = 1 when bit 0x400 (lead byte bit 0x10) is set, otherwise 0. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Merge the next continuation unit; the XOR removes the 0x800 marker bit
that came from the lead byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3295
static void do_utfreadtype8(compiler_common *common)
{
/* Emits a fast-call helper that fetches the character type of a multi-byte
UTF-8 character. On entry TMP2 contains the first byte of the character
(>= 0xc0). The type is returned in TMP1: the ctypes table entry for
characters below 256, and 0 for everything else. TMP2 is clobbered and
STR_PTR is advanced past the continuation bytes. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Lead byte bit 0x20 clear: two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 would give a
character >= 256 once the low 6 bits are appended. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
/* TMP2 = character value (< 256), used as the index into ctypes. */
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte character >= 256: no type information. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. For longer sequences
the amount to skip is read from utf8_table4, indexed by (lead - 0xc0). */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3331
3332#endif /* COMPILE_PCRE8 */
3333
3334#endif /* SUPPORT_UTF */
3335
3336#ifdef SUPPORT_UCP
3337
/* Geometry of the two-stage UCD lookup emitted by do_getucd: a character
is split into a block number (value >> UCD_BLOCK_SHIFT) and an index inside
that block (value & UCD_BLOCK_MASK).
UCD_BLOCK_SIZE must be 128 (see the assert below). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
3341
static void do_getucd(compiler_common *common)
{
/* Emits a fast-call helper that looks up the UCD record of the character
passed in TMP1. Returns the chartype in TMP1 and the UCD record index
(the value read from ucd_stage2) in TMP2. */
DEFINE_COMPILER;
#ifdef COMPILE_PCRE32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record: the record returned for INVALID_UTF_CHAR must carry
neutral properties, because out of range values are mapped to it below. */
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shifts used below (block shift 7, record index shift 3) rely on
these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#ifdef COMPILE_PCRE32
/* Without UTF checking a 32 bit unit may exceed 0x10ffff; replace such
values with INVALID_UTF_CHAR before indexing the tables. */
if (!common->utf)
 {
 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
 JUMPHERE(jump);
 }
#endif

/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (block number). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (block number << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = 16 bit entry of ucd_stage2 at index TMP1 (index scaled by 2). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of record TMP2 (index scaled by 8, the record size). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3382#endif
3383
/* Emits the entry of the main matching loop and returns the label that
advances STR_PTR to the next start position.

common     compiler state
hascrorlf  passed by the caller; when TRUE the two-unit newline skipping
           code (newlinecheck) is not generated

The first pass through the emitted code jumps straight to the end of this
section (the "start" jump), so the initial position is tried unchanged.
A jump to the returned label moves STR_PTR forward by one character. */
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The newline check is only generated when neither hascrorlf nor the
first line limit is active, and the newline convention is ANY, ANYCRLF or
a fixed two-unit sequence (newline > 255). */
if (!(hascrorlf || (common->match_end_ptr != 0)) &&
 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
 newlinecheck = TRUE;

if (common->match_end_ptr != 0)
 {
 /* Search for the end of the first line. STR_PTR is saved in TMP3 and
 restored afterwards; the result is stored in the match_end_ptr slot. */
 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
 {
 /* Two-unit fixed newline: compare the unit pair at STR_PTR - 1 and
 STR_PTR against the high and low byte of common->newline. */
 mainloop = LABEL();
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
 JUMPHERE(end);
 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 }
 else
 {
 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
 mainloop = LABEL();
 /* Continual stores does not cause data dependency. */
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
 read_char_range(common, common->nlmin, common->nlmax, TRUE);
 check_newlinechar(common, common->nltype, &newline, TRUE);
 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
 JUMPHERE(end);
 /* Reached the end of the subject without a newline: the limit is
 the current STR_PTR. Jumps collected in "newline" skip this store,
 keeping the position recorded before the newline was read. */
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
 set_jumps(newline, LABEL());
 }

 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
 }

/* First entry: skip the advancing code below. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
 {
 /* Reached from the main loop when the current unit equals the first
 newline unit: step over it and, if the following unit equals the second
 newline unit, step over that one as well (TMP1 is 0 or 1, scaled to the
 unit size). */
 newlinelabel = LABEL();
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
 end2 = JUMP(SLJIT_JUMP);
 }

/* This is the label returned to the caller. */
mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UTF
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

/* The current unit is needed in TMP1 for the newline test and/or for
finding the length of a multi-unit UTF character. */
if (readuchar)
 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
if (common->utf)
 {
 /* Lead byte >= 0xc0: add the extra amount read from utf8_table4. */
 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
 JUMPHERE(singlechar);
 }
#elif defined COMPILE_PCRE16
if (common->utf)
 {
 /* Unit in 0xd800..0xdbff (high surrogate): skip one more 16 bit unit,
 i.e. two more bytes (the flag is shifted left by 1). */
 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
 JUMPHERE(singlechar);
 }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
JUMPHERE(start);

if (newlinecheck)
 {
 JUMPHERE(end);
 JUMPHERE(end2);
 }

return mainloop;
}
3501
/* Limits of the literal prefix scan (scan_prefix / add_prefix_char).
MAX_N_CHARS: maximum number of consecutive character positions examined.
MAX_DIFF_CHARS: number of slots reserved for each position. Slot 0 holds
the count of distinct characters (or 255 for "any character"), so at most
MAX_DIFF_CHARS - 1 distinct characters are stored per position. */
#define MAX_N_CHARS 16
#define MAX_DIFF_CHARS 6
3504
3505static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3506{
3507pcre_uchar i, len;
3508
3509len = chars[0];
3510if (len == 255)
3511 return;
3512
3513if (len == 0)
3514 {
3515 chars[0] = 1;
3516 chars[1] = chr;
3517 return;
3518 }
3519
3520for (i = len; i > 0; i--)
3521 if (chars[i] == chr)
3522 return;
3523
3524if (len >= MAX_DIFF_CHARS - 1)
3525 {
3526 chars[0] = 255;
3527 return;
3528 }
3529
3530len++;
3531chars[len] = chr;
3532chars[0] = len;
3533}
3534
/* Collects, for each leading character position of the pattern, the set of
code units that may appear there.

common     compiler state (utf flag, character tables)
cc         compiled pattern position where scanning starts
chars      per-position sets; each position owns MAX_DIFF_CHARS slots and is
           updated through add_prefix_char (slot 0 == 255 means "any")
max_chars  maximum number of positions that may still be filled
rec_count  work budget shared by all recursive calls; it is decremented on
           every loop iteration

Returns the number of positions processed starting at chars, or 0 when the
work budget is exhausted. Callers in this function use the value returned by
a recursive call as the new max_chars limit. */
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the encoded other-case form of a caseless literal. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
 {
 /* Give up when the shared work budget is used up. */
 if (*rec_count == 0)
 return 0;
 (*rec_count)--;

 /* last:     stop scanning after this item
 any:      the item matches a character that is not tracked ("any")
 class:    the item is a 256 bit class bitmap
 caseless: the literal also matches its other case */
 last = TRUE;
 any = FALSE;
 class = FALSE;
 caseless = FALSE;

 switch (*cc)
 {
 case OP_CHARI:
 caseless = TRUE;
 /* Fall through. */
 case OP_CHAR:
 last = FALSE;
 cc++;
 break;

 case OP_SOD:
 case OP_SOM:
 case OP_SET_SOM:
 case OP_NOT_WORD_BOUNDARY:
 case OP_WORD_BOUNDARY:
 case OP_EODN:
 case OP_EOD:
 case OP_CIRC:
 case OP_CIRCM:
 case OP_DOLL:
 case OP_DOLLM:
 /* Zero width assertions. */
 cc++;
 continue;

 case OP_ASSERT:
 case OP_ASSERT_NOT:
 case OP_ASSERTBACK:
 case OP_ASSERTBACK_NOT:
 /* Assertions consume nothing: skip the whole bracket. */
 cc = bracketend(cc);
 continue;

 case OP_PLUSI:
 case OP_MINPLUSI:
 case OP_POSPLUSI:
 caseless = TRUE;
 /* Fall through. */
 case OP_PLUS:
 case OP_MINPLUS:
 case OP_POSPLUS:
 /* At least one occurrence is certain; "last" stays TRUE so scanning
 stops after it. */
 cc++;
 break;

 case OP_EXACTI:
 caseless = TRUE;
 /* Fall through. */
 case OP_EXACT:
 repeat = GET2(cc, 1);
 last = FALSE;
 cc += 1 + IMM2_SIZE;
 break;

 case OP_QUERYI:
 case OP_MINQUERYI:
 case OP_POSQUERYI:
 caseless = TRUE;
 /* Fall through. */
 case OP_QUERY:
 case OP_MINQUERY:
 case OP_POSQUERY:
 /* Optional character: first scan the path where it is absent
 (starting right after the character), then add it below. */
 len = 1;
 cc++;
#ifdef SUPPORT_UTF
 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
 if (max_chars == 0)
 return consumed;
 last = FALSE;
 break;

 case OP_KET:
 cc += 1 + LINK_SIZE;
 continue;

 case OP_ALT:
 /* Follow the link stored in this OP_ALT. */
 cc += GET(cc, 1);
 continue;

 case OP_ONCE:
 case OP_ONCE_NC:
 case OP_BRA:
 case OP_BRAPOS:
 case OP_CBRA:
 case OP_CBRAPOS:
 /* Scan every further alternative recursively, then continue with
 the first alternative in this loop. */
 alternative = cc + GET(cc, 1);
 while (*alternative == OP_ALT)
 {
 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
 if (max_chars == 0)
 return consumed;
 alternative += GET(alternative, 1);
 }

 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
 cc += IMM2_SIZE;
 cc += 1 + LINK_SIZE;
 continue;

 case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
 return consumed;
#endif
 class = TRUE;
 break;

 case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 if (common->utf) return consumed;
#endif
 class = TRUE;
 break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
 case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 if (common->utf) return consumed;
#endif
 any = TRUE;
 cc += GET(cc, 1);
 break;
#endif

 case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
 return consumed;
#endif
 any = TRUE;
 cc++;
 break;

 case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
 return consumed;
#endif
 any = TRUE;
 cc++;
 break;

 case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
 return consumed;
#endif
 any = TRUE;
 cc++;
 break;

 case OP_NOT:
 case OP_NOTI:
 /* Skip the opcode here; the shared code below skips the character. */
 cc++;
 /* Fall through. */
 case OP_NOT_DIGIT:
 case OP_NOT_WHITESPACE:
 case OP_NOT_WORDCHAR:
 case OP_ANY:
 case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 if (common->utf) return consumed;
#endif
 any = TRUE;
 cc++;
 break;

#ifdef SUPPORT_UTF
 case OP_NOTPROP:
 case OP_PROP:
#ifndef COMPILE_PCRE32
 if (common->utf) return consumed;
#endif
 any = TRUE;
 cc += 1 + 2;
 break;
#endif

 case OP_TYPEEXACT:
 /* Only records the count; the following type opcode is handled by
 the next iteration with this repeat value. */
 repeat = GET2(cc, 1);
 cc += 1 + IMM2_SIZE;
 continue;

 case OP_NOTEXACT:
 case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 if (common->utf) return consumed;
#endif
 any = TRUE;
 repeat = GET2(cc, 1);
 cc += 1 + IMM2_SIZE + 1;
 break;

 default:
 return consumed;
 }

 if (any)
 {
 /* Mark "repeat" consecutive positions as matching any character. */
 do
 {
 chars[0] = 255;

 consumed++;
 if (--max_chars == 0)
 return consumed;
 chars += MAX_DIFF_CHARS;
 }
 while (--repeat > 0);

 repeat = 1;
 continue;
 }

 if (class)
 {
 /* The 32 byte bitmap follows the opcode; cc is moved to the optional
 repeat opcode behind it. */
 bytes = (sljit_u8*) (cc + 1);
 cc += 1 + 32 / sizeof(pcre_uchar);

 switch (*cc)
 {
 case OP_CRSTAR:
 case OP_CRMINSTAR:
 case OP_CRPOSSTAR:
 case OP_CRQUERY:
 case OP_CRMINQUERY:
 case OP_CRPOSQUERY:
 /* The class may be absent: scan the path that skips it first. */
 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
 if (max_chars == 0)
 return consumed;
 break;

 default:
 case OP_CRPLUS:
 case OP_CRMINPLUS:
 case OP_CRPOSPLUS:
 break;

 case OP_CRRANGE:
 case OP_CRMINRANGE:
 case OP_CRPOSRANGE:
 /* Use the first (minimum) count; a zero minimum stops the scan. */
 repeat = GET2(cc, 1);
 if (repeat <= 0)
 return consumed;
 break;
 }

 do
 {
 /* If the bit of character 255 is set the position is treated as
 "any"; otherwise every set bit is added as a possible character
 until the set saturates. */
 if (bytes[31] & 0x80)
 chars[0] = 255;
 else if (chars[0] != 255)
 {
 bytes_end = bytes + 32;
 chr = 0;
 do
 {
 byte = *bytes++;
 SLJIT_ASSERT((chr & 0x7) == 0);
 if (byte == 0)
 chr += 8;
 else
 {
 do
 {
 if ((byte & 0x1) != 0)
 add_prefix_char(chr, chars);
 byte >>= 1;
 chr++;
 }
 while (byte != 0);
 /* Round up to the first character of the next bitmap byte. */
 chr = (chr + 7) & ~7;
 }
 }
 while (chars[0] != 255 && bytes < bytes_end);
 /* Rewind to the start of the bitmap for the next repetition. */
 bytes = bytes_end - 32;
 }

 consumed++;
 if (--max_chars == 0)
 return consumed;
 chars += MAX_DIFF_CHARS;
 }
 while (--repeat > 0);

 switch (*cc)
 {
 case OP_CRSTAR:
 case OP_CRMINSTAR:
 case OP_CRPOSSTAR:
 return consumed;

 case OP_CRQUERY:
 case OP_CRMINQUERY:
 case OP_CRPOSQUERY:
 cc++;
 break;

 case OP_CRRANGE:
 case OP_CRMINRANGE:
 case OP_CRPOSRANGE:
 /* Scanning can only continue behind a fixed count (min == max). */
 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
 return consumed;
 cc += 1 + 2 * IMM2_SIZE;
 break;
 }

 repeat = 1;
 continue;
 }

 /* Literal character: len is its length in code units. */
 len = 1;
#ifdef SUPPORT_UTF
 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

 if (caseless && char_has_othercase(common, cc))
 {
#ifdef SUPPORT_UTF
 if (common->utf)
 {
 /* Both cases must have the same encoded length, otherwise the
 positions of the following characters would not line up. */
 GETCHAR(chr, cc);
 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
 return consumed;
 }
 else
#endif
 {
 chr = *cc;
 othercase[0] = TABLE_GET(chr, common->fcc, chr);
 }
 }
 else
 {
 caseless = FALSE;
 othercase[0] = 0; /* Stops compiler warning - PH */
 }

 /* Add the code units of the literal (and of its other case) "repeat"
 times, one position per code unit. */
 len_save = len;
 cc_save = cc;
 while (TRUE)
 {
 oc = othercase;
 do
 {
 chr = *cc;
 add_prefix_char(*cc, chars);

 if (caseless)
 add_prefix_char(*oc, chars);

 len--;
 consumed++;
 if (--max_chars == 0)
 return consumed;
 chars += MAX_DIFF_CHARS;
 cc++;
 oc++;
 }
 while (len > 0);

 if (--repeat == 0)
 break;

 len = len_save;
 cc = cc_save;
 }

 repeat = 1;
 if (last)
 return consumed;
 }
}
3932
3933#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3934
3935static sljit_s32 character_to_int32(pcre_uchar chr)
3936{
3937sljit_s32 value = (sljit_s32)chr;
3938#if defined COMPILE_PCRE8
3939#define SSE2_COMPARE_TYPE_INDEX 0
3940return (value << 24) | (value << 16) | (value << 8) | value;
3941#elif defined COMPILE_PCRE16
3942#define SSE2_COMPARE_TYPE_INDEX 1
3943return (value << 16) | value;
3944#elif defined COMPILE_PCRE32
3945#define SSE2_COMPARE_TYPE_INDEX 2
3946return value;
3947#else
3948#error "Unsupported unit width"
3949#endif
3950}
3951
/* Emits an SSE2 based search that moves STR_PTR forward to the first code
unit equal to char1 or char2 (the two may be identical).

The subject is read in aligned 16 byte blocks. The first block is read from
STR_PTR rounded down to a 16 byte boundary and the result bits that belong
to bytes before the original STR_PTR are shifted out. The loop is left when
a unit is found or when STR_PTR >= STR_END; the emitted code does not check
whether a found position lies before STR_END.

Register usage as encoded in the ModRM bytes below: xmm2 holds the
broadcast compare value, xmm3 the broadcast case bit or second character,
xmm0 / xmm1 the loaded data. TMP1 and TMP2 are clobbered, and RETURN_ADDR
is used as a scratch register when two compares are needed. */
static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit[3];
struct sljit_jump *nomatch;
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
BOOL load_twice = FALSE;
pcre_uchar bit;

/* When the two characters differ in exactly one bit, a single compare is
enough: that bit is forced to 1 in the data (POR) and the result is
compared with char1 | bit. */
bit = char1 ^ char2;
if (!is_powerof2(bit))
 bit = 0;

/* Otherwise two different characters need two loads and two compares. */
if ((char1 != char2) && bit == 0)
 load_twice = TRUE;

quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

/* The custom instructions below encode TMP1 without a REX prefix, hence
the index must be below 8. NOTE(review): TMP2 presumably has to be
register index 1 because it is used as a variable shift count - confirm. */
SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
 {
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

 /* MOVD xmm, r/m32 */
 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

/* PSHUFD xmm1, xmm2/m128, imm8 */
/* Shuffle control 0 copies the lowest 32 bit lane into all four lanes. */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (2 << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (char1 != char2)
 {
 /* PSHUFD xmm1, xmm2/m128, imm8 */
 instruction[3] = 0xc0 | (3 << 3) | 3;
 instruction[4] = 0;
 sljit_emit_op_custom(compiler, instruction, 5);
 }

/* TMP2 = misalignment of STR_PTR in bytes, STR_PTR = aligned address. */
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
 {
 instruction[2] = 0x6f;
 instruction[3] = (0 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 4);

 if (load_twice)
 {
 instruction[3] = (1 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 4);
 }
 }
else
 {
 /* Register index >= 8: insert a REX.B prefix (0x41) between the 0x66
 prefix and the opcode. instruction[1] is restored afterwards. */
 instruction[1] = 0x41;
 instruction[2] = 0x0f;
 instruction[3] = 0x6f;
 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
 sljit_emit_op_custom(compiler, instruction, 5);

 if (load_twice)
 {
 /* NOTE(review): unlike the line above, the index is not masked with
 0x7 here. For indexes 8..15 bit 3 is already covered by (1 << 3), so
 the resulting byte is the same as with the mask. */
 instruction[4] = (1 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 5);
 }
 instruction[1] = 0x0f;
 }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
 {
 instruction[3] = (1 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

#endif

if (bit != 0)
 {
 /* POR xmm1, xmm2/m128 */
 instruction[2] = 0xeb;
 instruction[3] = 0xc0 | (0 << 3) | 3;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* SSE2_COMPARE_TYPE_INDEX selects the B/W/D form for the unit width. */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
 {
 instruction[3] = 0xc0 | (1 << 3) | 3;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

/* PMOVMSKB reg, xmm */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
 {
 /* TMP2 still holds the misalignment: keep it in RETURN_ADDR while TMP2
 receives the second mask, then merge both masks into TMP1. */
 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
 sljit_emit_op_custom(compiler, instruction, 4);

 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
 }

/* Drop the mask bits of the bytes that precede the original STR_PTR. */
OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
/* TMP1 = index of the lowest set bit; the zero flag is set when the mask
is empty. */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

nomatch = JUMP(SLJIT_ZERO);

/* Found in the first block: aligned address + misalignment + bit index. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit[1] = JUMP(SLJIT_JUMP);

JUMPHERE(nomatch);

start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Second part (aligned) */

/* The BSF encoding above overwrote the first two bytes. */
instruction[0] = 0x66;
instruction[1] = 0x0f;

/* MOVDQA xmm1, xmm2/m128 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

if (str_ptr_ind < 8)
 {
 instruction[2] = 0x6f;
 instruction[3] = (0 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 4);

 if (load_twice)
 {
 instruction[3] = (1 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 4);
 }
 }
else
 {
 /* Same REX.B encoding as in the first part. */
 instruction[1] = 0x41;
 instruction[2] = 0x0f;
 instruction[3] = 0x6f;
 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
 sljit_emit_op_custom(compiler, instruction, 5);

 if (load_twice)
 {
 /* NOTE(review): index not masked with 0x7; same byte for 8..15. */
 instruction[4] = (1 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 5);
 }
 instruction[1] = 0x0f;
 }

#else

instruction[2] = 0x6f;
instruction[3] = (0 << 3) | str_ptr_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
 {
 instruction[3] = (1 << 3) | str_ptr_ind;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

#endif

if (bit != 0)
 {
 /* POR xmm1, xmm2/m128 */
 instruction[2] = 0xeb;
 instruction[3] = 0xc0 | (0 << 3) | 3;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

/* PCMPEQB/W/D xmm1, xmm2/m128 */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (0 << 3) | 2;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
 {
 instruction[3] = 0xc0 | (1 << 3) | 3;
 sljit_emit_op_custom(compiler, instruction, 4);
 }

/* PMOVMSKB reg, xmm */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

if (load_twice)
 {
 /* No misalignment to preserve here, so TMP2 can be overwritten. */
 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
 sljit_emit_op_custom(compiler, instruction, 4);

 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 }

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

/* Nothing found in this block: continue with the next 16 bytes. */
JUMPTO(SLJIT_ZERO, start);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

/* Common exit for all three early-out jumps. */
start = LABEL();
SET_LABEL(quit[0], start);
SET_LABEL(quit[1], start);
SET_LABEL(quit[2], start);
}
4211
4212#undef SSE2_COMPARE_TYPE_INDEX
4213
4214#endif
4215
/* Emits code that advances STR_PTR until the code unit located "offset"
units after it equals char1 or char2.

common  compiler state
char1   first accepted code unit
char2   second accepted code unit (may be equal to char1)
offset  distance in code units between STR_PTR and the searched unit;
        STR_PTR is moved forward by this amount for the search and moved
        back by the same amount afterwards

When common->match_end_ptr is set, STR_END is temporarily replaced by the
value stored in that slot plus (offset + 1) units, capped at the real
STR_END; the original STR_END is kept in TMP3 and restored at the end.
On x86 builds without SUPPORT_VALGRIND the SSE2 variant is used when the
CPU supports it. */
static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found;
pcre_uchar mask;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
BOOL has_match_end = (common->match_end_ptr != 0);

if (offset > 0)
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
 {
 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

 /* STR_END = min(stored limit + offset + 1, original STR_END). */
 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
 sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
 }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* Restart point used when a hit turns out to start inside a character. */
if (common->utf && offset > 0)
 utf_start = LABEL();
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

/* SSE2 accelerated first character search. */

if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
 {
 fast_forward_first_char2_sse2(common, char1, char2);

 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
 if (common->mode == JIT_COMPILE)
 {
 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
 SLJIT_ASSERT(common->forced_quit_label == NULL);
 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 if (common->utf && offset > 0)
 {
 SLJIT_ASSERT(common->mode == JIT_COMPILE);

 /* The unit at the would-be match start (STR_PTR - offset) must not
 be a UTF-8 continuation byte / UTF-16 low surrogate. If it is, the
 search restarts one unit further. */
 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 }
#endif

 if (offset > 0)
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
 }
 else
 {
 /* Other modes (offset is 0 here, see the assert above): when the
 search ran to or past the limit, set STR_PTR to the stored limit if
 there is one, otherwise to STR_END. */
 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
 if (has_match_end)
 {
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
 }
 else
 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
 }

 if (has_match_end)
 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
 return;
 }

#endif

/* Generic search loop: one code unit per iteration. */
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

start = LABEL();
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (char1 == char2)
 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
else
 {
 mask = char1 ^ char2;
 if (is_powerof2(mask))
 {
 /* The characters differ in one bit: force it to 1 and compare once. */
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
 }
 else
 {
 /* TMP2 = (TMP1 == char1) | (TMP1 == char2). */
 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
 found = JUMP(SLJIT_NOT_ZERO);
 }
 }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* The limit was reached without a hit: skip the UTF start check below. */
if (common->utf && offset > 0)
 utf_quit = JUMP(SLJIT_JUMP);
#endif

JUMPHERE(found);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf && offset > 0)
 {
 /* Same check as in the SSE2 path: reject a hit whose match start would
 be a continuation byte / low surrogate and continue one unit later. */
 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 JUMPHERE(utf_quit);
 }
#endif

JUMPHERE(quit);

if (has_match_end)
 {
 /* Nothing found before the temporary limit: continue from the stored
 limit (plus offset, which is subtracted again below). Then restore the
 real STR_END. */
 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
 if (offset > 0)
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
 JUMPHERE(quit);
 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
 }

if (offset > 0)
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
4373
4374static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4375{
4376DEFINE_COMPILER;
4377struct sljit_label *start;
4378struct sljit_jump *quit;
4379struct sljit_jump *match;
4380/* bytes[0] represent the number of characters between 0
4381and MAX_N_BYTES - 1, 255 represents any character. */
4382pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4383sljit_s32 offset;
4384pcre_uchar mask;
4385pcre_uchar *char_set, *char_set_end;
4386int i, max, from;
4387int range_right = -1, range_len;
4388sljit_u8 *update_table = NULL;
4389BOOL in_range;
4390sljit_u32 rec_count;
4391
4392for (i = 0; i < MAX_N_CHARS; i++)
4393 chars[i * MAX_DIFF_CHARS] = 0;
4394
4395rec_count = 10000;
4396max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4397
4398if (max < 1)
4399 return FALSE;
4400
4401in_range = FALSE;
4402/* Prevent compiler "uninitialized" warning */
4403from = 0;
4404range_len = 4 /* minimum length */ - 1;
4405for (i = 0; i <= max; i++)
4406 {
4407 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4408 {
4409 range_len = i - from;
4410 range_right = i - 1;
4411 }
4412
4413 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4414 {
4415 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4416 if (!in_range)
4417 {
4418 in_range = TRUE;
4419 from = i;
4420 }
4421 }
4422 else
4423 in_range = FALSE;
4424 }
4425
4426if (range_right >= 0)
4427 {
4428 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4429 if (update_table == NULL)
4430 return TRUE;
4431 memset(update_table, IN_UCHARS(range_len), 256);
4432
4433 for (i = 0; i < range_len; i++)
4434 {
4435 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4436 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4437 char_set_end = char_set + char_set[0];
4438 char_set++;
4439 while (char_set <= char_set_end)
4440 {
4441 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4442 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4443 char_set++;
4444 }
4445 }
4446 }
4447
4448offset = -1;
4449/* Scan forward. */
4450for (i = 0; i < max; i++)
4451 {
4452 if (offset == -1)
4453 {
4454 if (chars[i * MAX_DIFF_CHARS] <= 2)
4455 offset = i;
4456 }
4457 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4458 {
4459 if (chars[i * MAX_DIFF_CHARS] == 1)
4460 offset = i;
4461 else
4462 {
4463 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4464 if (!is_powerof2(mask))
4465 {
4466 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4467 if (is_powerof2(mask))
4468 offset = i;
4469 }
4470 }
4471 }
4472 }
4473
4474if (range_right < 0)
4475 {
4476 if (offset < 0)
4477 return FALSE;
4478 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4479 /* Works regardless the value is 1 or 2. */
4480 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4481 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4482 return TRUE;
4483 }
4484
4485if (range_right == offset)
4486 offset = -1;
4487
4488SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4489
4490max -= 1;
4491SLJIT_ASSERT(max > 0);
4492if (common->match_end_ptr != 0)
4493 {
4494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4495 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4496 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4497 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4498 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4499 JUMPHERE(quit);
4500 }
4501else
4502 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4503
4504SLJIT_ASSERT(range_right >= 0);
4505
4506#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4507OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4508#endif
4509
4510start = LABEL();
4511quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4512
4513#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4514OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4515#else
4516OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4517#endif
4518
4519#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4520OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4521#else
4522OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4523#endif
4524OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4525CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4526
4527if (offset >= 0)
4528 {
4529 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4530 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4531
4532 if (chars[offset * MAX_DIFF_CHARS] == 1)
4533 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4534 else
4535 {
4536 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4537 if (is_powerof2(mask))
4538 {
4539 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4540 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4541 }
4542 else
4543 {
4544 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4545 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4546 JUMPHERE(match);
4547 }
4548 }
4549 }
4550
4551#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4552if (common->utf && offset != 0)
4553 {
4554 if (offset < 0)
4555 {
4556 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4557 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4558 }
4559 else
4560 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4561#if defined COMPILE_PCRE8
4562 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4563 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4564#elif defined COMPILE_PCRE16
4565 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4566 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4567#else
4568#error "Unknown code width"
4569#endif
4570 if (offset < 0)
4571 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4572 }
4573#endif
4574
4575if (offset >= 0)
4576 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4577
4578JUMPHERE(quit);
4579
4580if (common->match_end_ptr != 0)
4581 {
4582 if (range_right >= 0)
4583 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4584 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4585 if (range_right >= 0)
4586 {
4587 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4588 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4589 JUMPHERE(quit);
4590 }
4591 }
4592else
4593 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4594return TRUE;
4595}
4596
4597#undef MAX_N_CHARS
4598#undef MAX_DIFF_CHARS
4599
4600static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4601{
4602pcre_uchar oc;
4603
4604oc = first_char;
4605if (caseless)
4606 {
4607 oc = TABLE_GET(first_char, common->fcc, first_char);
4608#if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4609 if (first_char > 127 && common->utf)
4610 oc = UCD_OTHERCASE(first_char);
4611#endif
4612 }
4613
4614fast_forward_first_char2(common, first_char, oc, 0);
4615}
4616
4617static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4618{
4619DEFINE_COMPILER;
4620struct sljit_label *loop;
4621struct sljit_jump *lastchar;
4622struct sljit_jump *firstchar;
4623struct sljit_jump *quit;
4624struct sljit_jump *foundcr = NULL;
4625struct sljit_jump *notfoundnl;
4626jump_list *newline = NULL;
4627
4628if (common->match_end_ptr != 0)
4629 {
4630 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4631 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4632 }
4633
4634if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4635 {
4636 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4637 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4638 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4640 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4641
4642 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4643 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4644 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4645#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4646 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4647#endif
4648 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4649
4650 loop = LABEL();
4651 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4652 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4655 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4656 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4657
4658 JUMPHERE(quit);
4659 JUMPHERE(firstchar);
4660 JUMPHERE(lastchar);
4661
4662 if (common->match_end_ptr != 0)
4663 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4664 return;
4665 }
4666
4667OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4668OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4669firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4670skip_char_back(common);
4671
4672loop = LABEL();
4673common->ff_newline_shortcut = loop;
4674
4675read_char_range(common, common->nlmin, common->nlmax, TRUE);
4676lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4677if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4678 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4679check_newlinechar(common, common->nltype, &newline, FALSE);
4680set_jumps(newline, loop);
4681
4682if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4683 {
4684 quit = JUMP(SLJIT_JUMP);
4685 JUMPHERE(foundcr);
4686 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4687 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4688 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4689 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4690#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4691 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4692#endif
4693 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4694 JUMPHERE(notfoundnl);
4695 JUMPHERE(quit);
4696 }
4697JUMPHERE(lastchar);
4698JUMPHERE(firstchar);
4699
4700if (common->match_end_ptr != 0)
4701 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4702}
4703
4704static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4705
4706static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4707{
4708DEFINE_COMPILER;
4709struct sljit_label *start;
4710struct sljit_jump *quit;
4711struct sljit_jump *found = NULL;
4712jump_list *matches = NULL;
4713#ifndef COMPILE_PCRE8
4714struct sljit_jump *jump;
4715#endif
4716
4717if (common->match_end_ptr != 0)
4718 {
4719 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4720 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4721 }
4722
4723start = LABEL();
4724quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4725OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4726#ifdef SUPPORT_UTF
4727if (common->utf)
4728 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4729#endif
4730
4731if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4732 {
4733#ifndef COMPILE_PCRE8
4734 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4736 JUMPHERE(jump);
4737#endif
4738 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4739 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4740 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4741 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4742 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4743 found = JUMP(SLJIT_NOT_ZERO);
4744 }
4745
4746#ifdef SUPPORT_UTF
4747if (common->utf)
4748 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4749#endif
4750OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4751#ifdef SUPPORT_UTF
4752#if defined COMPILE_PCRE8
4753if (common->utf)
4754 {
4755 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4756 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4757 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4758 }
4759#elif defined COMPILE_PCRE16
4760if (common->utf)
4761 {
4762 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4763 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4764 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4765 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4766 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4767 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4768 }
4769#endif /* COMPILE_PCRE[8|16] */
4770#endif /* SUPPORT_UTF */
4771JUMPTO(SLJIT_JUMP, start);
4772if (found != NULL)
4773 JUMPHERE(found);
4774if (matches != NULL)
4775 set_jumps(matches, LABEL());
4776JUMPHERE(quit);
4777
4778if (common->match_end_ptr != 0)
4779 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4780}
4781
4782static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4783{
4784DEFINE_COMPILER;
4785struct sljit_label *loop;
4786struct sljit_jump *toolong;
4787struct sljit_jump *alreadyfound;
4788struct sljit_jump *found;
4789struct sljit_jump *foundoc = NULL;
4790struct sljit_jump *notfound;
4791sljit_u32 oc, bit;
4792
4793SLJIT_ASSERT(common->req_char_ptr != 0);
4794OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4795OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4796toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4797alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4798
4799if (has_firstchar)
4800 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4801else
4802 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4803
4804loop = LABEL();
4805notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4806
4807OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4808oc = req_char;
4809if (caseless)
4810 {
4811 oc = TABLE_GET(req_char, common->fcc, req_char);
4812#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4813 if (req_char > 127 && common->utf)
4814 oc = UCD_OTHERCASE(req_char);
4815#endif
4816 }
4817if (req_char == oc)
4818 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4819else
4820 {
4821 bit = req_char ^ oc;
4822 if (is_powerof2(bit))
4823 {
4824 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4825 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4826 }
4827 else
4828 {
4829 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4830 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4831 }
4832 }
4833OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4834JUMPTO(SLJIT_JUMP, loop);
4835
4836JUMPHERE(found);
4837if (foundoc)
4838 JUMPHERE(foundoc);
4839OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4840JUMPHERE(alreadyfound);
4841JUMPHERE(toolong);
4842return notfound;
4843}
4844
4845static void do_revertframes(compiler_common *common)
4846{
4847DEFINE_COMPILER;
4848struct sljit_jump *jump;
4849struct sljit_label *mainloop;
4850
4851sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4852OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
4853GET_LOCAL_BASE(TMP1, 0, 0);
4854
4855/* Drop frames until we reach STACK_TOP. */
4856mainloop = LABEL();
4857OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
4858jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4859
4860OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4861OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4862OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
4863OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4864JUMPTO(SLJIT_JUMP, mainloop);
4865
4866JUMPHERE(jump);
4867jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
4868/* End of reverting values. */
4869OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
4870sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4871
4872JUMPHERE(jump);
4873OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4874OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4875OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4876OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4877JUMPTO(SLJIT_JUMP, mainloop);
4878}
4879
4880static void check_wordboundary(compiler_common *common)
4881{
4882DEFINE_COMPILER;
4883struct sljit_jump *skipread;
4884jump_list *skipread_list = NULL;
4885#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4886struct sljit_jump *jump;
4887#endif
4888
4889SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4890
4891sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4892/* Get type of the previous char, and put it to LOCALS1. */
4893OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4894OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4895OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4896skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4897skip_char_back(common);
4898check_start_used_ptr(common);
4899read_char(common);
4900
4901/* Testing char type. */
4902#ifdef SUPPORT_UCP
4903if (common->use_ucp)
4904 {
4905 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4906 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4907 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4908 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4909 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4910 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4911 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4912 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4913 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4914 JUMPHERE(jump);
4915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4916 }
4917else
4918#endif
4919 {
4920#ifndef COMPILE_PCRE8
4921 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4922#elif defined SUPPORT_UTF
4923 /* Here LOCALS1 has already been zeroed. */
4924 jump = NULL;
4925 if (common->utf)
4926 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4927#endif /* COMPILE_PCRE8 */
4928 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4929 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4930 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4932#ifndef COMPILE_PCRE8
4933 JUMPHERE(jump);
4934#elif defined SUPPORT_UTF
4935 if (jump != NULL)
4936 JUMPHERE(jump);
4937#endif /* COMPILE_PCRE8 */
4938 }
4939JUMPHERE(skipread);
4940
4941OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4942check_str_end(common, &skipread_list);
4943peek_char(common, READ_CHAR_MAX);
4944
4945/* Testing char type. This is a code duplication. */
4946#ifdef SUPPORT_UCP
4947if (common->use_ucp)
4948 {
4949 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4950 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4951 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4952 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4953 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4954 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4955 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4956 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4957 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4958 JUMPHERE(jump);
4959 }
4960else
4961#endif
4962 {
4963#ifndef COMPILE_PCRE8
4964 /* TMP2 may be destroyed by peek_char. */
4965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4966 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4967#elif defined SUPPORT_UTF
4968 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4969 jump = NULL;
4970 if (common->utf)
4971 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4972#endif
4973 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4974 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4975 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4976#ifndef COMPILE_PCRE8
4977 JUMPHERE(jump);
4978#elif defined SUPPORT_UTF
4979 if (jump != NULL)
4980 JUMPHERE(jump);
4981#endif /* COMPILE_PCRE8 */
4982 }
4983set_jumps(skipread_list, LABEL());
4984
4985OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4986sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4987}
4988
4989static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4990{
4991/* May destroy TMP1. */
4992DEFINE_COMPILER;
4993int ranges[MAX_RANGE_SIZE];
4994sljit_u8 bit, cbit, all;
4995int i, byte, length = 0;
4996
4997bit = bits[0] & 0x1;
4998/* All bits will be zero or one (since bit is zero or one). */
4999all = -bit;
5000
5001for (i = 0; i < 256; )
5002 {
5003 byte = i >> 3;
5004 if ((i & 0x7) == 0 && bits[byte] == all)
5005 i += 8;
5006 else
5007 {
5008 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
5009 if (cbit != bit)
5010 {
5011 if (length >= MAX_RANGE_SIZE)
5012 return FALSE;
5013 ranges[length] = i;
5014 length++;
5015 bit = cbit;
5016 all = -cbit;
5017 }
5018 i++;
5019 }
5020 }
5021
5022if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
5023 {
5024 if (length >= MAX_RANGE_SIZE)
5025 return FALSE;
5026 ranges[length] = 256;
5027 length++;
5028 }
5029
5030if (length < 0 || length > 4)
5031 return FALSE;
5032
5033bit = bits[0] & 0x1;
5034if (invert) bit ^= 0x1;
5035
5036/* No character is accepted. */
5037if (length == 0 && bit == 0)
5038 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5039
5040switch(length)
5041 {
5042 case 0:
5043 /* When bit != 0, all characters are accepted. */
5044 return TRUE;
5045
5046 case 1:
5047 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5048 return TRUE;
5049
5050 case 2:
5051 if (ranges[0] + 1 != ranges[1])
5052 {
5053 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5054 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5055 }
5056 else
5057 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5058 return TRUE;
5059
5060 case 3:
5061 if (bit != 0)
5062 {
5063 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5064 if (ranges[0] + 1 != ranges[1])
5065 {
5066 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5067 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5068 }
5069 else
5070 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5071 return TRUE;
5072 }
5073
5074 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5075 if (ranges[1] + 1 != ranges[2])
5076 {
5077 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5078 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5079 }
5080 else
5081 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5082 return TRUE;
5083
5084 case 4:
5085 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5086 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5087 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5088 && is_powerof2(ranges[2] - ranges[0]))
5089 {
5090 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5091 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5092 if (ranges[2] + 1 != ranges[3])
5093 {
5094 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5095 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5096 }
5097 else
5098 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5099 return TRUE;
5100 }
5101
5102 if (bit != 0)
5103 {
5104 i = 0;
5105 if (ranges[0] + 1 != ranges[1])
5106 {
5107 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5108 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5109 i = ranges[0];
5110 }
5111 else
5112 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5113
5114 if (ranges[2] + 1 != ranges[3])
5115 {
5116 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5117 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5118 }
5119 else
5120 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5121 return TRUE;
5122 }
5123
5124 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5125 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5126 if (ranges[1] + 1 != ranges[2])
5127 {
5128 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5129 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5130 }
5131 else
5132 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5133 return TRUE;
5134
5135 default:
5136 SLJIT_UNREACHABLE();
5137 return FALSE;
5138 }
5139}
5140
5141static void check_anynewline(compiler_common *common)
5142{
5143/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5144DEFINE_COMPILER;
5145
5146sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5147
5148OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5149OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5150OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5151OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5152#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5153#ifdef COMPILE_PCRE8
5154if (common->utf)
5155 {
5156#endif
5157 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5158 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5159 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5160#ifdef COMPILE_PCRE8
5161 }
5162#endif
5163#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5164OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5165sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5166}
5167
5168static void check_hspace(compiler_common *common)
5169{
5170/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5171DEFINE_COMPILER;
5172
5173sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5174
5175OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5176OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5177OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5178OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5179OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5180#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5181#ifdef COMPILE_PCRE8
5182if (common->utf)
5183 {
5184#endif
5185 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5186 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5187 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5188 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5189 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5190 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5191 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5192 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5193 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5194 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5195 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5196 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5197 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5198#ifdef COMPILE_PCRE8
5199 }
5200#endif
5201#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5202OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5203
5204sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5205}
5206
5207static void check_vspace(compiler_common *common)
5208{
5209/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5210DEFINE_COMPILER;
5211
5212sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5213
5214OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5215OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5216OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5217OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5218#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5219#ifdef COMPILE_PCRE8
5220if (common->utf)
5221 {
5222#endif
5223 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5224 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5225 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5226#ifdef COMPILE_PCRE8
5227 }
5228#endif
5229#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5230OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5231
5232sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5233}
5234
5235static void do_casefulcmp(compiler_common *common)
5236{
5237DEFINE_COMPILER;
5238struct sljit_jump *jump;
5239struct sljit_label *label;
5240int char1_reg;
5241int char2_reg;
5242
5243if (sljit_get_register_index(TMP3) < 0)
5244 {
5245 char1_reg = STR_END;
5246 char2_reg = STACK_TOP;
5247 }
5248else
5249 {
5250 char1_reg = TMP3;
5251 char2_reg = RETURN_ADDR;
5252 }
5253
5254sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5255OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5256
5257if (char1_reg == STR_END)
5258 {
5259 OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
5260 OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
5261 }
5262
5263if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5264 {
5265 label = LABEL();
5266 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5267 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5268 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5269 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5270 JUMPTO(SLJIT_NOT_ZERO, label);
5271
5272 JUMPHERE(jump);
5273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5274 }
5275else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5276 {
5277 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5278 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5279
5280 label = LABEL();
5281 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5282 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5283 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5284 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5285 JUMPTO(SLJIT_NOT_ZERO, label);
5286
5287 JUMPHERE(jump);
5288 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5289 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5290 }
5291else
5292 {
5293 label = LABEL();
5294 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5295 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5296 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5297 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5298 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5299 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5300 JUMPTO(SLJIT_NOT_ZERO, label);
5301
5302 JUMPHERE(jump);
5303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5304 }
5305
5306if (char1_reg == STR_END)
5307 {
5308 OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
5309 OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
5310 }
5311
5312sljit_emit_fast_return(compiler, TMP1, 0);
5313}
5314
5315static void do_caselesscmp(compiler_common *common)
5316{
5317DEFINE_COMPILER;
5318struct sljit_jump *jump;
5319struct sljit_label *label;
5320int char1_reg = STR_END;
5321int char2_reg;
5322int lcc_table;
5323int opt_type = 0;
5324
5325if (sljit_get_register_index(TMP3) < 0)
5326 {
5327 char2_reg = STACK_TOP;
5328 lcc_table = STACK_LIMIT;
5329 }
5330else
5331 {
5332 char2_reg = RETURN_ADDR;
5333 lcc_table = TMP3;
5334 }
5335
5336if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5337 opt_type = 1;
5338else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5339 opt_type = 2;
5340
5341sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5342OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5343
5344OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
5345
5346if (char2_reg == STACK_TOP)
5347 {
5348 OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
5349 OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
5350 }
5351
5352OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
5353
5354if (opt_type == 1)
5355 {
5356 label = LABEL();
5357 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5358 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5359 }
5360else if (opt_type == 2)
5361 {
5362 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5363 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5364
5365 label = LABEL();
5366 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5367 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5368 }
5369else
5370 {
5371 label = LABEL();
5372 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5373 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5374 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5375 }
5376
5377#ifndef COMPILE_PCRE8
5378jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
5379#endif
5380OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
5381#ifndef COMPILE_PCRE8
5382JUMPHERE(jump);
5383jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
5384#endif
5385OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
5386#ifndef COMPILE_PCRE8
5387JUMPHERE(jump);
5388#endif
5389
5390if (opt_type == 0)
5391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5392
5393jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5394OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5395JUMPTO(SLJIT_NOT_ZERO, label);
5396
5397JUMPHERE(jump);
5398OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5399
5400if (opt_type == 2)
5401 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5402
5403if (char2_reg == STACK_TOP)
5404 {
5405 OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
5406 OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
5407 }
5408
5409OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5410sljit_emit_fast_return(compiler, TMP1, 0);
5411}
5412
5413#if defined SUPPORT_UTF && defined SUPPORT_UCP
5414
5415static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
5416{
5417/* This function would be ineffective to do in JIT level. */
5418sljit_u32 c1, c2;
5419const ucd_record *ur;
5420const sljit_u32 *pp;
5421
5422while (src1 < end1)
5423 {
5424 if (src2 >= end2)
5425 return (pcre_uchar*)1;
5426 GETCHARINC(c1, src1);
5427 GETCHARINC(c2, src2);
5428 ur = GET_UCD(c2);
5429 if (c1 != c2 && c1 != c2 + ur->other_case)
5430 {
5431 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5432 for (;;)
5433 {
5434 if (c1 < *pp) return NULL;
5435 if (c1 == *pp++) break;
5436 }
5437 }
5438 }
5439return src2;
5440}
5441
5442#endif /* SUPPORT_UTF && SUPPORT_UCP */
5443
5444static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5445 compare_context *context, jump_list **backtracks)
5446{
5447DEFINE_COMPILER;
5448unsigned int othercasebit = 0;
5449pcre_uchar *othercasechar = NULL;
5450#ifdef SUPPORT_UTF
5451int utflength;
5452#endif
5453
5454if (caseless && char_has_othercase(common, cc))
5455 {
5456 othercasebit = char_get_othercase_bit(common, cc);
5457 SLJIT_ASSERT(othercasebit);
5458 /* Extracting bit difference info. */
5459#if defined COMPILE_PCRE8
5460 othercasechar = cc + (othercasebit >> 8);
5461 othercasebit &= 0xff;
5462#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5463 /* Note that this code only handles characters in the BMP. If there
5464 ever are characters outside the BMP whose othercase differs in only one
5465 bit from itself (there currently are none), this code will need to be
5466 revised for COMPILE_PCRE32. */
5467 othercasechar = cc + (othercasebit >> 9);
5468 if ((othercasebit & 0x100) != 0)
5469 othercasebit = (othercasebit & 0xff) << 8;
5470 else
5471 othercasebit &= 0xff;
5472#endif /* COMPILE_PCRE[8|16|32] */
5473 }
5474
5475if (context->sourcereg == -1)
5476 {
5477#if defined COMPILE_PCRE8
5478#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5479 if (context->length >= 4)
5480 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5481 else if (context->length >= 2)
5482 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5483 else
5484#endif
5485 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5486#elif defined COMPILE_PCRE16
5487#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5488 if (context->length >= 4)
5489 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5490 else
5491#endif
5492 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5493#elif defined COMPILE_PCRE32
5494 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5495#endif /* COMPILE_PCRE[8|16|32] */
5496 context->sourcereg = TMP2;
5497 }
5498
5499#ifdef SUPPORT_UTF
5500utflength = 1;
5501if (common->utf && HAS_EXTRALEN(*cc))
5502 utflength += GET_EXTRALEN(*cc);
5503
5504do
5505 {
5506#endif
5507
5508 context->length -= IN_UCHARS(1);
5509#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5510
5511 /* Unaligned read is supported. */
5512 if (othercasebit != 0 && othercasechar == cc)
5513 {
5514 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5515 context->oc.asuchars[context->ucharptr] = othercasebit;
5516 }
5517 else
5518 {
5519 context->c.asuchars[context->ucharptr] = *cc;
5520 context->oc.asuchars[context->ucharptr] = 0;
5521 }
5522 context->ucharptr++;
5523
5524#if defined COMPILE_PCRE8
5525 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5526#else
5527 if (context->ucharptr >= 2 || context->length == 0)
5528#endif
5529 {
5530 if (context->length >= 4)
5531 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5532 else if (context->length >= 2)
5533 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5534#if defined COMPILE_PCRE8
5535 else if (context->length >= 1)
5536 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5537#endif /* COMPILE_PCRE8 */
5538 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5539
5540 switch(context->ucharptr)
5541 {
5542 case 4 / sizeof(pcre_uchar):
5543 if (context->oc.asint != 0)
5544 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5545 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5546 break;
5547
5548 case 2 / sizeof(pcre_uchar):
5549 if (context->oc.asushort != 0)
5550 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5551 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5552 break;
5553
5554#ifdef COMPILE_PCRE8
5555 case 1:
5556 if (context->oc.asbyte != 0)
5557 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5558 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5559 break;
5560#endif
5561
5562 default:
5563 SLJIT_UNREACHABLE();
5564 break;
5565 }
5566 context->ucharptr = 0;
5567 }
5568
5569#else
5570
5571 /* Unaligned read is unsupported or in 32 bit mode. */
5572 if (context->length >= 1)
5573 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5574
5575 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5576
5577 if (othercasebit != 0 && othercasechar == cc)
5578 {
5579 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5580 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5581 }
5582 else
5583 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5584
5585#endif
5586
5587 cc++;
5588#ifdef SUPPORT_UTF
5589 utflength--;
5590 }
5591while (utflength > 0);
5592#endif
5593
5594return cc;
5595}
5596
5597#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5598
/* The two macros below maintain a "biased" register: the register holds
(real value - offset), where the offset is tracked at compile time in
typeoffset / charoffset. Requesting a new offset emits a single ADD or SUB
that re-biases the register (nothing is emitted when the offset is already
the requested one) and records the new offset.

Both are wrapped in do { } while (0) so that each expands to exactly one
statement and is safe inside an unbraced if / else. Note that `value` is
evaluated more than once, so it must not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5618
5619static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5620
5621static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5622{
5623DEFINE_COMPILER;
5624jump_list *found = NULL;
5625jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5626sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5627struct sljit_jump *jump = NULL;
5628pcre_uchar *ccbegin;
5629int compares, invertcmp, numberofcmps;
5630#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5631BOOL utf = common->utf;
5632#endif
5633
5634#ifdef SUPPORT_UCP
5635BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5636BOOL charsaved = FALSE;
5637int typereg = TMP1;
5638const sljit_u32 *other_cases;
5639sljit_uw typeoffset;
5640#endif
5641
5642/* Scanning the necessary info. */
5643cc++;
5644ccbegin = cc;
5645compares = 0;
5646if (cc[-1] & XCL_MAP)
5647 {
5648 min = 0;
5649 cc += 32 / sizeof(pcre_uchar);
5650 }
5651
5652while (*cc != XCL_END)
5653 {
5654 compares++;
5655 if (*cc == XCL_SINGLE)
5656 {
5657 cc ++;
5658 GETCHARINCTEST(c, cc);
5659 if (c > max) max = c;
5660 if (c < min) min = c;
5661#ifdef SUPPORT_UCP
5662 needschar = TRUE;
5663#endif
5664 }
5665 else if (*cc == XCL_RANGE)
5666 {
5667 cc ++;
5668 GETCHARINCTEST(c, cc);
5669 if (c < min) min = c;
5670 GETCHARINCTEST(c, cc);
5671 if (c > max) max = c;
5672#ifdef SUPPORT_UCP
5673 needschar = TRUE;
5674#endif
5675 }
5676#ifdef SUPPORT_UCP
5677 else
5678 {
5679 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5680 cc++;
5681 if (*cc == PT_CLIST)
5682 {
5683 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5684 while (*other_cases != NOTACHAR)
5685 {
5686 if (*other_cases > max) max = *other_cases;
5687 if (*other_cases < min) min = *other_cases;
5688 other_cases++;
5689 }
5690 }
5691 else
5692 {
5693 max = READ_CHAR_MAX;
5694 min = 0;
5695 }
5696
5697 switch(*cc)
5698 {
5699 case PT_ANY:
5700 /* Any either accepts everything or ignored. */
5701 if (cc[-1] == XCL_PROP)
5702 {
5703 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5704 if (list == backtracks)
5705 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5706 return;
5707 }
5708 break;
5709
5710 case PT_LAMP:
5711 case PT_GC:
5712 case PT_PC:
5713 case PT_ALNUM:
5714 needstype = TRUE;
5715 break;
5716
5717 case PT_SC:
5718 needsscript = TRUE;
5719 break;
5720
5721 case PT_SPACE:
5722 case PT_PXSPACE:
5723 case PT_WORD:
5724 case PT_PXGRAPH:
5725 case PT_PXPRINT:
5726 case PT_PXPUNCT:
5727 needstype = TRUE;
5728 needschar = TRUE;
5729 break;
5730
5731 case PT_CLIST:
5732 case PT_UCNC:
5733 needschar = TRUE;
5734 break;
5735
5736 default:
5737 SLJIT_UNREACHABLE();
5738 break;
5739 }
5740 cc += 2;
5741 }
5742#endif
5743 }
5744SLJIT_ASSERT(compares > 0);
5745
5746/* We are not necessary in utf mode even in 8 bit mode. */
5747cc = ccbegin;
5748read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5749
5750if ((cc[-1] & XCL_HASPROP) == 0)
5751 {
5752 if ((cc[-1] & XCL_MAP) != 0)
5753 {
5754 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5755 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5756 {
5757 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5758 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5759 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5760 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5761 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5762 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5763 }
5764
5765 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5766 JUMPHERE(jump);
5767
5768 cc += 32 / sizeof(pcre_uchar);
5769 }
5770 else
5771 {
5772 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5773 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5774 }
5775 }
5776else if ((cc[-1] & XCL_MAP) != 0)
5777 {
5778 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5779#ifdef SUPPORT_UCP
5780 charsaved = TRUE;
5781#endif
5782 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5783 {
5784#ifdef COMPILE_PCRE8
5785 jump = NULL;
5786 if (common->utf)
5787#endif
5788 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5789
5790 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5791 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5792 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5793 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5794 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5795 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5796
5797#ifdef COMPILE_PCRE8
5798 if (common->utf)
5799#endif
5800 JUMPHERE(jump);
5801 }
5802
5803 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5804 cc += 32 / sizeof(pcre_uchar);
5805 }
5806
5807#ifdef SUPPORT_UCP
5808if (needstype || needsscript)
5809 {
5810 if (needschar && !charsaved)
5811 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5812
5813#ifdef COMPILE_PCRE32
5814 if (!common->utf)
5815 {
5816 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5818 JUMPHERE(jump);
5819 }
5820#endif
5821
5822 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5823 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5824 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5825 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5826 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5827 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5828 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5829
5830 /* Before anything else, we deal with scripts. */
5831 if (needsscript)
5832 {
5833 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5834 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5835
5836 ccbegin = cc;
5837
5838 while (*cc != XCL_END)
5839 {
5840 if (*cc == XCL_SINGLE)
5841 {
5842 cc ++;
5843 GETCHARINCTEST(c, cc);
5844 }
5845 else if (*cc == XCL_RANGE)
5846 {
5847 cc ++;
5848 GETCHARINCTEST(c, cc);
5849 GETCHARINCTEST(c, cc);
5850 }
5851 else
5852 {
5853 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5854 cc++;
5855 if (*cc == PT_SC)
5856 {
5857 compares--;
5858 invertcmp = (compares == 0 && list != backtracks);
5859 if (cc[-1] == XCL_NOTPROP)
5860 invertcmp ^= 0x1;
5861 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5862 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5863 }
5864 cc += 2;
5865 }
5866 }
5867
5868 cc = ccbegin;
5869 }
5870
5871 if (needschar)
5872 {
5873 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5874 }
5875
5876 if (needstype)
5877 {
5878 if (!needschar)
5879 {
5880 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5881 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5882 }
5883 else
5884 {
5885 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5886 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5887 typereg = RETURN_ADDR;
5888 }
5889 }
5890 }
5891#endif
5892
5893/* Generating code. */
5894charoffset = 0;
5895numberofcmps = 0;
5896#ifdef SUPPORT_UCP
5897typeoffset = 0;
5898#endif
5899
5900while (*cc != XCL_END)
5901 {
5902 compares--;
5903 invertcmp = (compares == 0 && list != backtracks);
5904 jump = NULL;
5905
5906 if (*cc == XCL_SINGLE)
5907 {
5908 cc ++;
5909 GETCHARINCTEST(c, cc);
5910
5911 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5912 {
5913 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5914 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5915 numberofcmps++;
5916 }
5917 else if (numberofcmps > 0)
5918 {
5919 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5920 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5921 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5922 numberofcmps = 0;
5923 }
5924 else
5925 {
5926 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5927 numberofcmps = 0;
5928 }
5929 }
5930 else if (*cc == XCL_RANGE)
5931 {
5932 cc ++;
5933 GETCHARINCTEST(c, cc);
5934 SET_CHAR_OFFSET(c);
5935 GETCHARINCTEST(c, cc);
5936
5937 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5938 {
5939 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5940 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5941 numberofcmps++;
5942 }
5943 else if (numberofcmps > 0)
5944 {
5945 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5946 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5947 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5948 numberofcmps = 0;
5949 }
5950 else
5951 {
5952 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5953 numberofcmps = 0;
5954 }
5955 }
5956#ifdef SUPPORT_UCP
5957 else
5958 {
5959 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5960 if (*cc == XCL_NOTPROP)
5961 invertcmp ^= 0x1;
5962 cc++;
5963 switch(*cc)
5964 {
5965 case PT_ANY:
5966 if (!invertcmp)
5967 jump = JUMP(SLJIT_JUMP);
5968 break;
5969
5970 case PT_LAMP:
5971 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5972 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5973 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5974 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5975 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5976 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5977 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5978 break;
5979
5980 case PT_GC:
5981 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5982 SET_TYPE_OFFSET(c);
5983 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5984 break;
5985
5986 case PT_PC:
5987 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5988 break;
5989
5990 case PT_SC:
5991 compares++;
5992 /* Do nothing. */
5993 break;
5994
5995 case PT_SPACE:
5996 case PT_PXSPACE:
5997 SET_CHAR_OFFSET(9);
5998 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5999 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6000
6001 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6002 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6003
6004 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6005 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6006
6007 SET_TYPE_OFFSET(ucp_Zl);
6008 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6009 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6010 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6011 break;
6012
6013 case PT_WORD:
6014 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6015 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6016 /* Fall through. */
6017
6018 case PT_ALNUM:
6019 SET_TYPE_OFFSET(ucp_Ll);
6020 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6021 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6022 SET_TYPE_OFFSET(ucp_Nd);
6023 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6024 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6025 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6026 break;
6027
6028 case PT_CLIST:
6029 other_cases = PRIV(ucd_caseless_sets) + cc[1];
6030
6031 /* At least three characters are required.
6032 Otherwise this case would be handled by the normal code path. */
6033 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6034 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6035
6036 /* Optimizing character pairs, if their difference is power of 2. */
6037 if (is_powerof2(other_cases[1] ^ other_cases[0]))
6038 {
6039 if (charoffset == 0)
6040 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6041 else
6042 {
6043 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6044 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6045 }
6046 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6047 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6048 other_cases += 2;
6049 }
6050 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6051 {
6052 if (charoffset == 0)
6053 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6054 else
6055 {
6056 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6057 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6058 }
6059 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6060 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6061
6062 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6063 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6064
6065 other_cases += 3;
6066 }
6067 else
6068 {
6069 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6070 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6071 }
6072
6073 while (*other_cases != NOTACHAR)
6074 {
6075 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6076 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6077 }
6078 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6079 break;
6080
6081 case PT_UCNC:
6082 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6083 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6084 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6085 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6086 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6087 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6088
6089 SET_CHAR_OFFSET(0xa0);
6090 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6091 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6092 SET_CHAR_OFFSET(0);
6093 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6094 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6095 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6096 break;
6097
6098 case PT_PXGRAPH:
6099 /* C and Z groups are the farthest two groups. */
6100 SET_TYPE_OFFSET(ucp_Ll);
6101 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6102 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6103
6104 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6105
6106 /* In case of ucp_Cf, we overwrite the result. */
6107 SET_CHAR_OFFSET(0x2066);
6108 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6109 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6110
6111 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6112 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6113
6114 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6115 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6116
6117 JUMPHERE(jump);
6118 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6119 break;
6120
6121 case PT_PXPRINT:
6122 /* C and Z groups are the farthest two groups. */
6123 SET_TYPE_OFFSET(ucp_Ll);
6124 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6125 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6126
6127 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6128 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6129
6130 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6131
6132 /* In case of ucp_Cf, we overwrite the result. */
6133 SET_CHAR_OFFSET(0x2066);
6134 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6135 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6136
6137 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6138 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6139
6140 JUMPHERE(jump);
6141 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6142 break;
6143
6144 case PT_PXPUNCT:
6145 SET_TYPE_OFFSET(ucp_Sc);
6146 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6148
6149 SET_CHAR_OFFSET(0);
6150 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6151 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6152
6153 SET_TYPE_OFFSET(ucp_Pc);
6154 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6155 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6156 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6157 break;
6158
6159 default:
6160 SLJIT_UNREACHABLE();
6161 break;
6162 }
6163 cc += 2;
6164 }
6165#endif
6166
6167 if (jump != NULL)
6168 add_jump(compiler, compares > 0 ? list : backtracks, jump);
6169 }
6170
6171if (found != NULL)
6172 set_jumps(found, LABEL());
6173}
6174
6175#undef SET_TYPE_OFFSET
6176#undef SET_CHAR_OFFSET
6177
6178#endif
6179
6180static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6181{
6182DEFINE_COMPILER;
6183int length;
6184struct sljit_jump *jump[4];
6185#ifdef SUPPORT_UTF
6186struct sljit_label *label;
6187#endif /* SUPPORT_UTF */
6188
6189switch(type)
6190 {
6191 case OP_SOD:
6192 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6194 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6195 return cc;
6196
6197 case OP_SOM:
6198 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6199 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6200 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6201 return cc;
6202
6203 case OP_NOT_WORD_BOUNDARY:
6204 case OP_WORD_BOUNDARY:
6205 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6206 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6207 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6208 return cc;
6209
6210 case OP_EODN:
6211 /* Requires rather complex checks. */
6212 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6213 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6214 {
6215 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6216 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6217 if (common->mode == JIT_COMPILE)
6218 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6219 else
6220 {
6221 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6222 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6223 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6224 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6225 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6226 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6227 check_partial(common, TRUE);
6228 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6229 JUMPHERE(jump[1]);
6230 }
6231 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6232 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6233 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6234 }
6235 else if (common->nltype == NLTYPE_FIXED)
6236 {
6237 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6238 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6239 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6240 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6241 }
6242 else
6243 {
6244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6245 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6246 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6247 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6248 jump[2] = JUMP(SLJIT_GREATER);
6249 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6250 /* Equal. */
6251 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6252 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6253 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6254
6255 JUMPHERE(jump[1]);
6256 if (common->nltype == NLTYPE_ANYCRLF)
6257 {
6258 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6259 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6260 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6261 }
6262 else
6263 {
6264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6265 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6266 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6267 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6268 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6269 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6270 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6271 }
6272 JUMPHERE(jump[2]);
6273 JUMPHERE(jump[3]);
6274 }
6275 JUMPHERE(jump[0]);
6276 check_partial(common, FALSE);
6277 return cc;
6278
6279 case OP_EOD:
6280 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6281 check_partial(common, FALSE);
6282 return cc;
6283
6284 case OP_DOLL:
6285 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6286 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6287 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6288
6289 if (!common->endonly)
6290 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6291 else
6292 {
6293 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6294 check_partial(common, FALSE);
6295 }
6296 return cc;
6297
6298 case OP_DOLLM:
6299 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6300 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6301 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6302 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6303 check_partial(common, FALSE);
6304 jump[0] = JUMP(SLJIT_JUMP);
6305 JUMPHERE(jump[1]);
6306
6307 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6308 {
6309 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6310 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6311 if (common->mode == JIT_COMPILE)
6312 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6313 else
6314 {
6315 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6316 /* STR_PTR = STR_END - IN_UCHARS(1) */
6317 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6318 check_partial(common, TRUE);
6319 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6320 JUMPHERE(jump[1]);
6321 }
6322
6323 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6324 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6325 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6326 }
6327 else
6328 {
6329 peek_char(common, common->nlmax);
6330 check_newlinechar(common, common->nltype, backtracks, FALSE);
6331 }
6332 JUMPHERE(jump[0]);
6333 return cc;
6334
6335 case OP_CIRC:
6336 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6337 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6338 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6339 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6340 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6341 return cc;
6342
6343 case OP_CIRCM:
6344 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6345 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6346 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6347 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6348 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6349 jump[0] = JUMP(SLJIT_JUMP);
6350 JUMPHERE(jump[1]);
6351
6352 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6353 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6354 {
6355 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6356 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6357 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6358 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6359 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6360 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6361 }
6362 else
6363 {
6364 skip_char_back(common);
6365 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6366 check_newlinechar(common, common->nltype, backtracks, FALSE);
6367 }
6368 JUMPHERE(jump[0]);
6369 return cc;
6370
6371 case OP_REVERSE:
6372 length = GET(cc, 0);
6373 if (length == 0)
6374 return cc + LINK_SIZE;
6375 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6376#ifdef SUPPORT_UTF
6377 if (common->utf)
6378 {
6379 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6380 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6381 label = LABEL();
6382 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6383 skip_char_back(common);
6384 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6385 JUMPTO(SLJIT_NOT_ZERO, label);
6386 }
6387 else
6388#endif
6389 {
6390 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6391 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6392 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6393 }
6394 check_start_used_ptr(common);
6395 return cc + LINK_SIZE;
6396 }
6397SLJIT_UNREACHABLE();
6398return cc;
6399}
6400
6401static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6402{
6403DEFINE_COMPILER;
6404int length;
6405unsigned int c, oc, bit;
6406compare_context context;
6407struct sljit_jump *jump[3];
6408jump_list *end_list;
6409#ifdef SUPPORT_UTF
6410struct sljit_label *label;
6411#ifdef SUPPORT_UCP
6412pcre_uchar propdata[5];
6413#endif
6414#endif /* SUPPORT_UTF */
6415
6416switch(type)
6417 {
6418 case OP_NOT_DIGIT:
6419 case OP_DIGIT:
6420 /* Digits are usually 0-9, so it is worth to optimize them. */
6421 if (check_str_ptr)
6422 detect_partial_match(common, backtracks);
6423#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6424 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6425 read_char7_type(common, type == OP_NOT_DIGIT);
6426 else
6427#endif
6428 read_char8_type(common, type == OP_NOT_DIGIT);
6429 /* Flip the starting bit in the negative case. */
6430 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6431 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6432 return cc;
6433
6434 case OP_NOT_WHITESPACE:
6435 case OP_WHITESPACE:
6436 if (check_str_ptr)
6437 detect_partial_match(common, backtracks);
6438#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6439 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6440 read_char7_type(common, type == OP_NOT_WHITESPACE);
6441 else
6442#endif
6443 read_char8_type(common, type == OP_NOT_WHITESPACE);
6444 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6445 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6446 return cc;
6447
6448 case OP_NOT_WORDCHAR:
6449 case OP_WORDCHAR:
6450 if (check_str_ptr)
6451 detect_partial_match(common, backtracks);
6452#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6453 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6454 read_char7_type(common, type == OP_NOT_WORDCHAR);
6455 else
6456#endif
6457 read_char8_type(common, type == OP_NOT_WORDCHAR);
6458 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6459 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6460 return cc;
6461
6462 case OP_ANY:
6463 if (check_str_ptr)
6464 detect_partial_match(common, backtracks);
6465 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6466 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6467 {
6468 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6469 end_list = NULL;
6470 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6471 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6472 else
6473 check_str_end(common, &end_list);
6474
6475 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6476 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6477 set_jumps(end_list, LABEL());
6478 JUMPHERE(jump[0]);
6479 }
6480 else
6481 check_newlinechar(common, common->nltype, backtracks, TRUE);
6482 return cc;
6483
6484 case OP_ALLANY:
6485 if (check_str_ptr)
6486 detect_partial_match(common, backtracks);
6487#ifdef SUPPORT_UTF
6488 if (common->utf)
6489 {
6490 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6491 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6492#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6493#if defined COMPILE_PCRE8
6494 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6495 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6496 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6497#elif defined COMPILE_PCRE16
6498 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6499 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6500 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6501 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6502 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6503 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6504#endif
6505 JUMPHERE(jump[0]);
6506#endif /* COMPILE_PCRE[8|16] */
6507 return cc;
6508 }
6509#endif
6510 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6511 return cc;
6512
6513 case OP_ANYBYTE:
6514 if (check_str_ptr)
6515 detect_partial_match(common, backtracks);
6516 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6517 return cc;
6518
6519#ifdef SUPPORT_UTF
6520#ifdef SUPPORT_UCP
6521 case OP_NOTPROP:
6522 case OP_PROP:
6523 propdata[0] = XCL_HASPROP;
6524 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6525 propdata[2] = cc[0];
6526 propdata[3] = cc[1];
6527 propdata[4] = XCL_END;
6528 if (check_str_ptr)
6529 detect_partial_match(common, backtracks);
6530 compile_xclass_matchingpath(common, propdata, backtracks);
6531 return cc + 2;
6532#endif
6533#endif
6534
6535 case OP_ANYNL:
6536 if (check_str_ptr)
6537 detect_partial_match(common, backtracks);
6538 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6539 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6540 /* We don't need to handle soft partial matching case. */
6541 end_list = NULL;
6542 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6543 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6544 else
6545 check_str_end(common, &end_list);
6546 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6547 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6548 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6549 jump[2] = JUMP(SLJIT_JUMP);
6550 JUMPHERE(jump[0]);
6551 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6552 set_jumps(end_list, LABEL());
6553 JUMPHERE(jump[1]);
6554 JUMPHERE(jump[2]);
6555 return cc;
6556
6557 case OP_NOT_HSPACE:
6558 case OP_HSPACE:
6559 if (check_str_ptr)
6560 detect_partial_match(common, backtracks);
6561 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6562 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6563 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6564 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6565 return cc;
6566
6567 case OP_NOT_VSPACE:
6568 case OP_VSPACE:
6569 if (check_str_ptr)
6570 detect_partial_match(common, backtracks);
6571 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6572 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6573 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6574 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6575 return cc;
6576
6577#ifdef SUPPORT_UCP
6578 case OP_EXTUNI:
6579 if (check_str_ptr)
6580 detect_partial_match(common, backtracks);
6581 read_char(common);
6582 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6583 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6584 /* Optimize register allocation: use a real register. */
6585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6586 OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6587
6588 label = LABEL();
6589 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6590 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6591 read_char(common);
6592 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6593 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6594 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6595
6596 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6597 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6598 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6599 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6600 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6601 JUMPTO(SLJIT_NOT_ZERO, label);
6602
6603 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6604 JUMPHERE(jump[0]);
6605 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6606
6607 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6608 {
6609 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6610 /* Since we successfully read a char above, partial matching must occure. */
6611 check_partial(common, TRUE);
6612 JUMPHERE(jump[0]);
6613 }
6614 return cc;
6615#endif
6616
6617 case OP_CHAR:
6618 case OP_CHARI:
6619 length = 1;
6620#ifdef SUPPORT_UTF
6621 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6622#endif
6623 if (common->mode == JIT_COMPILE && check_str_ptr
6624 && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6625 {
6626 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6627 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6628
6629 context.length = IN_UCHARS(length);
6630 context.sourcereg = -1;
6631#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6632 context.ucharptr = 0;
6633#endif
6634 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6635 }
6636
6637 if (check_str_ptr)
6638 detect_partial_match(common, backtracks);
6639#ifdef SUPPORT_UTF
6640 if (common->utf)
6641 {
6642 GETCHAR(c, cc);
6643 }
6644 else
6645#endif
6646 c = *cc;
6647
6648 if (type == OP_CHAR || !char_has_othercase(common, cc))
6649 {
6650 read_char_range(common, c, c, FALSE);
6651 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6652 return cc + length;
6653 }
6654 oc = char_othercase(common, c);
6655 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6656 bit = c ^ oc;
6657 if (is_powerof2(bit))
6658 {
6659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6660 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6661 return cc + length;
6662 }
6663 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6664 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6665 JUMPHERE(jump[0]);
6666 return cc + length;
6667
6668 case OP_NOT:
6669 case OP_NOTI:
6670 if (check_str_ptr)
6671 detect_partial_match(common, backtracks);
6672 length = 1;
6673#ifdef SUPPORT_UTF
6674 if (common->utf)
6675 {
6676#ifdef COMPILE_PCRE8
6677 c = *cc;
6678 if (c < 128)
6679 {
6680 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6681 if (type == OP_NOT || !char_has_othercase(common, cc))
6682 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6683 else
6684 {
6685 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6686 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6687 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6688 }
6689 /* Skip the variable-length character. */
6690 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6691 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6692 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6693 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6694 JUMPHERE(jump[0]);
6695 return cc + 1;
6696 }
6697 else
6698#endif /* COMPILE_PCRE8 */
6699 {
6700 GETCHARLEN(c, cc, length);
6701 }
6702 }
6703 else
6704#endif /* SUPPORT_UTF */
6705 c = *cc;
6706
6707 if (type == OP_NOT || !char_has_othercase(common, cc))
6708 {
6709 read_char_range(common, c, c, TRUE);
6710 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6711 }
6712 else
6713 {
6714 oc = char_othercase(common, c);
6715 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6716 bit = c ^ oc;
6717 if (is_powerof2(bit))
6718 {
6719 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6720 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6721 }
6722 else
6723 {
6724 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6725 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6726 }
6727 }
6728 return cc + length;
6729
6730 case OP_CLASS:
6731 case OP_NCLASS:
6732 if (check_str_ptr)
6733 detect_partial_match(common, backtracks);
6734
6735#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6736 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6737 read_char_range(common, 0, bit, type == OP_NCLASS);
6738#else
6739 read_char_range(common, 0, 255, type == OP_NCLASS);
6740#endif
6741
6742 if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6743 return cc + 32 / sizeof(pcre_uchar);
6744
6745#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6746 jump[0] = NULL;
6747 if (common->utf)
6748 {
6749 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6750 if (type == OP_CLASS)
6751 {
6752 add_jump(compiler, backtracks, jump[0]);
6753 jump[0] = NULL;
6754 }
6755 }
6756#elif !defined COMPILE_PCRE8
6757 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6758 if (type == OP_CLASS)
6759 {
6760 add_jump(compiler, backtracks, jump[0]);
6761 jump[0] = NULL;
6762 }
6763#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6764
6765 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6766 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6767 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6768 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6769 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6770 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6771
6772#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6773 if (jump[0] != NULL)
6774 JUMPHERE(jump[0]);
6775#endif
6776 return cc + 32 / sizeof(pcre_uchar);
6777
6778#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6779 case OP_XCLASS:
6780 if (check_str_ptr)
6781 detect_partial_match(common, backtracks);
6782 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6783 return cc + GET(cc, 0) - 1;
6784#endif
6785 }
6786SLJIT_UNREACHABLE();
6787return cc;
6788}
6789
6790static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6791{
6792/* This function consumes at least one input character. */
6793/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6794DEFINE_COMPILER;
6795pcre_uchar *ccbegin = cc;
6796compare_context context;
6797int size;
6798
6799context.length = 0;
6800do
6801 {
6802 if (cc >= ccend)
6803 break;
6804
6805 if (*cc == OP_CHAR)
6806 {
6807 size = 1;
6808#ifdef SUPPORT_UTF
6809 if (common->utf && HAS_EXTRALEN(cc[1]))
6810 size += GET_EXTRALEN(cc[1]);
6811#endif
6812 }
6813 else if (*cc == OP_CHARI)
6814 {
6815 size = 1;
6816#ifdef SUPPORT_UTF
6817 if (common->utf)
6818 {
6819 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6820 size = 0;
6821 else if (HAS_EXTRALEN(cc[1]))
6822 size += GET_EXTRALEN(cc[1]);
6823 }
6824 else
6825#endif
6826 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6827 size = 0;
6828 }
6829 else
6830 size = 0;
6831
6832 cc += 1 + size;
6833 context.length += IN_UCHARS(size);
6834 }
6835while (size > 0 && context.length <= 128);
6836
6837cc = ccbegin;
6838if (context.length > 0)
6839 {
6840 /* We have a fixed-length byte sequence. */
6841 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6842 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6843
6844 context.sourcereg = -1;
6845#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6846 context.ucharptr = 0;
6847#endif
6848 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6849 return cc;
6850 }
6851
6852/* A non-fixed length character will be checked if length == 0. */
6853return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6854}
6855
6856/* Forward definitions. */
6857static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6858static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6859
/* Obtains a backtrack record of "size" bytes through sljit_alloc_memory(),
clears it, stores "ccstart" in it and links it in as the new parent->top
(the former top is kept in its prev field). The variables "compiler",
"backtrack" and "parent" must be in scope at the point of use. When the
compiler reports an error after the allocation, the enclosing function
returns "error". */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
6872
/* Variant of PUSH_BACKTRACK for functions returning void: a backtrack
record of "size" bytes is obtained through sljit_alloc_memory(), cleared,
given "ccstart" and linked in as the new parent->top. The variables
"compiler", "backtrack" and "parent" must be in scope. When the compiler
reports an error after the allocation, the enclosing function simply
returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
6885
/* Views the in-scope "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)(backtrack))
6887
6888static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6889{
6890/* The OVECTOR offset goes to TMP2. */
6891DEFINE_COMPILER;
6892int count = GET2(cc, 1 + IMM2_SIZE);
6893pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6894unsigned int offset;
6895jump_list *found = NULL;
6896
6897SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6898
6899OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6900
6901count--;
6902while (count-- > 0)
6903 {
6904 offset = GET2(slot, 0) << 1;
6905 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6906 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6907 slot += common->name_entry_size;
6908 }
6909
6910offset = GET2(slot, 0) << 1;
6911GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6912if (backtracks != NULL && !common->jscript_compat)
6913 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6914
6915set_jumps(found, LABEL());
6916}
6917
6918static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6919{
6920DEFINE_COMPILER;
6921BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6922int offset = 0;
6923struct sljit_jump *jump = NULL;
6924struct sljit_jump *partial;
6925struct sljit_jump *nopartial;
6926
6927if (ref)
6928 {
6929 offset = GET2(cc, 1) << 1;
6930 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6931 /* OVECTOR(1) contains the "string begin - 1" constant. */
6932 if (withchecks && !common->jscript_compat)
6933 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6934 }
6935else
6936 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6937
6938#if defined SUPPORT_UTF && defined SUPPORT_UCP
6939if (common->utf && *cc == OP_REFI)
6940 {
6941 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
6942 if (ref)
6943 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6944 else
6945 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6946
6947 if (withchecks)
6948 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
6949
6950 /* No free saved registers so save data on stack. */
6951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6952 OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
6953 OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
6954 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6955 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6956 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6957
6958 if (common->mode == JIT_COMPILE)
6959 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6960 else
6961 {
6962 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6963
6964 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
6965
6966 nopartial = JUMP(SLJIT_NOT_EQUAL);
6967 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6968 check_partial(common, FALSE);
6969 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6970 JUMPHERE(nopartial);
6971 }
6972 }
6973else
6974#endif /* SUPPORT_UTF && SUPPORT_UCP */
6975 {
6976 if (ref)
6977 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6978 else
6979 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6980
6981 if (withchecks)
6982 jump = JUMP(SLJIT_ZERO);
6983
6984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6985 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6986 if (common->mode == JIT_COMPILE)
6987 add_jump(compiler, backtracks, partial);
6988
6989 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6990 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6991
6992 if (common->mode != JIT_COMPILE)
6993 {
6994 nopartial = JUMP(SLJIT_JUMP);
6995 JUMPHERE(partial);
6996 /* TMP2 -= STR_END - STR_PTR */
6997 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6998 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6999 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
7000 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
7001 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
7002 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7003 JUMPHERE(partial);
7004 check_partial(common, FALSE);
7005 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7006 JUMPHERE(nopartial);
7007 }
7008 }
7009
7010if (jump != NULL)
7011 {
7012 if (emptyfail)
7013 add_jump(compiler, backtracks, jump);
7014 else
7015 JUMPHERE(jump);
7016 }
7017}
7018
7019static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7020{
7021DEFINE_COMPILER;
7022BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
7023backtrack_common *backtrack;
7024pcre_uchar type;
7025int offset = 0;
7026struct sljit_label *label;
7027struct sljit_jump *zerolength;
7028struct sljit_jump *jump = NULL;
7029pcre_uchar *ccbegin = cc;
7030int min = 0, max = 0;
7031BOOL minimize;
7032
7033PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
7034
7035if (ref)
7036 offset = GET2(cc, 1) << 1;
7037else
7038 cc += IMM2_SIZE;
7039type = cc[1 + IMM2_SIZE];
7040
7041SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
7042minimize = (type & 0x1) != 0;
7043switch(type)
7044 {
7045 case OP_CRSTAR:
7046 case OP_CRMINSTAR:
7047 min = 0;
7048 max = 0;
7049 cc += 1 + IMM2_SIZE + 1;
7050 break;
7051 case OP_CRPLUS:
7052 case OP_CRMINPLUS:
7053 min = 1;
7054 max = 0;
7055 cc += 1 + IMM2_SIZE + 1;
7056 break;
7057 case OP_CRQUERY:
7058 case OP_CRMINQUERY:
7059 min = 0;
7060 max = 1;
7061 cc += 1 + IMM2_SIZE + 1;
7062 break;
7063 case OP_CRRANGE:
7064 case OP_CRMINRANGE:
7065 min = GET2(cc, 1 + IMM2_SIZE + 1);
7066 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
7067 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
7068 break;
7069 default:
7070 SLJIT_UNREACHABLE();
7071 break;
7072 }
7073
7074if (!minimize)
7075 {
7076 if (min == 0)
7077 {
7078 allocate_stack(common, 2);
7079 if (ref)
7080 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7081 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7082 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7083 /* Temporary release of STR_PTR. */
7084 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7085 /* Handles both invalid and empty cases. Since the minimum repeat,
7086 is zero the invalid case is basically the same as an empty case. */
7087 if (ref)
7088 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7089 else
7090 {
7091 compile_dnref_search(common, ccbegin, NULL);
7092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
7094 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7095 }
7096 /* Restore if not zero length. */
7097 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7098 }
7099 else
7100 {
7101 allocate_stack(common, 1);
7102 if (ref)
7103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7105 if (ref)
7106 {
7107 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7108 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7109 }
7110 else
7111 {
7112 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
7115 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7116 }
7117 }
7118
7119 if (min > 1 || max > 1)
7120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7121
7122 label = LABEL();
7123 if (!ref)
7124 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
7125 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
7126
7127 if (min > 1 || max > 1)
7128 {
7129 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7130 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
7132 if (min > 1)
7133 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
7134 if (max > 1)
7135 {
7136 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7137 allocate_stack(common, 1);
7138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7139 JUMPTO(SLJIT_JUMP, label);
7140 JUMPHERE(jump);
7141 }
7142 }
7143
7144 if (max == 0)
7145 {
7146 /* Includes min > 1 case as well. */
7147 allocate_stack(common, 1);
7148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7149 JUMPTO(SLJIT_JUMP, label);
7150 }
7151
7152 JUMPHERE(zerolength);
7153 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7154
7155 count_match(common);
7156 return cc;
7157 }
7158
7159allocate_stack(common, ref ? 2 : 3);
7160if (ref)
7161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7162OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7163if (type != OP_CRMINSTAR)
7164 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7165
7166if (min == 0)
7167 {
7168 /* Handles both invalid and empty cases. Since the minimum repeat,
7169 is zero the invalid case is basically the same as an empty case. */
7170 if (ref)
7171 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7172 else
7173 {
7174 compile_dnref_search(common, ccbegin, NULL);
7175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7177 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7178 }
7179 /* Length is non-zero, we can match real repeats. */
7180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7181 jump = JUMP(SLJIT_JUMP);
7182 }
7183else
7184 {
7185 if (ref)
7186 {
7187 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7188 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7189 }
7190 else
7191 {
7192 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7195 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7196 }
7197 }
7198
7199BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7200if (max > 0)
7201 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
7202
7203if (!ref)
7204 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
7205compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
7206OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7207
7208if (min > 1)
7209 {
7210 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7211 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7213 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
7214 }
7215else if (max > 0)
7216 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
7217
7218if (jump != NULL)
7219 JUMPHERE(jump);
7220JUMPHERE(zerolength);
7221
7222count_match(common);
7223return cc;
7224}
7225
7226static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7227{
7228DEFINE_COMPILER;
7229backtrack_common *backtrack;
7230recurse_entry *entry = common->entries;
7231recurse_entry *prev = NULL;
7232sljit_sw start = GET(cc, 1);
7233pcre_uchar *start_cc;
7234BOOL needs_control_head;
7235
7236PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7237
7238/* Inlining simple patterns. */
7239if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7240 {
7241 start_cc = common->start + start;
7242 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7243 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7244 return cc + 1 + LINK_SIZE;
7245 }
7246
7247while (entry != NULL)
7248 {
7249 if (entry->start == start)
7250 break;
7251 prev = entry;
7252 entry = entry->next;
7253 }
7254
7255if (entry == NULL)
7256 {
7257 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7258 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7259 return NULL;
7260 entry->next = NULL;
7261 entry->entry = NULL;
7262 entry->calls = NULL;
7263 entry->start = start;
7264
7265 if (prev != NULL)
7266 prev->next = entry;
7267 else
7268 common->entries = entry;
7269 }
7270
7271if (common->has_set_som && common->mark_ptr != 0)
7272 {
7273 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7274 allocate_stack(common, 2);
7275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7278 }
7279else if (common->has_set_som || common->mark_ptr != 0)
7280 {
7281 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7282 allocate_stack(common, 1);
7283 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7284 }
7285
7286if (entry->entry == NULL)
7287 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7288else
7289 JUMPTO(SLJIT_FAST_CALL, entry->entry);
7290/* Leave if the match is failed. */
7291add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7292return cc + 1 + LINK_SIZE;
7293}
7294
7295static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7296{
7297const pcre_uchar *begin = arguments->begin;
7298int *offset_vector = arguments->offsets;
7299int offset_count = arguments->offset_count;
7300int i;
7301
7302if (PUBL(callout) == NULL)
7303 return 0;
7304
7305callout_block->version = 2;
7306callout_block->callout_data = arguments->callout_data;
7307
7308/* Offsets in subject. */
7309callout_block->subject_length = arguments->end - arguments->begin;
7310callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7311callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7312#if defined COMPILE_PCRE8
7313callout_block->subject = (PCRE_SPTR)begin;
7314#elif defined COMPILE_PCRE16
7315callout_block->subject = (PCRE_SPTR16)begin;
7316#elif defined COMPILE_PCRE32
7317callout_block->subject = (PCRE_SPTR32)begin;
7318#endif
7319
7320/* Convert and copy the JIT offset vector to the offset_vector array. */
7321callout_block->capture_top = 0;
7322callout_block->offset_vector = offset_vector;
7323for (i = 2; i < offset_count; i += 2)
7324 {
7325 offset_vector[i] = jit_ovector[i] - begin;
7326 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7327 if (jit_ovector[i] >= begin)
7328 callout_block->capture_top = i;
7329 }
7330
7331callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7332if (offset_count > 0)
7333 offset_vector[0] = -1;
7334if (offset_count > 1)
7335 offset_vector[1] = -1;
7336return (*PUBL(callout))(callout_block);
7337}
7338
/* Size of the callout block in bytes, rounded up to a multiple of 8. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Byte offset of field "arg" inside the callout block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(PUBL(callout_block), arg)

/* Emits the matching path of a callout item. The generated code builds a
callout block in freshly allocated stack space, calls do_callout() with it,
releases the space again and then dispatches on the returned value.

Arguments:
  common   shared compiler state
  cc       points to the callout item; cc[1] is stored as the callout number,
           GET(cc, 2) as the pattern position and GET(cc, 2 + LINK_SIZE) as
           the next item length
  parent   backtrack record the new backtrack is pushed onto

Returns the address following the item (cc + 2 + 2 * LINK_SIZE).
NOTE(review): PUSH_BACKTRACK is given NULL as its error value, so presumably
NULL is returned when pushing the backtrack fails - confirm against the macro. */
static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Reserve room for the (aligned) callout block, counted in stack words. */
allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarily store internal variables: STR_PTR
goes into offset_vector and OVECTOR(0) into subject. do_callout() turns them
into current_position and start_match before overwriting both fields. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

/* The mark field is loaded from the arguments (held in TMP1) when marks are
in use, otherwise an immediate 0 is stored. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers. STACK_TOP is kept in LOCALS0
across the call and reloaded afterwards. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* SLJIT_R0 = arguments */
/* Second argument: address of the callout block. Third argument: address of
the capture vector inside the local area. */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

/* Check return value: a positive value backtracks, any other non-zero value
(i.e. a negative one) jumps to the forced quit, zero continues matching. */
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
if (common->forced_quit_label == NULL)
  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->forced_quit_label);
return cc + 2 + 2 * LINK_SIZE;
}

#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
7393
7394static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7395{
7396while (TRUE)
7397 {
7398 switch (*cc)
7399 {
7400 case OP_NOT_WORD_BOUNDARY:
7401 case OP_WORD_BOUNDARY:
7402 case OP_CIRC:
7403 case OP_CIRCM:
7404 case OP_DOLL:
7405 case OP_DOLLM:
7406 case OP_CALLOUT:
7407 case OP_ALT:
7408 cc += PRIV(OP_lengths)[*cc];
7409 break;
7410
7411 case OP_KET:
7412 return FALSE;
7413
7414 default:
7415 return TRUE;
7416 }
7417 }
7418}
7419
/* Emits the matching path of an assertion group (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

Arguments:
  common       shared compiler state; its then_trap, local_exit, quit,
               accept and positive_assert fields are replaced while the
               assertion is compiled and restored before returning
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    assert backtrack record; framesize and private_data_ptr are
               stored in it, and matchingpath is set for OP_BRAZERO
  conditional  when TRUE, failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.topbacktracks

Returns the address just past the opcode that terminates the last
alternative (cc + 1 + LINK_SIZE), or NULL when the compiler reports an error
while an alternative is being compiled. */
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A jump is added to "found" whenever an alternative matches. Positive
assertions collect these jumps in the local list tmp, negative ones send
them directly to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is saved. extrasize counts the extra stack words: one for
  STR_PTR (dropped for a plain OP_BRA assertion whose first alternative does
  not need it) plus one for the control head when that must be saved. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame is saved. Besides the frame itself the stack receives STR_PTR,
  the previous value of the private data slot and, when needed, the previous
  control head; the private data slot is set to STACK_TOP plus the size of
  the whole area. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative; the loop ends when the opcode following the
current alternative is not OP_ALT. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Later alternatives restart from the STR_PTR saved on the stack. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Put the saved compiler state back before reporting the error. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Accept jumps collected inside this alternative land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* This alternative matched: leave through the "found" list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Put the saved compiler state back before reporting the error. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The normal fall-through path jumps over the code that the collected quit
  jumps land on; that code resets STACK_TOP first. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Reload the control head that was saved in stack slot 1. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* Without OP_BRAZERO a failed assertion goes to the failure target; with
  OP_BRAZERO the jump is bound to the matching path label set below. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        /* NOTE(review): this load uses a raw offset of 0 where the branch
        above uses STACK(0) - confirm the two are meant to differ. */
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  /* With a zero-repeat prefix the jumps taken when an alternative matched
  are resolved right here and the list is emptied. */
  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Put back the compiler state that was saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7841
7842static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7843{
7844DEFINE_COMPILER;
7845int stacksize;
7846
7847if (framesize < 0)
7848 {
7849 if (framesize == no_frame)
7850 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7851 else
7852 {
7853 stacksize = needs_control_head ? 1 : 0;
7854 if (ket != OP_KET || has_alternatives)
7855 stacksize++;
7856
7857 if (stacksize > 0)
7858 free_stack(common, stacksize);
7859 }
7860
7861 if (needs_control_head)
7862 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
7863
7864 /* TMP2 which is set here used by OP_KETRMAX below. */
7865 if (ket == OP_KETRMAX)
7866 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7867 else if (ket == OP_KETRMIN)
7868 {
7869 /* Move the STR_PTR to the private_data_ptr. */
7870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7871 }
7872 }
7873else
7874 {
7875 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7876 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7877 if (needs_control_head)
7878 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7879
7880 if (ket == OP_KETRMAX)
7881 {
7882 /* TMP2 which is set here used by OP_KETRMAX below. */
7883 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7884 }
7885 }
7886if (needs_control_head)
7887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
7888}
7889
7890static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7891{
7892DEFINE_COMPILER;
7893
7894if (common->capture_last_ptr != 0)
7895 {
7896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7897 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7899 stacksize++;
7900 }
7901if (common->optimized_cbracket[offset >> 1] == 0)
7902 {
7903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7904 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7910 stacksize += 2;
7911 }
7912return stacksize;
7913}
7914
7915/*
7916 Handling bracketed expressions is probably the most complex part.
7917
7918 Stack layout naming characters:
7919 S - Push the current STR_PTR
7920 0 - Push a 0 (NULL)
7921 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7922 before the next alternative. Not pushed if there are no alternatives.
7923 M - Any values pushed by the current alternative. Can be empty, or anything.
7924 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7925 L - Push the previous local (pointed by localptr) to the stack
 () - optional values stored on the stack
 ()* - optional, can be stored multiple times
7928
7929 The following list shows the regular expression templates, their PCRE byte codes
7930 and stack layout supported by pcre-sljit.
7931
7932 (?:) OP_BRA | OP_KET A M
7933 () OP_CBRA | OP_KET C M
7934 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7935 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7936 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7937 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7938 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7939 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7940 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7941 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7942 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7943 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7944 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7945 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7946 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7947 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7948 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7949 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7950 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7951 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7952 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7953 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7954
7955
7956 Stack layout naming characters:
7957 A - Push the alternative index (starting from 0) on the stack.
 Not pushed if there are no alternatives.
7959 M - Any values pushed by the current alternative. Can be empty, or anything.
7960
7961 The next list shows the possible content of a bracket:
7962 (|) OP_*BRA | OP_ALT ... M A
7963 (?()|) OP_*COND | OP_ALT M A
7964 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
7965 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
7966 Or nothing, if trace is unnecessary
7967*/
7968
7969static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7970{
7971DEFINE_COMPILER;
7972backtrack_common *backtrack;
7973pcre_uchar opcode;
7974int private_data_ptr = 0;
7975int offset = 0;
7976int i, stacksize;
7977int repeat_ptr = 0, repeat_length = 0;
7978int repeat_type = 0, repeat_count = 0;
7979pcre_uchar *ccbegin;
7980pcre_uchar *matchingpath;
7981pcre_uchar *slot;
7982pcre_uchar bra = OP_BRA;
7983pcre_uchar ket;
7984assert_backtrack *assert;
7985BOOL has_alternatives;
7986BOOL needs_control_head = FALSE;
7987struct sljit_jump *jump;
7988struct sljit_jump *skip;
7989struct sljit_label *rmax_label = NULL;
7990struct sljit_jump *braminzero = NULL;
7991
7992PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7993
7994if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7995 {
7996 bra = *cc;
7997 cc++;
7998 opcode = *cc;
7999 }
8000
8001opcode = *cc;
8002ccbegin = cc;
8003matchingpath = bracketend(cc) - 1 - LINK_SIZE;
8004ket = *matchingpath;
8005if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
8006 {
8007 repeat_ptr = PRIVATE_DATA(matchingpath);
8008 repeat_length = PRIVATE_DATA(matchingpath + 1);
8009 repeat_type = PRIVATE_DATA(matchingpath + 2);
8010 repeat_count = PRIVATE_DATA(matchingpath + 3);
8011 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
8012 if (repeat_type == OP_UPTO)
8013 ket = OP_KETRMAX;
8014 if (repeat_type == OP_MINUPTO)
8015 ket = OP_KETRMIN;
8016 }
8017
8018if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
8019 {
8020 /* Drop this bracket_backtrack. */
8021 parent->top = backtrack->prev;
8022 return matchingpath + 1 + LINK_SIZE + repeat_length;
8023 }
8024
8025matchingpath = ccbegin + 1 + LINK_SIZE;
8026SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
8027SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
8028cc += GET(cc, 1);
8029
8030has_alternatives = *cc == OP_ALT;
8031if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
8032 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
8033
8034if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8035 opcode = OP_SCOND;
8036if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8037 opcode = OP_ONCE;
8038
8039if (opcode == OP_CBRA || opcode == OP_SCBRA)
8040 {
8041 /* Capturing brackets has a pre-allocated space. */
8042 offset = GET2(ccbegin, 1 + LINK_SIZE);
8043 if (common->optimized_cbracket[offset] == 0)
8044 {
8045 private_data_ptr = OVECTOR_PRIV(offset);
8046 offset <<= 1;
8047 }
8048 else
8049 {
8050 offset <<= 1;
8051 private_data_ptr = OVECTOR(offset);
8052 }
8053 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8054 matchingpath += IMM2_SIZE;
8055 }
8056else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
8057 {
8058 /* Other brackets simply allocate the next entry. */
8059 private_data_ptr = PRIVATE_DATA(ccbegin);
8060 SLJIT_ASSERT(private_data_ptr != 0);
8061 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8062 if (opcode == OP_ONCE)
8063 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
8064 }
8065
8066/* Instructions before the first alternative. */
8067stacksize = 0;
8068if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
8069 stacksize++;
8070if (bra == OP_BRAZERO)
8071 stacksize++;
8072
8073if (stacksize > 0)
8074 allocate_stack(common, stacksize);
8075
8076stacksize = 0;
8077if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
8078 {
8079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8080 stacksize++;
8081 }
8082
8083if (bra == OP_BRAZERO)
8084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8085
8086if (bra == OP_BRAMINZERO)
8087 {
8088 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
8089 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8090 if (ket != OP_KETRMIN)
8091 {
8092 free_stack(common, 1);
8093 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8094 }
8095 else
8096 {
8097 if (opcode == OP_ONCE || opcode >= OP_SBRA)
8098 {
8099 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8100 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8101 /* Nothing stored during the first run. */
8102 skip = JUMP(SLJIT_JUMP);
8103 JUMPHERE(jump);
8104 /* Checking zero-length iteration. */
8105 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8106 {
8107 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
8108 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8109 }
8110 else
8111 {
8112 /* Except when the whole stack frame must be saved. */
8113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8114 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
8115 }
8116 JUMPHERE(skip);
8117 }
8118 else
8119 {
8120 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8121 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8122 JUMPHERE(jump);
8123 }
8124 }
8125 }
8126
8127if (repeat_type != 0)
8128 {
8129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
8130 if (repeat_type == OP_EXACT)
8131 rmax_label = LABEL();
8132 }
8133
8134if (ket == OP_KETRMIN)
8135 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8136
8137if (ket == OP_KETRMAX)
8138 {
8139 rmax_label = LABEL();
8140 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
8141 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
8142 }
8143
8144/* Handling capturing brackets and alternatives. */
8145if (opcode == OP_ONCE)
8146 {
8147 stacksize = 0;
8148 if (needs_control_head)
8149 {
8150 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8151 stacksize++;
8152 }
8153
8154 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8155 {
8156 /* Neither capturing brackets nor recursions are found in the block. */
8157 if (ket == OP_KETRMIN)
8158 {
8159 stacksize += 2;
8160 if (!needs_control_head)
8161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8162 }
8163 else
8164 {
8165 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8167 if (ket == OP_KETRMAX || has_alternatives)
8168 stacksize++;
8169 }
8170
8171 if (stacksize > 0)
8172 allocate_stack(common, stacksize);
8173
8174 stacksize = 0;
8175 if (needs_control_head)
8176 {
8177 stacksize++;
8178 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8179 }
8180
8181 if (ket == OP_KETRMIN)
8182 {
8183 if (needs_control_head)
8184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8186 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8187 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8189 }
8190 else if (ket == OP_KETRMAX || has_alternatives)
8191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8192 }
8193 else
8194 {
8195 if (ket != OP_KET || has_alternatives)
8196 stacksize++;
8197
8198 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8199 allocate_stack(common, stacksize);
8200
8201 if (needs_control_head)
8202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8203
8204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8205 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8206
8207 stacksize = needs_control_head ? 1 : 0;
8208 if (ket != OP_KET || has_alternatives)
8209 {
8210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8212 stacksize++;
8213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8214 }
8215 else
8216 {
8217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8219 }
8220 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8221 }
8222 }
8223else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8224 {
8225 /* Saving the previous values. */
8226 if (common->optimized_cbracket[offset >> 1] != 0)
8227 {
8228 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8229 allocate_stack(common, 2);
8230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8231 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8235 }
8236 else
8237 {
8238 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8239 allocate_stack(common, 1);
8240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8242 }
8243 }
8244else if (opcode == OP_SBRA || opcode == OP_SCOND)
8245 {
8246 /* Saving the previous value. */
8247 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8248 allocate_stack(common, 1);
8249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8250 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8251 }
8252else if (has_alternatives)
8253 {
8254 /* Pushing the starting string pointer. */
8255 allocate_stack(common, 1);
8256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8257 }
8258
8259/* Generating code for the first alternative. */
8260if (opcode == OP_COND || opcode == OP_SCOND)
8261 {
8262 if (*matchingpath == OP_CREF)
8263 {
8264 SLJIT_ASSERT(has_alternatives);
8265 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8266 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8267 matchingpath += 1 + IMM2_SIZE;
8268 }
8269 else if (*matchingpath == OP_DNCREF)
8270 {
8271 SLJIT_ASSERT(has_alternatives);
8272
8273 i = GET2(matchingpath, 1 + IMM2_SIZE);
8274 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8275 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8277 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8278 slot += common->name_entry_size;
8279 i--;
8280 while (i-- > 0)
8281 {
8282 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8283 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
8284 slot += common->name_entry_size;
8285 }
8286 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8287 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8288 matchingpath += 1 + 2 * IMM2_SIZE;
8289 }
8290 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
8291 {
8292 /* Never has other case. */
8293 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8294 SLJIT_ASSERT(!has_alternatives);
8295
8296 if (*matchingpath == OP_FAIL)
8297 stacksize = 0;
8298 else if (*matchingpath == OP_RREF)
8299 {
8300 stacksize = GET2(matchingpath, 1);
8301 if (common->currententry == NULL)
8302 stacksize = 0;
8303 else if (stacksize == RREF_ANY)
8304 stacksize = 1;
8305 else if (common->currententry->start == 0)
8306 stacksize = stacksize == 0;
8307 else
8308 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8309
8310 if (stacksize != 0)
8311 matchingpath += 1 + IMM2_SIZE;
8312 }
8313 else
8314 {
8315 if (common->currententry == NULL || common->currententry->start == 0)
8316 stacksize = 0;
8317 else
8318 {
8319 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8320 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8321 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8322 while (stacksize > 0)
8323 {
8324 if ((int)GET2(slot, 0) == i)
8325 break;
8326 slot += common->name_entry_size;
8327 stacksize--;
8328 }
8329 }
8330
8331 if (stacksize != 0)
8332 matchingpath += 1 + 2 * IMM2_SIZE;
8333 }
8334
8335 /* The stacksize == 0 is a common "else" case. */
8336 if (stacksize == 0)
8337 {
8338 if (*cc == OP_ALT)
8339 {
8340 matchingpath = cc + 1 + LINK_SIZE;
8341 cc += GET(cc, 1);
8342 }
8343 else
8344 matchingpath = cc;
8345 }
8346 }
8347 else
8348 {
8349 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8350 /* Similar code as PUSH_BACKTRACK macro. */
8351 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8352 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8353 return NULL;
8354 memset(assert, 0, sizeof(assert_backtrack));
8355 assert->common.cc = matchingpath;
8356 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8357 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8358 }
8359 }
8360
8361compile_matchingpath(common, matchingpath, cc, backtrack);
8362if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8363 return NULL;
8364
8365if (opcode == OP_ONCE)
8366 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8367
8368stacksize = 0;
8369if (repeat_type == OP_MINUPTO)
8370 {
8371 /* We need to preserve the counter. TMP2 will be used below. */
8372 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8373 stacksize++;
8374 }
8375if (ket != OP_KET || bra != OP_BRA)
8376 stacksize++;
8377if (offset != 0)
8378 {
8379 if (common->capture_last_ptr != 0)
8380 stacksize++;
8381 if (common->optimized_cbracket[offset >> 1] == 0)
8382 stacksize += 2;
8383 }
8384if (has_alternatives && opcode != OP_ONCE)
8385 stacksize++;
8386
8387if (stacksize > 0)
8388 allocate_stack(common, stacksize);
8389
8390stacksize = 0;
8391if (repeat_type == OP_MINUPTO)
8392 {
8393 /* TMP2 was set above. */
8394 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8395 stacksize++;
8396 }
8397
8398if (ket != OP_KET || bra != OP_BRA)
8399 {
8400 if (ket != OP_KET)
8401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8402 else
8403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8404 stacksize++;
8405 }
8406
8407if (offset != 0)
8408 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8409
8410if (has_alternatives)
8411 {
8412 if (opcode != OP_ONCE)
8413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8414 if (ket != OP_KETRMAX)
8415 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8416 }
8417
8418/* Must be after the matchingpath label. */
8419if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8420 {
8421 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8423 }
8424
8425if (ket == OP_KETRMAX)
8426 {
8427 if (repeat_type != 0)
8428 {
8429 if (has_alternatives)
8430 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8431 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8432 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8433 /* Drop STR_PTR for greedy plus quantifier. */
8434 if (opcode != OP_ONCE)
8435 free_stack(common, 1);
8436 }
8437 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8438 {
8439 if (has_alternatives)
8440 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8441 /* Checking zero-length iteration. */
8442 if (opcode != OP_ONCE)
8443 {
8444 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8445 /* Drop STR_PTR for greedy plus quantifier. */
8446 if (bra != OP_BRAZERO)
8447 free_stack(common, 1);
8448 }
8449 else
8450 /* TMP2 must contain the starting STR_PTR. */
8451 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8452 }
8453 else
8454 JUMPTO(SLJIT_JUMP, rmax_label);
8455 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8456 }
8457
8458if (repeat_type == OP_EXACT)
8459 {
8460 count_match(common);
8461 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8462 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8463 }
8464else if (repeat_type == OP_UPTO)
8465 {
8466 /* We need to preserve the counter. */
8467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8468 allocate_stack(common, 1);
8469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8470 }
8471
8472if (bra == OP_BRAZERO)
8473 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8474
8475if (bra == OP_BRAMINZERO)
8476 {
8477 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8478 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8479 if (braminzero != NULL)
8480 {
8481 JUMPHERE(braminzero);
8482 /* We need to release the end pointer to perform the
8483 backtrack for the zero-length iteration. When
8484 framesize is < 0, OP_ONCE will do the release itself. */
8485 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8486 {
8487 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8488 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8489 }
8490 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8491 free_stack(common, 1);
8492 }
8493 /* Continue to the normal backtrack. */
8494 }
8495
8496if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8497 count_match(common);
8498
8499/* Skip the other alternatives. */
8500while (*cc == OP_ALT)
8501 cc += GET(cc, 1);
8502cc += 1 + LINK_SIZE;
8503
8504if (opcode == OP_ONCE)
8505 {
8506 /* We temporarily encode the needs_control_head in the lowest bit.
8507 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8508 the same value for small signed numbers (including negative numbers). */
8509 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8510 }
8511return cc + repeat_length;
8512}
8513
8514static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8515{
8516DEFINE_COMPILER;
8517backtrack_common *backtrack;
8518pcre_uchar opcode;
8519int private_data_ptr;
8520int cbraprivptr = 0;
8521BOOL needs_control_head;
8522int framesize;
8523int stacksize;
8524int offset = 0;
8525BOOL zero = FALSE;
8526pcre_uchar *ccbegin = NULL;
8527int stack; /* Also contains the offset of control head. */
8528struct sljit_label *loop = NULL;
8529struct jump_list *emptymatch = NULL;
8530
8531PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
8532if (*cc == OP_BRAPOSZERO)
8533 {
8534 zero = TRUE;
8535 cc++;
8536 }
8537
8538opcode = *cc;
8539private_data_ptr = PRIVATE_DATA(cc);
8540SLJIT_ASSERT(private_data_ptr != 0);
8541BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
8542switch(opcode)
8543 {
8544 case OP_BRAPOS:
8545 case OP_SBRAPOS:
8546 ccbegin = cc + 1 + LINK_SIZE;
8547 break;
8548
8549 case OP_CBRAPOS:
8550 case OP_SCBRAPOS:
8551 offset = GET2(cc, 1 + LINK_SIZE);
8552 /* This case cannot be optimized in the same was as
8553 normal capturing brackets. */
8554 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
8555 cbraprivptr = OVECTOR_PRIV(offset);
8556 offset <<= 1;
8557 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
8558 break;
8559
8560 default:
8561 SLJIT_UNREACHABLE();
8562 break;
8563 }
8564
8565framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8566BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
8567if (framesize < 0)
8568 {
8569 if (offset != 0)
8570 {
8571 stacksize = 2;
8572 if (common->capture_last_ptr != 0)
8573 stacksize++;
8574 }
8575 else
8576 stacksize = 1;
8577
8578 if (needs_control_head)
8579 stacksize++;
8580 if (!zero)
8581 stacksize++;
8582
8583 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8584 allocate_stack(common, stacksize);
8585 if (framesize == no_frame)
8586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8587
8588 stack = 0;
8589 if (offset != 0)
8590 {
8591 stack = 2;
8592 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8593 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8595 if (common->capture_last_ptr != 0)
8596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8597 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8598 if (needs_control_head)
8599 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8600 if (common->capture_last_ptr != 0)
8601 {
8602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
8603 stack = 3;
8604 }
8605 }
8606 else
8607 {
8608 if (needs_control_head)
8609 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8611 stack = 1;
8612 }
8613
8614 if (needs_control_head)
8615 stack++;
8616 if (!zero)
8617 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
8618 if (needs_control_head)
8619 {
8620 stack--;
8621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8622 }
8623 }
8624else
8625 {
8626 stacksize = framesize + 1;
8627 if (!zero)
8628 stacksize++;
8629 if (needs_control_head)
8630 stacksize++;
8631 if (offset == 0)
8632 stacksize++;
8633 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8634
8635 allocate_stack(common, stacksize);
8636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8637 if (needs_control_head)
8638 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8639 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8640
8641 stack = 0;
8642 if (!zero)
8643 {
8644 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
8645 stack = 1;
8646 }
8647 if (needs_control_head)
8648 {
8649 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8650 stack++;
8651 }
8652 if (offset == 0)
8653 {
8654 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
8655 stack++;
8656 }
8657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
8658 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
8659 stack -= 1 + (offset == 0);
8660 }
8661
8662if (offset != 0)
8663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8664
8665loop = LABEL();
8666while (*cc != OP_KETRPOS)
8667 {
8668 backtrack->top = NULL;
8669 backtrack->topbacktracks = NULL;
8670 cc += GET(cc, 1);
8671
8672 compile_matchingpath(common, ccbegin, cc, backtrack);
8673 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8674 return NULL;
8675
8676 if (framesize < 0)
8677 {
8678 if (framesize == no_frame)
8679 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8680
8681 if (offset != 0)
8682 {
8683 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8685 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8686 if (common->capture_last_ptr != 0)
8687 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8688 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8689 }
8690 else
8691 {
8692 if (opcode == OP_SBRAPOS)
8693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8695 }
8696
8697 /* Even if the match is empty, we need to reset the control head. */
8698 if (needs_control_head)
8699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8700
8701 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8702 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8703
8704 if (!zero)
8705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8706 }
8707 else
8708 {
8709 if (offset != 0)
8710 {
8711 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8713 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8714 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8715 if (common->capture_last_ptr != 0)
8716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8718 }
8719 else
8720 {
8721 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8722 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8723 if (opcode == OP_SBRAPOS)
8724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
8725 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
8726 }
8727
8728 /* Even if the match is empty, we need to reset the control head. */
8729 if (needs_control_head)
8730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8731
8732 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8733 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8734
8735 if (!zero)
8736 {
8737 if (framesize < 0)
8738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8739 else
8740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8741 }
8742 }
8743
8744 JUMPTO(SLJIT_JUMP, loop);
8745 flush_stubs(common);
8746
8747 compile_backtrackingpath(common, backtrack->top);
8748 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8749 return NULL;
8750 set_jumps(backtrack->topbacktracks, LABEL());
8751
8752 if (framesize < 0)
8753 {
8754 if (offset != 0)
8755 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8756 else
8757 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8758 }
8759 else
8760 {
8761 if (offset != 0)
8762 {
8763 /* Last alternative. */
8764 if (*cc == OP_KETRPOS)
8765 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8766 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8767 }
8768 else
8769 {
8770 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8771 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
8772 }
8773 }
8774
8775 if (*cc == OP_KETRPOS)
8776 break;
8777 ccbegin = cc + 1 + LINK_SIZE;
8778 }
8779
8780/* We don't have to restore the control head in case of a failed match. */
8781
8782backtrack->topbacktracks = NULL;
8783if (!zero)
8784 {
8785 if (framesize < 0)
8786 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
8787 else /* TMP2 is set to [private_data_ptr] above. */
8788 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
8789 }
8790
8791/* None of them matched. */
8792set_jumps(emptymatch, LABEL());
8793count_match(common);
8794return cc + 1 + LINK_SIZE;
8795}
8796
8797static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8798{
8799int class_len;
8800
8801*opcode = *cc;
8802*exact = 0;
8803
8804if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8805 {
8806 cc++;
8807 *type = OP_CHAR;
8808 }
8809else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8810 {
8811 cc++;
8812 *type = OP_CHARI;
8813 *opcode -= OP_STARI - OP_STAR;
8814 }
8815else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8816 {
8817 cc++;
8818 *type = OP_NOT;
8819 *opcode -= OP_NOTSTAR - OP_STAR;
8820 }
8821else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8822 {
8823 cc++;
8824 *type = OP_NOTI;
8825 *opcode -= OP_NOTSTARI - OP_STAR;
8826 }
8827else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8828 {
8829 cc++;
8830 *opcode -= OP_TYPESTAR - OP_STAR;
8831 *type = OP_END;
8832 }
8833else
8834 {
8835 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8836 *type = *opcode;
8837 cc++;
8838 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8839 *opcode = cc[class_len - 1];
8840
8841 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8842 {
8843 *opcode -= OP_CRSTAR - OP_STAR;
8844 *end = cc + class_len;
8845
8846 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8847 {
8848 *exact = 1;
8849 *opcode -= OP_PLUS - OP_STAR;
8850 }
8851 }
8852 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8853 {
8854 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8855 *end = cc + class_len;
8856
8857 if (*opcode == OP_POSPLUS)
8858 {
8859 *exact = 1;
8860 *opcode = OP_POSSTAR;
8861 }
8862 }
8863 else
8864 {
8865 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8866 *max = GET2(cc, (class_len + IMM2_SIZE));
8867 *exact = GET2(cc, class_len);
8868
8869 if (*max == 0)
8870 {
8871 if (*opcode == OP_CRPOSRANGE)
8872 *opcode = OP_POSSTAR;
8873 else
8874 *opcode -= OP_CRRANGE - OP_STAR;
8875 }
8876 else
8877 {
8878 *max -= *exact;
8879 if (*max == 0)
8880 *opcode = OP_EXACT;
8881 else if (*max == 1)
8882 {
8883 if (*opcode == OP_CRPOSRANGE)
8884 *opcode = OP_POSQUERY;
8885 else
8886 *opcode -= OP_CRRANGE - OP_QUERY;
8887 }
8888 else
8889 {
8890 if (*opcode == OP_CRPOSRANGE)
8891 *opcode = OP_POSUPTO;
8892 else
8893 *opcode -= OP_CRRANGE - OP_UPTO;
8894 }
8895 }
8896 *end = cc + class_len + 2 * IMM2_SIZE;
8897 }
8898 return cc;
8899 }
8900
8901switch(*opcode)
8902 {
8903 case OP_EXACT:
8904 *exact = GET2(cc, 0);
8905 cc += IMM2_SIZE;
8906 break;
8907
8908 case OP_PLUS:
8909 case OP_MINPLUS:
8910 *exact = 1;
8911 *opcode -= OP_PLUS - OP_STAR;
8912 break;
8913
8914 case OP_POSPLUS:
8915 *exact = 1;
8916 *opcode = OP_POSSTAR;
8917 break;
8918
8919 case OP_UPTO:
8920 case OP_MINUPTO:
8921 case OP_POSUPTO:
8922 *max = GET2(cc, 0);
8923 cc += IMM2_SIZE;
8924 break;
8925 }
8926
8927if (*type == OP_END)
8928 {
8929 *type = *cc;
8930 *end = next_opcode(common, cc);
8931 cc++;
8932 return cc;
8933 }
8934
8935*end = cc + 1;
8936#ifdef SUPPORT_UTF
8937if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8938#endif
8939return cc;
8940}
8941
8942static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8943{
8944DEFINE_COMPILER;
8945backtrack_common *backtrack;
8946pcre_uchar opcode;
8947pcre_uchar type;
8948sljit_u32 max = 0, exact;
8949BOOL fast_fail;
8950sljit_s32 fast_str_ptr;
8951BOOL charpos_enabled;
8952pcre_uchar charpos_char;
8953unsigned int charpos_othercasebit;
8954pcre_uchar *end;
8955jump_list *no_match = NULL;
8956jump_list *no_char1_match = NULL;
8957struct sljit_jump *jump = NULL;
8958struct sljit_label *label;
8959int private_data_ptr = PRIVATE_DATA(cc);
8960int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8961int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8962int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8963int tmp_base, tmp_offset;
8964
8965PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
8966
8967fast_str_ptr = PRIVATE_DATA(cc + 1);
8968fast_fail = TRUE;
8969
8970SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
8971
8972if (cc == common->fast_forward_bc_ptr)
8973 fast_fail = FALSE;
8974else if (common->fast_fail_start_ptr == 0)
8975 fast_str_ptr = 0;
8976
8977SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
8978 || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
8979
8980cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
8981
8982if (type != OP_EXTUNI)
8983 {
8984 tmp_base = TMP3;
8985 tmp_offset = 0;
8986 }
8987else
8988 {
8989 tmp_base = SLJIT_MEM1(SLJIT_SP);
8990 tmp_offset = POSSESSIVE0;
8991 }
8992
8993if (fast_fail && fast_str_ptr != 0)
8994 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
8995
8996/* Handle fixed part first. */
8997if (exact > 1)
8998 {
8999 SLJIT_ASSERT(fast_str_ptr == 0);
9000 if (common->mode == JIT_COMPILE
9001#ifdef SUPPORT_UTF
9002 && !common->utf
9003#endif
9004 && type != OP_ANYNL && type != OP_EXTUNI)
9005 {
9006 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
9007 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
9008 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9009 label = LABEL();
9010 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9011 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9012 JUMPTO(SLJIT_NOT_ZERO, label);
9013 }
9014 else
9015 {
9016 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9017 label = LABEL();
9018 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9019 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9020 JUMPTO(SLJIT_NOT_ZERO, label);
9021 }
9022 }
9023else if (exact == 1)
9024 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9025
9026switch(opcode)
9027 {
9028 case OP_STAR:
9029 case OP_UPTO:
9030 SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
9031
9032 if (type == OP_ANYNL || type == OP_EXTUNI)
9033 {
9034 SLJIT_ASSERT(private_data_ptr == 0);
9035 SLJIT_ASSERT(fast_str_ptr == 0);
9036
9037 allocate_stack(common, 2);
9038 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9040
9041 if (opcode == OP_UPTO)
9042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
9043
9044 label = LABEL();
9045 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9046 if (opcode == OP_UPTO)
9047 {
9048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9049 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9050 jump = JUMP(SLJIT_ZERO);
9051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9052 }
9053
9054 /* We cannot use TMP3 because of this allocate_stack. */
9055 allocate_stack(common, 1);
9056 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9057 JUMPTO(SLJIT_JUMP, label);
9058 if (jump != NULL)
9059 JUMPHERE(jump);
9060 }
9061 else
9062 {
9063 charpos_enabled = FALSE;
9064 charpos_char = 0;
9065 charpos_othercasebit = 0;
9066
9067 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
9068 {
9069 charpos_enabled = TRUE;
9070#ifdef SUPPORT_UTF
9071 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
9072#endif
9073 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
9074 {
9075 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
9076 if (charpos_othercasebit == 0)
9077 charpos_enabled = FALSE;
9078 }
9079
9080 if (charpos_enabled)
9081 {
9082 charpos_char = end[1];
9083 /* Consumpe the OP_CHAR opcode. */
9084 end += 2;
9085#if defined COMPILE_PCRE8
9086 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
9087#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9088 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
9089 if ((charpos_othercasebit & 0x100) != 0)
9090 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
9091#endif
9092 if (charpos_othercasebit != 0)
9093 charpos_char |= charpos_othercasebit;
9094
9095 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
9096 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
9097 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
9098 }
9099 }
9100
9101 if (charpos_enabled)
9102 {
9103 if (opcode == OP_UPTO)
9104 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
9105
9106 /* Search the first instance of charpos_char. */
9107 jump = JUMP(SLJIT_JUMP);
9108 label = LABEL();
9109 if (opcode == OP_UPTO)
9110 {
9111 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9112 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
9113 }
9114 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9115 if (fast_str_ptr != 0)
9116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9117 JUMPHERE(jump);
9118
9119 detect_partial_match(common, &backtrack->topbacktracks);
9120 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9121 if (charpos_othercasebit != 0)
9122 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9123 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9124
9125 if (private_data_ptr == 0)
9126 allocate_stack(common, 2);
9127 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9128 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9129 if (opcode == OP_UPTO)
9130 {
9131 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9132 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
9133 }
9134
9135 /* Search the last instance of charpos_char. */
9136 label = LABEL();
9137 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
9138 if (fast_str_ptr != 0)
9139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9140 detect_partial_match(common, &no_match);
9141 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9142 if (charpos_othercasebit != 0)
9143 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9144 if (opcode == OP_STAR)
9145 {
9146 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9147 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9148 }
9149 else
9150 {
9151 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
9152 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9153 JUMPHERE(jump);
9154 }
9155
9156 if (opcode == OP_UPTO)
9157 {
9158 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9159 JUMPTO(SLJIT_NOT_ZERO, label);
9160 }
9161 else
9162 JUMPTO(SLJIT_JUMP, label);
9163
9164 set_jumps(no_match, LABEL());
9165 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9166 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9167 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9168 }
9169#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9170 else if (common->utf)
9171 {
9172 if (private_data_ptr == 0)
9173 allocate_stack(common, 2);
9174
9175 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9176 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9177
9178 if (opcode == OP_UPTO)
9179 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9180
9181 label = LABEL();
9182 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9183 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9184
9185 if (opcode == OP_UPTO)
9186 {
9187 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9188 JUMPTO(SLJIT_NOT_ZERO, label);
9189 }
9190 else
9191 JUMPTO(SLJIT_JUMP, label);
9192
9193 set_jumps(no_match, LABEL());
9194 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9195 if (fast_str_ptr != 0)
9196 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9197 }
9198#endif
9199 else
9200 {
9201 if (private_data_ptr == 0)
9202 allocate_stack(common, 2);
9203
9204 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9205 if (opcode == OP_UPTO)
9206 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9207
9208 label = LABEL();
9209 detect_partial_match(common, &no_match);
9210 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9211 if (opcode == OP_UPTO)
9212 {
9213 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9214 JUMPTO(SLJIT_NOT_ZERO, label);
9215 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9216 }
9217 else
9218 JUMPTO(SLJIT_JUMP, label);
9219
9220 set_jumps(no_char1_match, LABEL());
9221 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9222 set_jumps(no_match, LABEL());
9223 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9224 if (fast_str_ptr != 0)
9225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9226 }
9227 }
9228 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9229 break;
9230
9231 case OP_MINSTAR:
9232 if (private_data_ptr == 0)
9233 allocate_stack(common, 1);
9234 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9235 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9236 if (fast_str_ptr != 0)
9237 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9238 break;
9239
9240 case OP_MINUPTO:
9241 SLJIT_ASSERT(fast_str_ptr == 0);
9242 if (private_data_ptr == 0)
9243 allocate_stack(common, 2);
9244 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9245 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9246 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9247 break;
9248
9249 case OP_QUERY:
9250 case OP_MINQUERY:
9251 SLJIT_ASSERT(fast_str_ptr == 0);
9252 if (private_data_ptr == 0)
9253 allocate_stack(common, 1);
9254 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9255 if (opcode == OP_QUERY)
9256 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9257 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9258 break;
9259
9260 case OP_EXACT:
9261 break;
9262
9263 case OP_POSSTAR:
9264#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9265 if (common->utf)
9266 {
9267 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9268 label = LABEL();
9269 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9270 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9271 JUMPTO(SLJIT_JUMP, label);
9272 set_jumps(no_match, LABEL());
9273 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9274 if (fast_str_ptr != 0)
9275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9276 break;
9277 }
9278#endif
9279 label = LABEL();
9280 detect_partial_match(common, &no_match);
9281 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9282 JUMPTO(SLJIT_JUMP, label);
9283 set_jumps(no_char1_match, LABEL());
9284 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9285 set_jumps(no_match, LABEL());
9286 if (fast_str_ptr != 0)
9287 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9288 break;
9289
9290 case OP_POSUPTO:
9291 SLJIT_ASSERT(fast_str_ptr == 0);
9292#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9293 if (common->utf)
9294 {
9295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9296 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9297 label = LABEL();
9298 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9299 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9300 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9301 JUMPTO(SLJIT_NOT_ZERO, label);
9302 set_jumps(no_match, LABEL());
9303 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9304 break;
9305 }
9306#endif
9307 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9308 label = LABEL();
9309 detect_partial_match(common, &no_match);
9310 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9311 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9312 JUMPTO(SLJIT_NOT_ZERO, label);
9313 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9314 set_jumps(no_char1_match, LABEL());
9315 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9316 set_jumps(no_match, LABEL());
9317 break;
9318
9319 case OP_POSQUERY:
9320 SLJIT_ASSERT(fast_str_ptr == 0);
9321 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9322 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9323 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9324 set_jumps(no_match, LABEL());
9325 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9326 break;
9327
9328 default:
9329 SLJIT_UNREACHABLE();
9330 break;
9331 }
9332
9333count_match(common);
9334return end;
9335}
9336
9337static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9338{
9339DEFINE_COMPILER;
9340backtrack_common *backtrack;
9341
9342PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9343
9344if (*cc == OP_FAIL)
9345 {
9346 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9347 return cc + 1;
9348 }
9349
9350if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9351 {
9352 /* No need to check notempty conditions. */
9353 if (common->accept_label == NULL)
9354 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9355 else
9356 JUMPTO(SLJIT_JUMP, common->accept_label);
9357 return cc + 1;
9358 }
9359
9360if (common->accept_label == NULL)
9361 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9362else
9363 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
9364OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9365OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9366add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9367OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9368if (common->accept_label == NULL)
9369 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9370else
9371 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
9372OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9373if (common->accept_label == NULL)
9374 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9375else
9376 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9377add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9378return cc + 1;
9379}
9380
9381static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9382{
9383DEFINE_COMPILER;
9384int offset = GET2(cc, 1);
9385BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9386
9387/* Data will be discarded anyway... */
9388if (common->currententry != NULL)
9389 return cc + 1 + IMM2_SIZE;
9390
9391if (!optimized_cbracket)
9392 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
9393offset <<= 1;
9394OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9395if (!optimized_cbracket)
9396 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9397return cc + 1 + IMM2_SIZE;
9398}
9399
9400static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9401{
9402DEFINE_COMPILER;
9403backtrack_common *backtrack;
9404pcre_uchar opcode = *cc;
9405pcre_uchar *ccend = cc + 1;
9406
9407if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9408 ccend += 2 + cc[1];
9409
9410PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9411
9412if (opcode == OP_SKIP)
9413 {
9414 allocate_stack(common, 1);
9415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9416 return ccend;
9417 }
9418
9419if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9420 {
9421 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9422 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9424 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9425 }
9426
9427return ccend;
9428}
9429
9430static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
9431
9432static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9433{
9434DEFINE_COMPILER;
9435backtrack_common *backtrack;
9436BOOL needs_control_head;
9437int size;
9438
9439PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9440common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9441BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9442BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9443BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
9444
9445size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9446size = 3 + (size < 0 ? 0 : size);
9447
9448OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9449allocate_stack(common, size);
9450if (size > 3)
9451 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9452else
9453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9454OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9455OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9456OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
9457
9458size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9459if (size >= 0)
9460 init_frame(common, cc, ccend, size - 1, 0, FALSE);
9461}
9462
9463static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9464{
9465DEFINE_COMPILER;
9466backtrack_common *backtrack;
9467BOOL has_then_trap = FALSE;
9468then_trap_backtrack *save_then_trap = NULL;
9469
9470SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
9471
9472if (common->has_then && common->then_offsets[cc - common->start] != 0)
9473 {
9474 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
9475 has_then_trap = TRUE;
9476 save_then_trap = common->then_trap;
9477 /* Tail item on backtrack. */
9478 compile_then_trap_matchingpath(common, cc, ccend, parent);
9479 }
9480
9481while (cc < ccend)
9482 {
9483 switch(*cc)
9484 {
9485 case OP_SOD:
9486 case OP_SOM:
9487 case OP_NOT_WORD_BOUNDARY:
9488 case OP_WORD_BOUNDARY:
9489 case OP_EODN:
9490 case OP_EOD:
9491 case OP_DOLL:
9492 case OP_DOLLM:
9493 case OP_CIRC:
9494 case OP_CIRCM:
9495 case OP_REVERSE:
9496 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9497 break;
9498
9499 case OP_NOT_DIGIT:
9500 case OP_DIGIT:
9501 case OP_NOT_WHITESPACE:
9502 case OP_WHITESPACE:
9503 case OP_NOT_WORDCHAR:
9504 case OP_WORDCHAR:
9505 case OP_ANY:
9506 case OP_ALLANY:
9507 case OP_ANYBYTE:
9508 case OP_NOTPROP:
9509 case OP_PROP:
9510 case OP_ANYNL:
9511 case OP_NOT_HSPACE:
9512 case OP_HSPACE:
9513 case OP_NOT_VSPACE:
9514 case OP_VSPACE:
9515 case OP_EXTUNI:
9516 case OP_NOT:
9517 case OP_NOTI:
9518 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9519 break;
9520
9521 case OP_SET_SOM:
9522 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9523 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
9524 allocate_stack(common, 1);
9525 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
9526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9527 cc++;
9528 break;
9529
9530 case OP_CHAR:
9531 case OP_CHARI:
9532 if (common->mode == JIT_COMPILE)
9533 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9534 else
9535 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9536 break;
9537
9538 case OP_STAR:
9539 case OP_MINSTAR:
9540 case OP_PLUS:
9541 case OP_MINPLUS:
9542 case OP_QUERY:
9543 case OP_MINQUERY:
9544 case OP_UPTO:
9545 case OP_MINUPTO:
9546 case OP_EXACT:
9547 case OP_POSSTAR:
9548 case OP_POSPLUS:
9549 case OP_POSQUERY:
9550 case OP_POSUPTO:
9551 case OP_STARI:
9552 case OP_MINSTARI:
9553 case OP_PLUSI:
9554 case OP_MINPLUSI:
9555 case OP_QUERYI:
9556 case OP_MINQUERYI:
9557 case OP_UPTOI:
9558 case OP_MINUPTOI:
9559 case OP_EXACTI:
9560 case OP_POSSTARI:
9561 case OP_POSPLUSI:
9562 case OP_POSQUERYI:
9563 case OP_POSUPTOI:
9564 case OP_NOTSTAR:
9565 case OP_NOTMINSTAR:
9566 case OP_NOTPLUS:
9567 case OP_NOTMINPLUS:
9568 case OP_NOTQUERY:
9569 case OP_NOTMINQUERY:
9570 case OP_NOTUPTO:
9571 case OP_NOTMINUPTO:
9572 case OP_NOTEXACT:
9573 case OP_NOTPOSSTAR:
9574 case OP_NOTPOSPLUS:
9575 case OP_NOTPOSQUERY:
9576 case OP_NOTPOSUPTO:
9577 case OP_NOTSTARI:
9578 case OP_NOTMINSTARI:
9579 case OP_NOTPLUSI:
9580 case OP_NOTMINPLUSI:
9581 case OP_NOTQUERYI:
9582 case OP_NOTMINQUERYI:
9583 case OP_NOTUPTOI:
9584 case OP_NOTMINUPTOI:
9585 case OP_NOTEXACTI:
9586 case OP_NOTPOSSTARI:
9587 case OP_NOTPOSPLUSI:
9588 case OP_NOTPOSQUERYI:
9589 case OP_NOTPOSUPTOI:
9590 case OP_TYPESTAR:
9591 case OP_TYPEMINSTAR:
9592 case OP_TYPEPLUS:
9593 case OP_TYPEMINPLUS:
9594 case OP_TYPEQUERY:
9595 case OP_TYPEMINQUERY:
9596 case OP_TYPEUPTO:
9597 case OP_TYPEMINUPTO:
9598 case OP_TYPEEXACT:
9599 case OP_TYPEPOSSTAR:
9600 case OP_TYPEPOSPLUS:
9601 case OP_TYPEPOSQUERY:
9602 case OP_TYPEPOSUPTO:
9603 cc = compile_iterator_matchingpath(common, cc, parent);
9604 break;
9605
9606 case OP_CLASS:
9607 case OP_NCLASS:
9608 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
9609 cc = compile_iterator_matchingpath(common, cc, parent);
9610 else
9611 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9612 break;
9613
9614#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9615 case OP_XCLASS:
9616 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
9617 cc = compile_iterator_matchingpath(common, cc, parent);
9618 else
9619 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9620 break;
9621#endif
9622
9623 case OP_REF:
9624 case OP_REFI:
9625 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
9626 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9627 else
9628 {
9629 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9630 cc += 1 + IMM2_SIZE;
9631 }
9632 break;
9633
9634 case OP_DNREF:
9635 case OP_DNREFI:
9636 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
9637 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9638 else
9639 {
9640 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9641 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9642 cc += 1 + 2 * IMM2_SIZE;
9643 }
9644 break;
9645
9646 case OP_RECURSE:
9647 cc = compile_recurse_matchingpath(common, cc, parent);
9648 break;
9649
9650 case OP_CALLOUT:
9651 cc = compile_callout_matchingpath(common, cc, parent);
9652 break;
9653
9654 case OP_ASSERT:
9655 case OP_ASSERT_NOT:
9656 case OP_ASSERTBACK:
9657 case OP_ASSERTBACK_NOT:
9658 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9659 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9660 break;
9661
9662 case OP_BRAMINZERO:
9663 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
9664 cc = bracketend(cc + 1);
9665 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
9666 {
9667 allocate_stack(common, 1);
9668 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9669 }
9670 else
9671 {
9672 allocate_stack(common, 2);
9673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
9675 }
9676 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
9677 count_match(common);
9678 break;
9679
9680 case OP_ONCE:
9681 case OP_ONCE_NC:
9682 case OP_BRA:
9683 case OP_CBRA:
9684 case OP_COND:
9685 case OP_SBRA:
9686 case OP_SCBRA:
9687 case OP_SCOND:
9688 cc = compile_bracket_matchingpath(common, cc, parent);
9689 break;
9690
9691 case OP_BRAZERO:
9692 if (cc[1] > OP_ASSERTBACK_NOT)
9693 cc = compile_bracket_matchingpath(common, cc, parent);
9694 else
9695 {
9696 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9697 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9698 }
9699 break;
9700
9701 case OP_BRAPOS:
9702 case OP_CBRAPOS:
9703 case OP_SBRAPOS:
9704 case OP_SCBRAPOS:
9705 case OP_BRAPOSZERO:
9706 cc = compile_bracketpos_matchingpath(common, cc, parent);
9707 break;
9708
9709 case OP_MARK:
9710 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9711 SLJIT_ASSERT(common->mark_ptr != 0);
9712 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
9713 allocate_stack(common, common->has_skip_arg ? 5 : 1);
9714 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
9716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9718 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9719 if (common->has_skip_arg)
9720 {
9721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
9724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
9725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
9726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9727 }
9728 cc += 1 + 2 + cc[1];
9729 break;
9730
9731 case OP_PRUNE:
9732 case OP_PRUNE_ARG:
9733 case OP_SKIP:
9734 case OP_SKIP_ARG:
9735 case OP_THEN:
9736 case OP_THEN_ARG:
9737 case OP_COMMIT:
9738 cc = compile_control_verb_matchingpath(common, cc, parent);
9739 break;
9740
9741 case OP_FAIL:
9742 case OP_ACCEPT:
9743 case OP_ASSERT_ACCEPT:
9744 cc = compile_fail_accept_matchingpath(common, cc, parent);
9745 break;
9746
9747 case OP_CLOSE:
9748 cc = compile_close_matchingpath(common, cc);
9749 break;
9750
9751 case OP_SKIPZERO:
9752 cc = bracketend(cc + 1);
9753 break;
9754
9755 default:
9756 SLJIT_UNREACHABLE();
9757 return;
9758 }
9759 if (cc == NULL)
9760 return;
9761 }
9762
9763if (has_then_trap)
9764 {
9765 /* Head item on backtrack. */
9766 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9767 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9768 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
9769 common->then_trap = save_then_trap;
9770 }
9771SLJIT_ASSERT(cc == ccend);
9772}
9773
/* The backtrack-record helpers of the matching path routines are retired
here. */
#undef BACKTRACK_AS
#undef PUSH_BACKTRACK_NOVALUE
#undef PUSH_BACKTRACK

/* Compiles the backtracking path of the given record and returns from the
enclosing (void) function as soon as the sljit compiler reports an error.
Expects `common` and `compiler` to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
  } while (0)

/* Views the `current` backtrack record as the given concrete record type. */
#define CURRENT_AS(type) ((type *)current)
9788
9789static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9790{
9791DEFINE_COMPILER;
9792pcre_uchar *cc = current->cc;
9793pcre_uchar opcode;
9794pcre_uchar type;
9795sljit_u32 max = 0, exact;
9796struct sljit_label *label = NULL;
9797struct sljit_jump *jump = NULL;
9798jump_list *jumplist = NULL;
9799pcre_uchar *end;
9800int private_data_ptr = PRIVATE_DATA(cc);
9801int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9802int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9803int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9804
9805cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9806
9807switch(opcode)
9808 {
9809 case OP_STAR:
9810 case OP_UPTO:
9811 if (type == OP_ANYNL || type == OP_EXTUNI)
9812 {
9813 SLJIT_ASSERT(private_data_ptr == 0);
9814 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9815 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9816 free_stack(common, 1);
9817 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9818 }
9819 else
9820 {
9821 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
9822 {
9823 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9824 OP1(SLJIT_MOV, TMP2, 0, base, offset1);
9825 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9826
9827 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
9828 label = LABEL();
9829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
9830 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9831 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
9832 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
9833 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9834 skip_char_back(common);
9835 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
9836 }
9837 else
9838 {
9839 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9840 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
9841 skip_char_back(common);
9842 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9843 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9844 }
9845 JUMPHERE(jump);
9846 if (private_data_ptr == 0)
9847 free_stack(common, 2);
9848 }
9849 break;
9850
9851 case OP_MINSTAR:
9852 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9853 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9854 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9855 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9856 set_jumps(jumplist, LABEL());
9857 if (private_data_ptr == 0)
9858 free_stack(common, 1);
9859 break;
9860
9861 case OP_MINUPTO:
9862 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
9863 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9864 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9865 add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
9866
9867 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
9868 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9869 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9870 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9871
9872 set_jumps(jumplist, LABEL());
9873 if (private_data_ptr == 0)
9874 free_stack(common, 2);
9875 break;
9876
9877 case OP_QUERY:
9878 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9879 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9880 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9881 jump = JUMP(SLJIT_JUMP);
9882 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9883 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9884 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9885 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9886 JUMPHERE(jump);
9887 if (private_data_ptr == 0)
9888 free_stack(common, 1);
9889 break;
9890
9891 case OP_MINQUERY:
9892 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9893 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9894 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9895 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9896 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9897 set_jumps(jumplist, LABEL());
9898 JUMPHERE(jump);
9899 if (private_data_ptr == 0)
9900 free_stack(common, 1);
9901 break;
9902
9903 case OP_EXACT:
9904 case OP_POSSTAR:
9905 case OP_POSQUERY:
9906 case OP_POSUPTO:
9907 break;
9908
9909 default:
9910 SLJIT_UNREACHABLE();
9911 break;
9912 }
9913
9914set_jumps(current->topbacktracks, LABEL());
9915}
9916
9917static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9918{
9919DEFINE_COMPILER;
9920pcre_uchar *cc = current->cc;
9921BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9922pcre_uchar type;
9923
9924type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9925
9926if ((type & 0x1) == 0)
9927 {
9928 /* Maximize case. */
9929 set_jumps(current->topbacktracks, LABEL());
9930 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9931 free_stack(common, 1);
9932 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9933 return;
9934 }
9935
9936OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9937CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9938set_jumps(current->topbacktracks, LABEL());
9939free_stack(common, ref ? 2 : 3);
9940}
9941
9942static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9943{
9944DEFINE_COMPILER;
9945
9946if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9947 compile_backtrackingpath(common, current->top);
9948set_jumps(current->topbacktracks, LABEL());
9949if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9950 return;
9951
9952if (common->has_set_som && common->mark_ptr != 0)
9953 {
9954 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9955 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9956 free_stack(common, 2);
9957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9959 }
9960else if (common->has_set_som || common->mark_ptr != 0)
9961 {
9962 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9963 free_stack(common, 1);
9964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
9965 }
9966}
9967
9968static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9969{
9970DEFINE_COMPILER;
9971pcre_uchar *cc = current->cc;
9972pcre_uchar bra = OP_BRA;
9973struct sljit_jump *brajump = NULL;
9974
9975SLJIT_ASSERT(*cc != OP_BRAMINZERO);
9976if (*cc == OP_BRAZERO)
9977 {
9978 bra = *cc;
9979 cc++;
9980 }
9981
9982if (bra == OP_BRAZERO)
9983 {
9984 SLJIT_ASSERT(current->topbacktracks == NULL);
9985 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9986 }
9987
9988if (CURRENT_AS(assert_backtrack)->framesize < 0)
9989 {
9990 set_jumps(current->topbacktracks, LABEL());
9991
9992 if (bra == OP_BRAZERO)
9993 {
9994 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9995 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
9996 free_stack(common, 1);
9997 }
9998 return;
9999 }
10000
10001if (bra == OP_BRAZERO)
10002 {
10003 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
10004 {
10005 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10006 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
10007 free_stack(common, 1);
10008 return;
10009 }
10010 free_stack(common, 1);
10011 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10012 }
10013
10014if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
10015 {
10016 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
10017 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10018 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(assert_backtrack)->framesize - 1));
10019
10020 set_jumps(current->topbacktracks, LABEL());
10021 }
10022else
10023 set_jumps(current->topbacktracks, LABEL());
10024
10025if (bra == OP_BRAZERO)
10026 {
10027 /* We know there is enough place on the stack. */
10028 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10030 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
10031 JUMPHERE(brajump);
10032 }
10033}
10034
/* Emits the backtracking path of a bracket group.

compile_backtrackingpath() calls this for OP_ONCE, OP_ONCE_NC, OP_BRA,
OP_CBRA, OP_COND, OP_SBRA, OP_SCBRA, OP_SCOND and for OP_BRAZERO followed by
a non-assertion bracket; compile_braminzero_backtrackingpath() calls it for
the OP_BRAMINZERO form.

Arguments:
  common    shared compiler state (stack helpers, OVECTOR slots, jump lists)
  current   the bracket_backtrack record of this group

The emitted code pops, in a fixed order, the values that were pushed for the
group (repeat counter, zero/iteration marker, capture state, alternative
selector), runs the backtracking path of the nested items, and then tries the
remaining alternatives or the next/previous iteration. The order of the pops
has to mirror the order of the pushes, so statements must not be reordered.

The function returns early, leaving code generation incomplete, when
allocate_read_only_data() fails or when the compiler reports an error after
compile_matchingpath(). */

static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and
skipped, so that cc points at the bracket opcode itself. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily addresses the opcode found 1 + LINK_SIZE units before
the position returned by bracketend(); it is read as the closing ket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  /* A plain OP_KET with non-zero private data describes a counted repeat:
  the private data supplies the counter slot, the repeat type and the repeat
  count. OP_UPTO and OP_MINUPTO are handled below like OP_KETRMAX and
  OP_KETRMIN respectively. */
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the bracket opcode, and cc is advanced by the link
value stored after it (an OP_ALT there means more alternatives follow). */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* Conditional groups: alternatives exist when the condition is an assertion
or when condfailed jumps were recorded. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* Capturing groups: offset is twice the 2-unit value stored after the link
(presumably the capture group number), and is used to index OVECTOR. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* Normalise opcodes: a repeated OP_COND is handled as OP_SCOND, and
OP_ONCE_NC is handled as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. The lowest bit is the flag;
note that the stored u.framesize is shifted in place here. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat mapped to KETRMAX / KETRMIN: pop the saved counter and
write it back to the counter slot (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* Pop the top slot; a zero value jumps to the brazero landing point
    bound in the OP_KETRMAX handling at the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        /* With a frame, the value to compare against is read from the
        stack through the pointer held in the private data slot. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* rmin_label is the target of the conditional jump emitted in the
  OP_KETRMIN handling at the end of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* Non-repeated group with OP_BRAZERO: pop the top slot; a non-zero value
  jumps to the brazero landing point at the very end of this function. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* Exact repeat: the counter restarts at 1 and the code from exact_label
  onwards is re-run by the loop emitted near the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing group: restore the capture state that was saved on the stack. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    /* Three slots: capture_last, OVECTOR(offset), OVECTOR(offset + 1). */
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    /* Two slots: OVECTOR(offset) and OVECTOR(offset + 1). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reload STACK_TOP from the private slot and call revertframes. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  /* This jump is bound by JUMPHERE(once) in the OP_ONCE clean-up code
  further below, so the code emitted in between is skipped on this path. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }
else if (has_alternatives)
  {
  /* Pop the alternative selector into TMP1. The selector values are
  multiples of sizeof(sljit_uw); see alt_count in the loop below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 4)
    {
    /* Table jump if alt_max is greater than 4. The selector is used as a
    byte offset into a read-only table of alt_max code addresses; the
    entries are registered with add_label_addr() as the code is emitted. */
    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
    if (SLJIT_UNLIKELY(next_update_addr == NULL))
      return;
    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
    add_label_addr(common, next_update_addr++);
    }
  else
    {
    /* Up to four alternatives: a short chain of compares is used instead.
    alt1 / alt2 are bound inside the loop below. */
    if (alt_max == 4)
      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
    }
  }

/* Backtracking path of the items recorded in current->top, followed by the
landing point of the failure jumps collected in current->topbacktracks. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      /* Positive assertion condition with a frame: revert the frame and
      restore the previous value of the assertion's private slot. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
      }
    /* cond skips the alternative code below; the condfailed jumps land
    right after it, i.e. at the start of that code. */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  /* One iteration per remaining alternative. Each iteration emits the
  alternative's matching path, the code that re-saves the group state and
  jumps to alternative_matchingpath, and then the alternative's own
  backtracking path. alt_count is the selector stored for the alternative. */
  alt_count = sizeof(sljit_uw);
  do
    {
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        /* Reload the subject position at which the group was entered. */
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots that will be written below. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots, using stacksize as the running index. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      /* Repeated groups save STR_PTR; a non-repeated group with a zero
      prefix saves the constant 0 instead. */
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* The selector of this alternative goes into the last slot. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Landing point of the selector dispatch for this alternative: either
    the next jump table entry, or the pending alt1 / alt2 compare jump. A
    further compare is chained when three or four alternatives exist. */
    if (opcode != OP_ONCE)
      {
      if (alt_max > 4)
        add_label_addr(common, next_update_addr++);
      else
        {
        if (alt_count != 2 * sizeof(sljit_uw))
          {
          JUMPHERE(alt1);
          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
          }
        else
          {
          JUMPHERE(alt2);
          if (alt_max == 4)
            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
          }
        }
      alt_count += sizeof(sljit_uw);
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      /* Negative assertion condition with a frame: revert the frame and
      restore the previous value of the assertion's private slot. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Restore the values saved when the group was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  /* Compute how many stack slots the atomic group still owns. */
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* Target of the unconditional jump emitted for OP_ONCE above. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Finally decide whether to iterate again, take the zero-repeat path, or
fall through to the enclosing backtracking path. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop back to exact_label while it does not
  exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  /* A non-zero saved STR_PTR re-enters the group's backtracking through
  recursive_matchingpath. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* Non-repeated optional group: continue with the zero-repeat path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
10496
10497static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10498{
10499DEFINE_COMPILER;
10500int offset;
10501struct sljit_jump *jump;
10502
10503if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
10504 {
10505 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10506 {
10507 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10508 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10509 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10511 if (common->capture_last_ptr != 0)
10512 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10513 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10514 if (common->capture_last_ptr != 0)
10515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10516 }
10517 set_jumps(current->topbacktracks, LABEL());
10518 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10519 return;
10520 }
10521
10522OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10523add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10524
10525if (current->topbacktracks)
10526 {
10527 jump = JUMP(SLJIT_JUMP);
10528 set_jumps(current->topbacktracks, LABEL());
10529 /* Drop the stack frame. */
10530 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10531 JUMPHERE(jump);
10532 }
10533OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
10534}
10535
10536static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10537{
10538assert_backtrack backtrack;
10539
10540current->top = NULL;
10541current->topbacktracks = NULL;
10542current->nextbacktracks = NULL;
10543if (current->cc[1] > OP_ASSERTBACK_NOT)
10544 {
10545 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10546 compile_bracket_matchingpath(common, current->cc, current);
10547 compile_bracket_backtrackingpath(common, current->top);
10548 }
10549else
10550 {
10551 memset(&backtrack, 0, sizeof(backtrack));
10552 backtrack.common.cc = current->cc;
10553 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10554 /* Manual call of compile_assert_matchingpath. */
10555 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
10556 }
10557SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
10558}
10559
10560static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10561{
10562DEFINE_COMPILER;
10563pcre_uchar opcode = *current->cc;
10564struct sljit_label *loop;
10565struct sljit_jump *jump;
10566
10567if (opcode == OP_THEN || opcode == OP_THEN_ARG)
10568 {
10569 if (common->then_trap != NULL)
10570 {
10571 SLJIT_ASSERT(common->control_head_ptr != 0);
10572
10573 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10575 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
10576 jump = JUMP(SLJIT_JUMP);
10577
10578 loop = LABEL();
10579 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10580 JUMPHERE(jump);
10581 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
10582 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
10583 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10584 return;
10585 }
10586 else if (common->positive_assert)
10587 {
10588 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10589 return;
10590 }
10591 }
10592
10593if (common->local_exit)
10594 {
10595 if (common->quit_label == NULL)
10596 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10597 else
10598 JUMPTO(SLJIT_JUMP, common->quit_label);
10599 return;
10600 }
10601
10602if (opcode == OP_SKIP_ARG)
10603 {
10604 SLJIT_ASSERT(common->control_head_ptr != 0);
10605 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10607 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10608 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10609 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10610
10611 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10612 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10613 return;
10614 }
10615
10616if (opcode == OP_SKIP)
10617 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10618else
10619 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10620add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
10621}
10622
10623static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10624{
10625DEFINE_COMPILER;
10626struct sljit_jump *jump;
10627int size;
10628
10629if (CURRENT_AS(then_trap_backtrack)->then_trap)
10630 {
10631 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10632 return;
10633 }
10634
10635size = CURRENT_AS(then_trap_backtrack)->framesize;
10636size = 3 + (size < 0 ? 0 : size);
10637
10638OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10639free_stack(common, size);
10640jump = JUMP(SLJIT_JUMP);
10641
10642set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10643/* STACK_TOP is set by THEN. */
10644if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10645 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10646OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10647free_stack(common, 3);
10648
10649JUMPHERE(jump);
10650OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10651}
10652
10653static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10654{
10655DEFINE_COMPILER;
10656then_trap_backtrack *save_then_trap = common->then_trap;
10657
10658while (current)
10659 {
10660 if (current->nextbacktracks != NULL)
10661 set_jumps(current->nextbacktracks, LABEL());
10662 switch(*current->cc)
10663 {
10664 case OP_SET_SOM:
10665 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10666 free_stack(common, 1);
10667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
10668 break;
10669
10670 case OP_STAR:
10671 case OP_MINSTAR:
10672 case OP_PLUS:
10673 case OP_MINPLUS:
10674 case OP_QUERY:
10675 case OP_MINQUERY:
10676 case OP_UPTO:
10677 case OP_MINUPTO:
10678 case OP_EXACT:
10679 case OP_POSSTAR:
10680 case OP_POSPLUS:
10681 case OP_POSQUERY:
10682 case OP_POSUPTO:
10683 case OP_STARI:
10684 case OP_MINSTARI:
10685 case OP_PLUSI:
10686 case OP_MINPLUSI:
10687 case OP_QUERYI:
10688 case OP_MINQUERYI:
10689 case OP_UPTOI:
10690 case OP_MINUPTOI:
10691 case OP_EXACTI:
10692 case OP_POSSTARI:
10693 case OP_POSPLUSI:
10694 case OP_POSQUERYI:
10695 case OP_POSUPTOI:
10696 case OP_NOTSTAR:
10697 case OP_NOTMINSTAR:
10698 case OP_NOTPLUS:
10699 case OP_NOTMINPLUS:
10700 case OP_NOTQUERY:
10701 case OP_NOTMINQUERY:
10702 case OP_NOTUPTO:
10703 case OP_NOTMINUPTO:
10704 case OP_NOTEXACT:
10705 case OP_NOTPOSSTAR:
10706 case OP_NOTPOSPLUS:
10707 case OP_NOTPOSQUERY:
10708 case OP_NOTPOSUPTO:
10709 case OP_NOTSTARI:
10710 case OP_NOTMINSTARI:
10711 case OP_NOTPLUSI:
10712 case OP_NOTMINPLUSI:
10713 case OP_NOTQUERYI:
10714 case OP_NOTMINQUERYI:
10715 case OP_NOTUPTOI:
10716 case OP_NOTMINUPTOI:
10717 case OP_NOTEXACTI:
10718 case OP_NOTPOSSTARI:
10719 case OP_NOTPOSPLUSI:
10720 case OP_NOTPOSQUERYI:
10721 case OP_NOTPOSUPTOI:
10722 case OP_TYPESTAR:
10723 case OP_TYPEMINSTAR:
10724 case OP_TYPEPLUS:
10725 case OP_TYPEMINPLUS:
10726 case OP_TYPEQUERY:
10727 case OP_TYPEMINQUERY:
10728 case OP_TYPEUPTO:
10729 case OP_TYPEMINUPTO:
10730 case OP_TYPEEXACT:
10731 case OP_TYPEPOSSTAR:
10732 case OP_TYPEPOSPLUS:
10733 case OP_TYPEPOSQUERY:
10734 case OP_TYPEPOSUPTO:
10735 case OP_CLASS:
10736 case OP_NCLASS:
10737#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
10738 case OP_XCLASS:
10739#endif
10740 compile_iterator_backtrackingpath(common, current);
10741 break;
10742
10743 case OP_REF:
10744 case OP_REFI:
10745 case OP_DNREF:
10746 case OP_DNREFI:
10747 compile_ref_iterator_backtrackingpath(common, current);
10748 break;
10749
10750 case OP_RECURSE:
10751 compile_recurse_backtrackingpath(common, current);
10752 break;
10753
10754 case OP_ASSERT:
10755 case OP_ASSERT_NOT:
10756 case OP_ASSERTBACK:
10757 case OP_ASSERTBACK_NOT:
10758 compile_assert_backtrackingpath(common, current);
10759 break;
10760
10761 case OP_ONCE:
10762 case OP_ONCE_NC:
10763 case OP_BRA:
10764 case OP_CBRA:
10765 case OP_COND:
10766 case OP_SBRA:
10767 case OP_SCBRA:
10768 case OP_SCOND:
10769 compile_bracket_backtrackingpath(common, current);
10770 break;
10771
10772 case OP_BRAZERO:
10773 if (current->cc[1] > OP_ASSERTBACK_NOT)
10774 compile_bracket_backtrackingpath(common, current);
10775 else
10776 compile_assert_backtrackingpath(common, current);
10777 break;
10778
10779 case OP_BRAPOS:
10780 case OP_CBRAPOS:
10781 case OP_SBRAPOS:
10782 case OP_SCBRAPOS:
10783 case OP_BRAPOSZERO:
10784 compile_bracketpos_backtrackingpath(common, current);
10785 break;
10786
10787 case OP_BRAMINZERO:
10788 compile_braminzero_backtrackingpath(common, current);
10789 break;
10790
10791 case OP_MARK:
10792 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
10793 if (common->has_skip_arg)
10794 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10795 free_stack(common, common->has_skip_arg ? 5 : 1);
10796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
10797 if (common->has_skip_arg)
10798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10799 break;
10800
10801 case OP_THEN:
10802 case OP_THEN_ARG:
10803 case OP_PRUNE:
10804 case OP_PRUNE_ARG:
10805 case OP_SKIP:
10806 case OP_SKIP_ARG:
10807 compile_control_verb_backtrackingpath(common, current);
10808 break;
10809
10810 case OP_COMMIT:
10811 if (!common->local_exit)
10812 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10813 if (common->quit_label == NULL)
10814 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10815 else
10816 JUMPTO(SLJIT_JUMP, common->quit_label);
10817 break;
10818
10819 case OP_CALLOUT:
10820 case OP_FAIL:
10821 case OP_ACCEPT:
10822 case OP_ASSERT_ACCEPT:
10823 set_jumps(current->topbacktracks, LABEL());
10824 break;
10825
10826 case OP_THEN_TRAP:
10827 /* A virtual opcode for then traps. */
10828 compile_then_trap_backtrackingpath(common, current);
10829 break;
10830
10831 default:
10832 SLJIT_UNREACHABLE();
10833 break;
10834 }
10835 current = current->prev;
10836 }
10837common->then_trap = save_then_trap;
10838}
10839
10840static SLJIT_INLINE void compile_recurse(compiler_common *common)
10841{
10842DEFINE_COMPILER;
10843pcre_uchar *cc = common->start + common->currententry->start;
10844pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10845pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10846BOOL needs_control_head;
10847int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10848int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10849int alternativesize;
10850BOOL needs_frame;
10851backtrack_common altbacktrack;
10852struct sljit_jump *jump;
10853
10854/* Recurse captures then. */
10855common->then_trap = NULL;
10856
10857SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
10858needs_frame = framesize >= 0;
10859if (!needs_frame)
10860 framesize = 0;
10861alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10862
10863SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10864common->currententry->entry = LABEL();
10865set_jumps(common->currententry->calls, common->currententry->entry);
10866
10867sljit_emit_fast_enter(compiler, TMP2, 0);
10868count_match(common);
10869allocate_stack(common, private_data_size + framesize + alternativesize);
10870OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10871copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10872if (needs_control_head)
10873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10874OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10875if (needs_frame)
10876 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10877
10878if (alternativesize > 0)
10879 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10880
10881memset(&altbacktrack, 0, sizeof(backtrack_common));
10882common->quit_label = NULL;
10883common->accept_label = NULL;
10884common->quit = NULL;
10885common->accept = NULL;
10886altbacktrack.cc = ccbegin;
10887cc += GET(cc, 1);
10888while (1)
10889 {
10890 altbacktrack.top = NULL;
10891 altbacktrack.topbacktracks = NULL;
10892
10893 if (altbacktrack.cc != ccbegin)
10894 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10895
10896 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10897 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10898 return;
10899
10900 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10901
10902 compile_backtrackingpath(common, altbacktrack.top);
10903 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10904 return;
10905 set_jumps(altbacktrack.topbacktracks, LABEL());
10906
10907 if (*cc != OP_ALT)
10908 break;
10909
10910 altbacktrack.cc = cc + 1 + LINK_SIZE;
10911 cc += GET(cc, 1);
10912 }
10913
10914/* None of them matched. */
10915OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10916jump = JUMP(SLJIT_JUMP);
10917
10918if (common->quit != NULL)
10919 {
10920 set_jumps(common->quit, LABEL());
10921 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10922 if (needs_frame)
10923 {
10924 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10925 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10926 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10927 }
10928 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10929 common->quit = NULL;
10930 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10931 }
10932
10933set_jumps(common->accept, LABEL());
10934OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10935if (needs_frame)
10936 {
10937 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10938 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10939 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10940 }
10941OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
10942
10943JUMPHERE(jump);
10944if (common->quit != NULL)
10945 set_jumps(common->quit, LABEL());
10946copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10947free_stack(common, private_data_size + framesize + alternativesize);
10948if (needs_control_head)
10949 {
10950 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
10951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10953 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10954 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10955 }
10956else
10957 {
10958 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10959 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10960 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10961 }
10962sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
10963}
10964
10965#undef COMPILE_BACKTRACKINGPATH
10966#undef CURRENT_AS
10967
10968void
10969PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
10970{
10971struct sljit_compiler *compiler;
10972backtrack_common rootbacktrack;
10973compiler_common common_data;
10974compiler_common *common = &common_data;
10975const sljit_u8 *tables = re->tables;
10976pcre_study_data *study;
10977int private_data_size;
10978pcre_uchar *ccend;
10979executable_functions *functions;
10980void *executable_func;
10981sljit_uw executable_size;
10982sljit_uw total_length;
10983label_addr_list *label_addr;
10984struct sljit_label *mainloop_label = NULL;
10985struct sljit_label *continue_match_label;
10986struct sljit_label *empty_match_found_label = NULL;
10987struct sljit_label *empty_match_backtrack_label = NULL;
10988struct sljit_label *reset_match_label;
10989struct sljit_label *quit_label;
10990struct sljit_jump *jump;
10991struct sljit_jump *minlength_check_failed = NULL;
10992struct sljit_jump *reqbyte_notfound = NULL;
10993struct sljit_jump *empty_match = NULL;
10994
10995SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
10996study = extra->study_data;
10997
10998if (!tables)
10999 tables = PRIV(default_tables);
11000
11001memset(&rootbacktrack, 0, sizeof(backtrack_common));
11002memset(common, 0, sizeof(compiler_common));
11003rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
11004
11005common->start = rootbacktrack.cc;
11006common->read_only_data_head = NULL;
11007common->fcc = tables + fcc_offset;
11008common->lcc = (sljit_sw)(tables + lcc_offset);
11009common->mode = mode;
11010common->might_be_empty = study->minlength == 0;
11011common->nltype = NLTYPE_FIXED;
11012switch(re->options & PCRE_NEWLINE_BITS)
11013 {
11014 case 0:
11015 /* Compile-time default */
11016 switch(NEWLINE)
11017 {
11018 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11019 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11020 default: common->newline = NEWLINE; break;
11021 }
11022 break;
11023 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
11024 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
11025 case PCRE_NEWLINE_CR+
11026 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
11027 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11028 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11029 default: return;
11030 }
11031common->nlmax = READ_CHAR_MAX;
11032common->nlmin = 0;
11033if ((re->options & PCRE_BSR_ANYCRLF) != 0)
11034 common->bsr_nltype = NLTYPE_ANYCRLF;
11035else if ((re->options & PCRE_BSR_UNICODE) != 0)
11036 common->bsr_nltype = NLTYPE_ANY;
11037else
11038 {
11039#ifdef BSR_ANYCRLF
11040 common->bsr_nltype = NLTYPE_ANYCRLF;
11041#else
11042 common->bsr_nltype = NLTYPE_ANY;
11043#endif
11044 }
11045common->bsr_nlmax = READ_CHAR_MAX;
11046common->bsr_nlmin = 0;
11047common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
11048common->ctypes = (sljit_sw)(tables + ctypes_offset);
11049common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
11050common->name_count = re->name_count;
11051common->name_entry_size = re->name_entry_size;
11052common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
11053#ifdef SUPPORT_UTF
11054/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
11055common->utf = (re->options & PCRE_UTF8) != 0;
11056#ifdef SUPPORT_UCP
11057common->use_ucp = (re->options & PCRE_UCP) != 0;
11058#endif
11059if (common->utf)
11060 {
11061 if (common->nltype == NLTYPE_ANY)
11062 common->nlmax = 0x2029;
11063 else if (common->nltype == NLTYPE_ANYCRLF)
11064 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11065 else
11066 {
11067 /* We only care about the first newline character. */
11068 common->nlmax = common->newline & 0xff;
11069 }
11070
11071 if (common->nltype == NLTYPE_FIXED)
11072 common->nlmin = common->newline & 0xff;
11073 else
11074 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11075
11076 if (common->bsr_nltype == NLTYPE_ANY)
11077 common->bsr_nlmax = 0x2029;
11078 else
11079 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11080 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11081 }
11082#endif /* SUPPORT_UTF */
11083ccend = bracketend(common->start);
11084
11085/* Calculate the local space size on the stack. */
11086common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
11087common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
11088if (!common->optimized_cbracket)
11089 return;
11090#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
11091memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11092#else
11093memset(common->optimized_cbracket, 1, re->top_bracket + 1);
11094#endif
11095
11096SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
11097#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
11098common->capture_last_ptr = common->ovector_start;
11099common->ovector_start += sizeof(sljit_sw);
11100#endif
11101if (!check_opcode_types(common, common->start, ccend))
11102 {
11103 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11104 return;
11105 }
11106
11107/* Checking flags and updating ovector_start. */
11108if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11109 {
11110 common->req_char_ptr = common->ovector_start;
11111 common->ovector_start += sizeof(sljit_sw);
11112 }
11113if (mode != JIT_COMPILE)
11114 {
11115 common->start_used_ptr = common->ovector_start;
11116 common->ovector_start += sizeof(sljit_sw);
11117 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11118 {
11119 common->hit_start = common->ovector_start;
11120 common->ovector_start += 2 * sizeof(sljit_sw);
11121 }
11122 }
11123if ((re->options & PCRE_FIRSTLINE) != 0)
11124 {
11125 common->match_end_ptr = common->ovector_start;
11126 common->ovector_start += sizeof(sljit_sw);
11127 }
11128#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
11129common->control_head_ptr = 1;
11130#endif
11131if (common->control_head_ptr != 0)
11132 {
11133 common->control_head_ptr = common->ovector_start;
11134 common->ovector_start += sizeof(sljit_sw);
11135 }
11136if (common->has_set_som)
11137 {
11138 /* Saving the real start pointer is necessary. */
11139 common->start_ptr = common->ovector_start;
11140 common->ovector_start += sizeof(sljit_sw);
11141 }
11142
11143/* Aligning ovector to even number of sljit words. */
11144if ((common->ovector_start & sizeof(sljit_sw)) != 0)
11145 common->ovector_start += sizeof(sljit_sw);
11146
11147if (common->start_ptr == 0)
11148 common->start_ptr = OVECTOR(0);
11149
11150/* Capturing brackets cannot be optimized if callouts are allowed. */
11151if (common->capture_last_ptr != 0)
11152 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11153
11154SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
11155common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
11156
11157total_length = ccend - common->start;
11158common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
11159if (!common->private_data_ptrs)
11160 {
11161 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11162 return;
11163 }
11164memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
11165
11166private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11167set_private_data_ptrs(common, &private_data_size, ccend);
11168if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11169 {
11170 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11171 detect_fast_fail(common, common->start, &private_data_size, 4);
11172 }
11173
11174SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11175
11176if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11177 {
11178 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11179 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11180 return;
11181 }
11182
11183if (common->has_then)
11184 {
11185 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11186 memset(common->then_offsets, 0, total_length);
11187 set_then_offsets(common, common->start, NULL);
11188 }
11189
11190compiler = sljit_create_compiler(NULL);
11191if (!compiler)
11192 {
11193 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11194 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11195 return;
11196 }
11197common->compiler = compiler;
11198
11199/* Main pcre_jit_exec entry. */
11200sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
11201
11202/* Register init. */
11203reset_ovector(common, (re->top_bracket + 1) * 2);
11204if (common->req_char_ptr != 0)
11205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11206
11207OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11208OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11209OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11210OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11211OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11212OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11213OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
11214OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
11215OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11216OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11217
11218if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11219 reset_fast_fail(common);
11220
11221if (mode == JIT_PARTIAL_SOFT_COMPILE)
11222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11223if (common->mark_ptr != 0)
11224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11225if (common->control_head_ptr != 0)
11226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11227
11228/* Main part of the matching */
11229if ((re->options & PCRE_ANCHORED) == 0)
11230 {
11231 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11232 continue_match_label = LABEL();
11233 /* Forward search if possible. */
11234 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11235 {
11236 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11237 ;
11238 else if ((re->flags & PCRE_FIRSTSET) != 0)
11239 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11240 else if ((re->flags & PCRE_STARTLINE) != 0)
11241 fast_forward_newline(common);
11242 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11243 fast_forward_start_bits(common, study->start_bits);
11244 }
11245 }
11246else
11247 continue_match_label = LABEL();
11248
11249if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11250 {
11251 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11252 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11253 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11254 }
11255if (common->req_char_ptr != 0)
11256 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11257
11258/* Store the current STR_PTR in OVECTOR(0). */
11259OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11260/* Copy the limit of allowed recursions. */
11261OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11262if (common->capture_last_ptr != 0)
11263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11264if (common->fast_forward_bc_ptr != NULL)
11265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11266
11267if (common->start_ptr != OVECTOR(0))
11268 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11269
11270/* Copy the beginning of the string. */
11271if (mode == JIT_PARTIAL_SOFT_COMPILE)
11272 {
11273 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11274 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11276 JUMPHERE(jump);
11277 }
11278else if (mode == JIT_PARTIAL_HARD_COMPILE)
11279 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11280
11281compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11282if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11283 {
11284 sljit_free_compiler(compiler);
11285 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11286 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11287 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11288 return;
11289 }
11290
11291if (common->might_be_empty)
11292 {
11293 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11294 empty_match_found_label = LABEL();
11295 }
11296
11297common->accept_label = LABEL();
11298if (common->accept != NULL)
11299 set_jumps(common->accept, common->accept_label);
11300
11301/* This means we have a match. Update the ovector. */
11302copy_ovector(common, re->top_bracket + 1);
11303common->quit_label = common->forced_quit_label = LABEL();
11304if (common->quit != NULL)
11305 set_jumps(common->quit, common->quit_label);
11306if (common->forced_quit != NULL)
11307 set_jumps(common->forced_quit, common->forced_quit_label);
11308if (minlength_check_failed != NULL)
11309 SET_LABEL(minlength_check_failed, common->forced_quit_label);
11310sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11311
11312if (mode != JIT_COMPILE)
11313 {
11314 common->partialmatchlabel = LABEL();
11315 set_jumps(common->partialmatch, common->partialmatchlabel);
11316 return_with_partial_match(common, common->quit_label);
11317 }
11318
11319if (common->might_be_empty)
11320 empty_match_backtrack_label = LABEL();
11321compile_backtrackingpath(common, rootbacktrack.top);
11322if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11323 {
11324 sljit_free_compiler(compiler);
11325 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11326 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11327 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11328 return;
11329 }
11330
11331SLJIT_ASSERT(rootbacktrack.prev == NULL);
11332reset_match_label = LABEL();
11333
11334if (mode == JIT_PARTIAL_SOFT_COMPILE)
11335 {
11336 /* Update hit_start only in the first time. */
11337 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11341 JUMPHERE(jump);
11342 }
11343
11344/* Check we have remaining characters. */
11345if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11346 {
11347 SLJIT_ASSERT(common->match_end_ptr != 0);
11348 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11349 }
11350
11351OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11352 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11353
11354if ((re->options & PCRE_ANCHORED) == 0)
11355 {
11356 if (common->ff_newline_shortcut != NULL)
11357 {
11358 if ((re->options & PCRE_FIRSTLINE) == 0)
11359 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11360 /* There cannot be more newlines here. */
11361 }
11362 else
11363 CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11364 }
11365
11366/* No more remaining characters. */
11367if (reqbyte_notfound != NULL)
11368 JUMPHERE(reqbyte_notfound);
11369
11370if (mode == JIT_PARTIAL_SOFT_COMPILE)
11371 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11372
11373OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11374JUMPTO(SLJIT_JUMP, common->quit_label);
11375
11376flush_stubs(common);
11377
11378if (common->might_be_empty)
11379 {
11380 JUMPHERE(empty_match);
11381 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11382 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11383 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11384 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11385 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11386 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11387 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11388 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11389 }
11390
11391common->fast_forward_bc_ptr = NULL;
11392common->fast_fail_start_ptr = 0;
11393common->fast_fail_end_ptr = 0;
11394common->currententry = common->entries;
11395common->local_exit = TRUE;
11396quit_label = common->quit_label;
11397while (common->currententry != NULL)
11398 {
11399 /* Might add new entries. */
11400 compile_recurse(common);
11401 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11402 {
11403 sljit_free_compiler(compiler);
11404 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11405 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11406 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11407 return;
11408 }
11409 flush_stubs(common);
11410 common->currententry = common->currententry->next;
11411 }
11412common->local_exit = FALSE;
11413common->quit_label = quit_label;
11414
11415/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11416/* This is a (really) rare case. */
11417set_jumps(common->stackalloc, LABEL());
11418/* RETURN_ADDR is not a saved register. */
11419sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11420
11421SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
11422
11423OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STACK_TOP, 0);
11424OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
11425OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
11426OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
11427OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
11428
11429sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11430jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11431OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
11432OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
11433OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11434OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11435sljit_emit_fast_return(compiler, TMP1, 0);
11436
11437/* Allocation failed. */
11438JUMPHERE(jump);
11439/* We break the return address cache here, but this is a really rare case. */
11440OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11441JUMPTO(SLJIT_JUMP, common->quit_label);
11442
11443/* Call limit reached. */
11444set_jumps(common->calllimit, LABEL());
11445OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11446JUMPTO(SLJIT_JUMP, common->quit_label);
11447
11448if (common->revertframes != NULL)
11449 {
11450 set_jumps(common->revertframes, LABEL());
11451 do_revertframes(common);
11452 }
11453if (common->wordboundary != NULL)
11454 {
11455 set_jumps(common->wordboundary, LABEL());
11456 check_wordboundary(common);
11457 }
11458if (common->anynewline != NULL)
11459 {
11460 set_jumps(common->anynewline, LABEL());
11461 check_anynewline(common);
11462 }
11463if (common->hspace != NULL)
11464 {
11465 set_jumps(common->hspace, LABEL());
11466 check_hspace(common);
11467 }
11468if (common->vspace != NULL)
11469 {
11470 set_jumps(common->vspace, LABEL());
11471 check_vspace(common);
11472 }
11473if (common->casefulcmp != NULL)
11474 {
11475 set_jumps(common->casefulcmp, LABEL());
11476 do_casefulcmp(common);
11477 }
11478if (common->caselesscmp != NULL)
11479 {
11480 set_jumps(common->caselesscmp, LABEL());
11481 do_caselesscmp(common);
11482 }
11483if (common->reset_match != NULL)
11484 {
11485 set_jumps(common->reset_match, LABEL());
11486 do_reset_match(common, (re->top_bracket + 1) * 2);
11487 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11488 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11489 JUMPTO(SLJIT_JUMP, reset_match_label);
11490 }
11491#ifdef SUPPORT_UTF
11492#ifdef COMPILE_PCRE8
11493if (common->utfreadchar != NULL)
11494 {
11495 set_jumps(common->utfreadchar, LABEL());
11496 do_utfreadchar(common);
11497 }
11498if (common->utfreadchar16 != NULL)
11499 {
11500 set_jumps(common->utfreadchar16, LABEL());
11501 do_utfreadchar16(common);
11502 }
11503if (common->utfreadtype8 != NULL)
11504 {
11505 set_jumps(common->utfreadtype8, LABEL());
11506 do_utfreadtype8(common);
11507 }
11508#endif /* COMPILE_PCRE8 */
11509#endif /* SUPPORT_UTF */
11510#ifdef SUPPORT_UCP
11511if (common->getucd != NULL)
11512 {
11513 set_jumps(common->getucd, LABEL());
11514 do_getucd(common);
11515 }
11516#endif
11517
11518SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11519SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11520
11521executable_func = sljit_generate_code(compiler);
11522executable_size = sljit_get_generated_code_size(compiler);
11523label_addr = common->label_addrs;
11524while (label_addr != NULL)
11525 {
11526 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11527 label_addr = label_addr->next;
11528 }
11529sljit_free_compiler(compiler);
11530if (executable_func == NULL)
11531 {
11532 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11533 return;
11534 }
11535
11536/* Reuse the function descriptor if possible. */
11537if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11538 functions = (executable_functions *)extra->executable_jit;
11539else
11540 {
11541 /* Note: If your memory-checker has flagged the allocation below as a
11542 * memory leak, it is probably because you either forgot to call
11543 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11544 * pcre16_extra) object, or you called said function after having
11545 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11546 * of the object. (The function will only free the JIT data if the
11547 * bit remains set, as the bit indicates that the pointer to the data
11548 * is valid.)
11549 */
11550 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11551 if (functions == NULL)
11552 {
11553 /* This case is highly unlikely since we just recently
11554 freed a lot of memory. Not impossible though. */
11555 sljit_free_code(executable_func);
11556 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11557 return;
11558 }
11559 memset(functions, 0, sizeof(executable_functions));
11560 functions->top_bracket = (re->top_bracket + 1) * 2;
11561 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11562 extra->executable_jit = functions;
11563 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11564 }
11565
11566functions->executable_funcs[mode] = executable_func;
11567functions->read_only_data_heads[mode] = common->read_only_data_head;
11568functions->executable_sizes[mode] = executable_size;
11569}
11570
11571static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11572{
11573union {
11574 void *executable_func;
11575 jit_function call_executable_func;
11576} convert_executable_func;
11577sljit_u8 local_space[MACHINE_STACK_SIZE];
11578struct sljit_stack local_stack;
11579
11580local_stack.min_start = local_space;
11581local_stack.start = local_space;
11582local_stack.end = local_space + MACHINE_STACK_SIZE;
11583local_stack.top = local_space + MACHINE_STACK_SIZE;
11584arguments->stack = &local_stack;
11585convert_executable_func.executable_func = executable_func;
11586return convert_executable_func.call_executable_func(arguments);
11587}
11588
11589int
11590PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11591 int length, int start_offset, int options, int *offsets, int offset_count)
11592{
11593executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11594union {
11595 void *executable_func;
11596 jit_function call_executable_func;
11597} convert_executable_func;
11598jit_arguments arguments;
11599int max_offset_count;
11600int retval;
11601int mode = JIT_COMPILE;
11602
11603if ((options & PCRE_PARTIAL_HARD) != 0)
11604 mode = JIT_PARTIAL_HARD_COMPILE;
11605else if ((options & PCRE_PARTIAL_SOFT) != 0)
11606 mode = JIT_PARTIAL_SOFT_COMPILE;
11607
11608if (functions->executable_funcs[mode] == NULL)
11609 return PCRE_ERROR_JIT_BADOPTION;
11610
11611/* Sanity checks should be handled by pcre_exec. */
11612arguments.str = subject + start_offset;
11613arguments.begin = subject;
11614arguments.end = subject + length;
11615arguments.mark_ptr = NULL;
11616/* JIT decreases this value less frequently than the interpreter. */
11617arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11618if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11619 arguments.limit_match = functions->limit_match;
11620arguments.notbol = (options & PCRE_NOTBOL) != 0;
11621arguments.noteol = (options & PCRE_NOTEOL) != 0;
11622arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11623arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11624arguments.offsets = offsets;
11625arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11626arguments.real_offset_count = offset_count;
11627
11628/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11629the output vector for storing captured strings, with the remainder used as
11630workspace. We don't need the workspace here. For compatibility, we limit the
11631number of captured strings in the same way as pcre_exec(), so that the user
11632gets the same result with and without JIT. */
11633
11634if (offset_count != 2)
11635 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11636max_offset_count = functions->top_bracket;
11637if (offset_count > max_offset_count)
11638 offset_count = max_offset_count;
11639arguments.offset_count = offset_count;
11640
11641if (functions->callback)
11642 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11643else
11644 arguments.stack = (struct sljit_stack *)functions->userdata;
11645
11646if (arguments.stack == NULL)
11647 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11648else
11649 {
11650 convert_executable_func.executable_func = functions->executable_funcs[mode];
11651 retval = convert_executable_func.call_executable_func(&arguments);
11652 }
11653
11654if (retval * 2 > offset_count)
11655 retval = 0;
11656if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11657 *(extra_data->mark) = arguments.mark_ptr;
11658
11659return retval;
11660}
11661
11662#if defined COMPILE_PCRE8
11663PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11664pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11665 PCRE_SPTR subject, int length, int start_offset, int options,
11666 int *offsets, int offset_count, pcre_jit_stack *stack)
11667#elif defined COMPILE_PCRE16
11668PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11669pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11670 PCRE_SPTR16 subject, int length, int start_offset, int options,
11671 int *offsets, int offset_count, pcre16_jit_stack *stack)
11672#elif defined COMPILE_PCRE32
11673PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11674pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11675 PCRE_SPTR32 subject, int length, int start_offset, int options,
11676 int *offsets, int offset_count, pcre32_jit_stack *stack)
11677#endif
11678{
11679pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11680executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11681union {
11682 void *executable_func;
11683 jit_function call_executable_func;
11684} convert_executable_func;
11685jit_arguments arguments;
11686int max_offset_count;
11687int retval;
11688int mode = JIT_COMPILE;
11689
11690SLJIT_UNUSED_ARG(argument_re);
11691
11692/* Plausibility checks */
11693if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11694
11695if ((options & PCRE_PARTIAL_HARD) != 0)
11696 mode = JIT_PARTIAL_HARD_COMPILE;
11697else if ((options & PCRE_PARTIAL_SOFT) != 0)
11698 mode = JIT_PARTIAL_SOFT_COMPILE;
11699
11700if (functions == NULL || functions->executable_funcs[mode] == NULL)
11701 return PCRE_ERROR_JIT_BADOPTION;
11702
11703/* Sanity checks should be handled by pcre_exec. */
11704arguments.stack = (struct sljit_stack *)stack;
11705arguments.str = subject_ptr + start_offset;
11706arguments.begin = subject_ptr;
11707arguments.end = subject_ptr + length;
11708arguments.mark_ptr = NULL;
11709/* JIT decreases this value less frequently than the interpreter. */
11710arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11711if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11712 arguments.limit_match = functions->limit_match;
11713arguments.notbol = (options & PCRE_NOTBOL) != 0;
11714arguments.noteol = (options & PCRE_NOTEOL) != 0;
11715arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11716arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11717arguments.offsets = offsets;
11718arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11719arguments.real_offset_count = offset_count;
11720
11721/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11722the output vector for storing captured strings, with the remainder used as
11723workspace. We don't need the workspace here. For compatibility, we limit the
11724number of captured strings in the same way as pcre_exec(), so that the user
11725gets the same result with and without JIT. */
11726
11727if (offset_count != 2)
11728 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11729max_offset_count = functions->top_bracket;
11730if (offset_count > max_offset_count)
11731 offset_count = max_offset_count;
11732arguments.offset_count = offset_count;
11733
11734convert_executable_func.executable_func = functions->executable_funcs[mode];
11735retval = convert_executable_func.call_executable_func(&arguments);
11736
11737if (retval * 2 > offset_count)
11738 retval = 0;
11739if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11740 *(extra_data->mark) = arguments.mark_ptr;
11741
11742return retval;
11743}
11744
11745void
11746PRIV(jit_free)(void *executable_funcs)
11747{
11748int i;
11749executable_functions *functions = (executable_functions *)executable_funcs;
11750for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11751 {
11752 if (functions->executable_funcs[i] != NULL)
11753 sljit_free_code(functions->executable_funcs[i]);
11754 free_read_only_data(functions->read_only_data_heads[i], NULL);
11755 }
11756SLJIT_FREE(functions, compiler->allocator_data);
11757}
11758
11759int
11760PRIV(jit_get_size)(void *executable_funcs)
11761{
11762int i;
11763sljit_uw size = 0;
11764sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11765for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11766 size += executable_sizes[i];
11767return (int)size;
11768}
11769
11770const char*
11771PRIV(jit_get_target)(void)
11772{
11773return sljit_get_platform_name();
11774}
11775
11776#if defined COMPILE_PCRE8
11777PCRE_EXP_DECL pcre_jit_stack *
11778pcre_jit_stack_alloc(int startsize, int maxsize)
11779#elif defined COMPILE_PCRE16
11780PCRE_EXP_DECL pcre16_jit_stack *
11781pcre16_jit_stack_alloc(int startsize, int maxsize)
11782#elif defined COMPILE_PCRE32
11783PCRE_EXP_DECL pcre32_jit_stack *
11784pcre32_jit_stack_alloc(int startsize, int maxsize)
11785#endif
11786{
11787if (startsize < 1 || maxsize < 1)
11788 return NULL;
11789if (startsize > maxsize)
11790 startsize = maxsize;
11791startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11792maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11793return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11794}
11795
11796#if defined COMPILE_PCRE8
11797PCRE_EXP_DECL void
11798pcre_jit_stack_free(pcre_jit_stack *stack)
11799#elif defined COMPILE_PCRE16
11800PCRE_EXP_DECL void
11801pcre16_jit_stack_free(pcre16_jit_stack *stack)
11802#elif defined COMPILE_PCRE32
11803PCRE_EXP_DECL void
11804pcre32_jit_stack_free(pcre32_jit_stack *stack)
11805#endif
11806{
11807sljit_free_stack((struct sljit_stack *)stack, NULL);
11808}
11809
11810#if defined COMPILE_PCRE8
11811PCRE_EXP_DECL void
11812pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11813#elif defined COMPILE_PCRE16
11814PCRE_EXP_DECL void
11815pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11816#elif defined COMPILE_PCRE32
11817PCRE_EXP_DECL void
11818pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11819#endif
11820{
11821executable_functions *functions;
11822if (extra != NULL &&
11823 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11824 extra->executable_jit != NULL)
11825 {
11826 functions = (executable_functions *)extra->executable_jit;
11827 functions->callback = callback;
11828 functions->userdata = userdata;
11829 }
11830}
11831
11832#if defined COMPILE_PCRE8
11833PCRE_EXP_DECL void
11834pcre_jit_free_unused_memory(void)
11835#elif defined COMPILE_PCRE16
11836PCRE_EXP_DECL void
11837pcre16_jit_free_unused_memory(void)
11838#elif defined COMPILE_PCRE32
11839PCRE_EXP_DECL void
11840pcre32_jit_free_unused_memory(void)
11841#endif
11842{
11843sljit_free_unused_memory_exec();
11844}
11845
11846#else /* SUPPORT_JIT */
11847
11848/* These are dummy functions to avoid linking errors when JIT support is not
11849being compiled. */
11850
11851#if defined COMPILE_PCRE8
11852PCRE_EXP_DECL pcre_jit_stack *
11853pcre_jit_stack_alloc(int startsize, int maxsize)
11854#elif defined COMPILE_PCRE16
11855PCRE_EXP_DECL pcre16_jit_stack *
11856pcre16_jit_stack_alloc(int startsize, int maxsize)
11857#elif defined COMPILE_PCRE32
11858PCRE_EXP_DECL pcre32_jit_stack *
11859pcre32_jit_stack_alloc(int startsize, int maxsize)
11860#endif
11861{
11862(void)startsize;
11863(void)maxsize;
11864return NULL;
11865}
11866
11867#if defined COMPILE_PCRE8
11868PCRE_EXP_DECL void
11869pcre_jit_stack_free(pcre_jit_stack *stack)
11870#elif defined COMPILE_PCRE16
11871PCRE_EXP_DECL void
11872pcre16_jit_stack_free(pcre16_jit_stack *stack)
11873#elif defined COMPILE_PCRE32
11874PCRE_EXP_DECL void
11875pcre32_jit_stack_free(pcre32_jit_stack *stack)
11876#endif
11877{
11878(void)stack;
11879}
11880
11881#if defined COMPILE_PCRE8
11882PCRE_EXP_DECL void
11883pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11884#elif defined COMPILE_PCRE16
11885PCRE_EXP_DECL void
11886pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11887#elif defined COMPILE_PCRE32
11888PCRE_EXP_DECL void
11889pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11890#endif
11891{
11892(void)extra;
11893(void)callback;
11894(void)userdata;
11895}
11896
11897#if defined COMPILE_PCRE8
11898PCRE_EXP_DECL void
11899pcre_jit_free_unused_memory(void)
11900#elif defined COMPILE_PCRE16
11901PCRE_EXP_DECL void
11902pcre16_jit_free_unused_memory(void)
11903#elif defined COMPILE_PCRE32
11904PCRE_EXP_DECL void
11905pcre32_jit_free_unused_memory(void)
11906#endif
11907{
11908}
11909
11910#endif
11911
11912/* End of pcre_jit_compile.c */
11913