1 | // Copyright 2007-2009 Russ Cox. All Rights Reserved. |
2 | // Copyright 2014 Paul Sokolovsky. |
3 | // Use of this source code is governed by a BSD-style |
4 | // license that can be found in the LICENSE file. |
5 | |
6 | #ifndef _RE1_5_REGEXP__H |
7 | #define _RE1_5_REGEXP__H |
8 | |
9 | #include <stdio.h> |
10 | #include <stdlib.h> |
11 | #include <string.h> |
12 | #include <stdarg.h> |
13 | #include <assert.h> |
14 | |
15 | #define nil ((void*)0) |
16 | #define nelem(x) (sizeof(x)/sizeof((x)[0])) |
17 | |
18 | typedef struct Regexp Regexp; |
19 | typedef struct Prog Prog; |
20 | typedef struct ByteProg ByteProg; |
21 | typedef struct Inst Inst; |
22 | typedef struct Subject Subject; |
23 | |
24 | struct Regexp |
25 | { |
26 | int type; |
27 | int n; |
28 | int ch; |
29 | Regexp *left; |
30 | Regexp *right; |
31 | }; |
32 | |
33 | enum /* Regexp.type */ |
34 | { |
35 | Alt = 1, |
36 | Cat, |
37 | Lit, |
38 | Dot, |
39 | Paren, |
40 | Quest, |
41 | Star, |
42 | Plus, |
43 | }; |
44 | |
45 | Regexp *parse(char*); |
46 | Regexp *reg(int type, Regexp *left, Regexp *right); |
47 | void printre(Regexp*); |
48 | #ifndef re1_5_fatal |
49 | void re1_5_fatal(char*); |
50 | #endif |
51 | #ifndef re1_5_stack_chk |
52 | #define re1_5_stack_chk() |
53 | #endif |
54 | void *mal(int); |
55 | |
56 | struct Prog |
57 | { |
58 | Inst *start; |
59 | int len; |
60 | }; |
61 | |
62 | struct ByteProg |
63 | { |
64 | int bytelen; |
65 | int len; |
66 | int sub; |
67 | char insts[0]; |
68 | }; |
69 | |
70 | struct Inst |
71 | { |
72 | int opcode; |
73 | int c; |
74 | int n; |
75 | Inst *x; |
76 | Inst *y; |
77 | int gen; // global state, oooh! |
78 | }; |
79 | |
80 | enum /* Inst.opcode */ |
81 | { |
82 | // Instructions which consume input bytes (and thus fail if none left) |
83 | CONSUMERS = 1, |
84 | Char = CONSUMERS, |
85 | Any, |
86 | Class, |
87 | ClassNot, |
88 | NamedClass, |
89 | |
90 | ASSERTS = 0x50, |
91 | Bol = ASSERTS, |
92 | Eol, |
93 | |
94 | // Instructions which take relative offset as arg |
95 | JUMPS = 0x60, |
96 | Jmp = JUMPS, |
97 | Split, |
98 | RSplit, |
99 | |
100 | // Other (special) instructions |
101 | Save = 0x7e, |
102 | Match = 0x7f, |
103 | }; |
104 | |
105 | #define inst_is_consumer(inst) ((inst) < ASSERTS) |
106 | #define inst_is_jump(inst) ((inst) & 0x70 == JUMPS) |
107 | |
108 | Prog *compile(Regexp*); |
109 | void printprog(Prog*); |
110 | |
111 | extern int gen; |
112 | |
113 | enum { |
114 | MAXSUB = 20 |
115 | }; |
116 | |
117 | typedef struct Sub Sub; |
118 | |
119 | struct Sub |
120 | { |
121 | int ref; |
122 | int nsub; |
123 | const char *sub[MAXSUB]; |
124 | }; |
125 | |
126 | Sub *newsub(int n); |
127 | Sub *incref(Sub*); |
128 | Sub *copy(Sub*); |
129 | Sub *update(Sub*, int, const char*); |
130 | void decref(Sub*); |
131 | |
132 | struct Subject { |
133 | const char *begin; |
134 | const char *end; |
135 | }; |
136 | |
137 | |
138 | #define NON_ANCHORED_PREFIX 5 |
139 | #define HANDLE_ANCHORED(bytecode, is_anchored) ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode)) |
140 | |
141 | int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int); |
142 | int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int); |
143 | int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int); |
144 | int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int); |
145 | int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int); |
146 | |
147 | int re1_5_sizecode(const char *re); |
148 | int re1_5_compilecode(ByteProg *prog, const char *re); |
149 | void re1_5_dumpcode(ByteProg *prog); |
150 | void cleanmarks(ByteProg *prog); |
151 | int _re1_5_classmatch(const char *pc, const char *sp); |
152 | int _re1_5_namedclassmatch(const char *pc, const char *sp); |
153 | |
154 | #endif /*_RE1_5_REGEXP__H*/ |
155 | |