compilecode.c source code [MicroPython/extmod/re1.5/compilecode.c]

1	// Copyright 2014 Paul Sokolovsky.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	#include "re1.5.h"
6
7	#define INSERT_CODE(at, num, pc) \
8	((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num)
9	#define REL(at, to) (to - at - 2)
10	#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
11	#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
12	#define PC (prog->bytelen)
13
14	static void _emit_checked(int at, char code, int* val, bool *err) {
15	*err \|= val != (int8_t)val;
16	if (code) {
17	code[at] = val;
18	}
19	}
20
21	static const char _compilecode(const* char re, ByteProg prog, int sizecode)
22	{
23	char *code = sizecode ? NULL : prog->insts;
24	bool err = false;
25	int start = PC;
26	int term = PC;
27	int alt_label = `0`;
28
29	for (; re && re != `')'`; re++) {
30	switch (*re) {
31	case `'\\'`:
32	re++;
33	if (!re) return* NULL; // Trailing backslash
34	if ((re \| `0x20`) == `'d'` \|\| (re \| `0x20`) == `'s'` \|\| (*re \| `0x20`) == `'w'`) {
35	term = PC;
36	EMIT(PC++, NamedClass);
37	EMIT(PC++, *re);
38	prog->len++;
39	break;
40	}
41	MP_FALLTHROUGH
42	default:
43	term = PC;
44	EMIT(PC++, Char);
45	EMIT(PC++, *re);
46	prog->len++;
47	break;
48	case `'.'`:
49	term = PC;
50	EMIT(PC++, Any);
51	prog->len++;
52	break;
53	case `'['`: {
54	int cnt;
55	term = PC;
56	re++;
57	if (*re == `'^'`) {
58	EMIT(PC++, ClassNot);
59	re++;
60	} else {
61	EMIT(PC++, Class);
62	}
63	PC++; // Skip # of pair byte
64	prog->len++;
65	for (cnt = `0`; *re != `']'`; re++, cnt++) {
66	if (*re == `'\\'`) {
67	++re;
68	}
69	if (!re) return* NULL;
70	EMIT(PC++, *re);
71	if (re[`1`] == `'-'` && re[`2`] != `']'`) {
72	re += `2`;
73	}
74	EMIT(PC++, *re);
75	}
76	EMIT_CHECKED(term + `1`, cnt);
77	break;
78	}
79	case `'('`: {
80	term = PC;
81	int sub = `0`;
82	int capture = re[`1`] != `'?'` \|\| re[`2`] != `':'`;
83
84	if (capture) {
85	sub = ++prog->sub;
86	EMIT(PC++, Save);
87	EMIT_CHECKED(PC++, `2` * sub);
88	prog->len++;
89	} else {
90	re += `2`;
91	}
92
93	re = _compilecode(re + `1`, prog, sizecode);
94	if (re == NULL \|\| re != `')'`) return* NULL; // error, or no matching paren
95
96	if (capture) {
97	EMIT(PC++, Save);
98	EMIT_CHECKED(PC++, `2` * sub + `1`);
99	prog->len++;
100	}
101
102	break;
103	}
104	case `'?'`:
105	if (PC == term) return NULL; // nothing to repeat
106	INSERT_CODE(term, `2`, PC);
107	if (re[`1`] == `'?'`) {
108	EMIT(term, RSplit);
109	re++;
110	} else {
111	EMIT(term, Split);
112	}
113	EMIT_CHECKED(term + `1`, REL(term, PC));
114	prog->len++;
115	term = PC;
116	break;
117	case `'*'`:
118	if (PC == term) return NULL; // nothing to repeat
119	INSERT_CODE(term, `2`, PC);
120	EMIT(PC, Jmp);
121	EMIT_CHECKED(PC + `1`, REL(PC, term));
122	PC += `2`;
123	if (re[`1`] == `'?'`) {
124	EMIT(term, RSplit);
125	re++;
126	} else {
127	EMIT(term, Split);
128	}
129	EMIT_CHECKED(term + `1`, REL(term, PC));
130	prog->len += `2`;
131	term = PC;
132	break;
133	case `'+'`:
134	if (PC == term) return NULL; // nothing to repeat
135	if (re[`1`] == `'?'`) {
136	EMIT(PC, Split);
137	re++;
138	} else {
139	EMIT(PC, RSplit);
140	}
141	EMIT_CHECKED(PC + `1`, REL(PC, term));
142	PC += `2`;
143	prog->len++;
144	term = PC;
145	break;
146	case `'\|'`:
147	if (alt_label) {
148	EMIT_CHECKED(alt_label, REL(alt_label, PC) + `1`);
149	}
150	INSERT_CODE(start, `2`, PC);
151	EMIT(PC++, Jmp);
152	alt_label = PC++;
153	EMIT(start, Split);
154	EMIT_CHECKED(start + `1`, REL(start, PC));
155	prog->len += `2`;
156	term = PC;
157	break;
158	case `'^'`:
159	EMIT(PC++, Bol);
160	prog->len++;
161	term = PC;
162	break;
163	case `'$'`:
164	EMIT(PC++, Eol);
165	prog->len++;
166	term = PC;
167	break;
168	}
169	}
170
171	if (alt_label) {
172	EMIT_CHECKED(alt_label, REL(alt_label, PC) + `1`);
173	}
174	return err ? NULL : re;
175	}
176
177	int re1_5_sizecode(const char *re)
178	{
179	ByteProg dummyprog = {
180	// Save 0, Save 1, Match; more bytes for "search" (vs "match") prefix code
181	.bytelen = `5` + NON_ANCHORED_PREFIX
182	};
183
184	if (_compilecode(re, &dummyprog, /sizecode/`1`) == NULL) return -`1`;
185
186	return dummyprog.bytelen;
187	}
188
189	int re1_5_compilecode(ByteProg prog, const* char *re)
190	{
191	prog->len = `0`;
192	prog->bytelen = `0`;
193	prog->sub = `0`;
194
195	// Add code to implement non-anchored operation ("search"),
196	// for anchored operation ("match"), this code will be just skipped.
197	// TODO: Implement search in much more efficient manner
198	prog->insts[prog->bytelen++] = RSplit;
199	prog->insts[prog->bytelen++] = `3`;
200	prog->insts[prog->bytelen++] = Any;
201	prog->insts[prog->bytelen++] = Jmp;
202	prog->insts[prog->bytelen++] = -`5`;
203	prog->len += `3`;
204
205	prog->insts[prog->bytelen++] = Save;
206	prog->insts[prog->bytelen++] = `0`;
207	prog->len++;
208
209	re = _compilecode(re, prog, /sizecode/`0`);
210	if (re == NULL \|\| re) return* `1`;
211
212	prog->insts[prog->bytelen++] = Save;
213	prog->insts[prog->bytelen++] = `1`;
214	prog->len++;
215
216	prog->insts[prog->bytelen++] = Match;
217	prog->len++;
218
219	return `0`;
220	}
221
222	#if 0
223	int main(int argc, char *argv[])
224	{
225	int pc = `0`;
226	ByteProg *code = re1_5_compilecode(argv[`1`]);
227	re1_5_dumpcode(code);
228	}
229	#endif
230

Browse the source code of MicroPython/extmod/re1.5/compilecode.c