lpcode.c source code [TIC-80/vendor/lpeg/lpcode.c]

1	/*
2	** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $
3	** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4	*/
5
6	#include <limits.h>
7
8
9	#include "lua.h"
10	#include "lauxlib.h"
11
12	#include "lptypes.h"
13	#include "lpcode.h"
14
15
16	/ signals a "no-instruction /
17	#define NOINST -1
18
19
20
21	static const Charset fullset_ =
22	{{`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`,
23	`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`,
24	`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`,
25	`0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`, `0xFF`}};
26
27	static const Charset *fullset = &fullset_;
28
29	/*
30	** {======================================================
31	** Analysis and some optimizations
32	** =======================================================
33	*/
34
35	/*
36	** Check whether a charset is empty (returns IFail), singleton (IChar),
37	** full (IAny), or none of those (ISet). When singleton, '*c' returns
38	** which character it is. (When generic set, the set was the input,
39	** so there is no need to return it.)
40	*/
41	static Opcode charsettype (const byte cs, int* *c) {
42	int count = `0`; / number of characters in the set /
43	int i;
44	int candidate = -`1`; / candidate position for the singleton char /
45	for (i = `0`; i < CHARSETSIZE; i++) { / for each byte /
46	int b = cs[i];
47	if (b == `0`) { / is byte empty? /
48	if (count > `1`) / was set neither empty nor singleton? /
49	return ISet; / neither full nor empty nor singleton /
50	/ else set is still empty or singleton /
51	}
52	else if (b == `0xFF`) { / is byte full? /
53	if (count < (i * BITSPERCHAR)) / was set not full? /
54	return ISet; / neither full nor empty nor singleton /
55	else count += BITSPERCHAR; / set is still full /
56	}
57	else if ((b & (b - `1`)) == `0`) { / has byte only one bit? /
58	if (count > `0`) / was set not empty? /
59	return ISet; / neither full nor empty nor singleton /
60	else { / set has only one char till now; track it /
61	count++;
62	candidate = i;
63	}
64	}
65	else return ISet; / byte is neither empty, full, nor singleton /
66	}
67	switch (count) {
68	case `0`: return IFail; / empty set /
69	case `1`: { / singleton; find character bit inside byte /
70	int b = cs[candidate];
71	c = candidate BITSPERCHAR;
72	if ((b & `0xF0`) != `0`) { *c += `4`; b >>= `4`; }
73	if ((b & `0x0C`) != `0`) { *c += `2`; b >>= `2`; }
74	if ((b & `0x02`) != `0`) { *c += `1`; }
75	return IChar;
76	}
77	default: {
78	assert(count == CHARSETSIZE * BITSPERCHAR); / full set /
79	return IAny;
80	}
81	}
82	}
83
84
85	/*
86	** A few basic operations on Charsets
87	*/
88	static void cs_complement (Charset *cs) {
89	loopset(i, cs->cs[i] = ~cs->cs[i]);
90	}
91
92	static int cs_equal (const byte cs1, const* byte *cs2) {
93	loopset(i, if (cs1[i] != cs2[i]) return `0`);
94	return `1`;
95	}
96
97	static int cs_disjoint (const Charset cs1, const* Charset *cs2) {
98	loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != `0`) return `0`;)
99	return `1`;
100	}
101
102
103	/*
104	** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
105	** charset and return 1; else return 0.
106	*/
107	int tocharset (TTree tree, Charset cs) {
108	switch (tree->tag) {
109	case TSet: { / copy set /
110	loopset(i, cs->cs[i] = treebuffer(tree)[i]);
111	return `1`;
112	}
113	case TChar: { / only one char /
114	assert(`0` <= tree->u.n && tree->u.n <= UCHAR_MAX);
115	loopset(i, cs->cs[i] = `0`); / erase all chars /
116	setchar(cs->cs, tree->u.n); / add that one /
117	return `1`;
118	}
119	case TAny: {
120	loopset(i, cs->cs[i] = `0xFF`); / add all characters to the set /
121	return `1`;
122	}
123	default: return `0`;
124	}
125	}
126
127
128	/*
129	** Visit a TCall node taking care to stop recursion. If node not yet
130	** visited, return 'f(sib2(tree))', otherwise return 'def' (default
131	** value)
132	*/
133	static int callrecursive (TTree tree, int* f (TTree t), int* def) {
134	int key = tree->key;
135	assert(tree->tag == TCall);
136	assert(sib2(tree)->tag == TRule);
137	if (key == `0`) / node already visited? /
138	return def; / return default value /
139	else { / first visit /
140	int result;
141	tree->key = `0`; / mark call as already visited /
142	result = f(sib2(tree)); / go to called rule /
143	tree->key = key; / restore tree /
144	return result;
145	}
146	}
147
148
149	/*
150	** Check whether a pattern tree has captures
151	*/
152	int hascaptures (TTree *tree) {
153	tailcall:
154	switch (tree->tag) {
155	case TCapture: case TRunTime:
156	return `1`;
157	case TCall:
158	return callrecursive(tree, hascaptures, `0`);
159	case TRule: / do not follow siblings /
160	tree = sib1(tree); goto tailcall;
161	case TOpenCall: assert(`0`);
162	default: {
163	switch (numsiblings[tree->tag]) {
164	case `1`: / return hascaptures(sib1(tree)); /
165	tree = sib1(tree); goto tailcall;
166	case `2`:
167	if (hascaptures(sib1(tree)))
168	return `1`;
169	/ else return hascaptures(sib2(tree)); /
170	tree = sib2(tree); goto tailcall;
171	default: assert(numsiblings[tree->tag] == `0`); return `0`;
172	}
173	}
174	}
175	}
176
177
178	/*
179	** Checks how a pattern behaves regarding the empty string,
180	** in one of two different ways:
181	** A pattern is nullable if it can match without consuming any character;
182	** A pattern is nofail if it never fails for any string
183	** (including the empty string).
184	** The difference is only for predicates and run-time captures;
185	** for other patterns, the two properties are equivalent.
186	** (With predicates, &'a' is nullable but not nofail. Of course,
187	** nofail => nullable.)
188	** These functions are all convervative in the following way:
189	** p is nullable => nullable(p)
190	** nofail(p) => p cannot fail
191	** The function assumes that TOpenCall is not nullable;
192	** this will be checked again when the grammar is fixed.
193	** Run-time captures can do whatever they want, so the result
194	** is conservative.
195	*/
196	int checkaux (TTree tree, int* pred) {
197	tailcall:
198	switch (tree->tag) {
199	case TChar: case TSet: case TAny:
200	case TFalse: case TOpenCall:
201	return `0`; / not nullable /
202	case TRep: case TTrue:
203	return `1`; / no fail /
204	case TNot: case TBehind: / can match empty, but can fail /
205	if (pred == PEnofail) return `0`;
206	else return `1`; / PEnullable /
207	case TAnd: / can match empty; fail iff body does /
208	if (pred == PEnullable) return `1`;
209	/ else return checkaux(sib1(tree), pred); /
210	tree = sib1(tree); goto tailcall;
211	case TRunTime: / can fail; match empty iff body does /
212	if (pred == PEnofail) return `0`;
213	/ else return checkaux(sib1(tree), pred); /
214	tree = sib1(tree); goto tailcall;
215	case TSeq:
216	if (!checkaux(sib1(tree), pred)) return `0`;
217	/ else return checkaux(sib2(tree), pred); /
218	tree = sib2(tree); goto tailcall;
219	case TChoice:
220	if (checkaux(sib2(tree), pred)) return `1`;
221	/ else return checkaux(sib1(tree), pred); /
222	tree = sib1(tree); goto tailcall;
223	case TCapture: case TGrammar: case TRule:
224	/ return checkaux(sib1(tree), pred); /
225	tree = sib1(tree); goto tailcall;
226	case TCall: / return checkaux(sib2(tree), pred); /
227	tree = sib2(tree); goto tailcall;
228	default: assert(`0`); return `0`;
229	}
230	}
231
232
233	/*
234	** number of characters to match a pattern (or -1 if variable)
235	*/
236	int fixedlen (TTree *tree) {
237	int len = `0`; / to accumulate in tail calls /
238	tailcall:
239	switch (tree->tag) {
240	case TChar: case TSet: case TAny:
241	return len + `1`;
242	case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
243	return len;
244	case TRep: case TRunTime: case TOpenCall:
245	return -`1`;
246	case TCapture: case TRule: case TGrammar:
247	/ return fixedlen(sib1(tree)); /
248	tree = sib1(tree); goto tailcall;
249	case TCall: {
250	int n1 = callrecursive(tree, fixedlen, -`1`);
251	if (n1 < `0`)
252	return -`1`;
253	else
254	return len + n1;
255	}
256	case TSeq: {
257	int n1 = fixedlen(sib1(tree));
258	if (n1 < `0`)
259	return -`1`;
260	/ else return fixedlen(sib2(tree)) + len; /
261	len += n1; tree = sib2(tree); goto tailcall;
262	}
263	case TChoice: {
264	int n1 = fixedlen(sib1(tree));
265	int n2 = fixedlen(sib2(tree));
266	if (n1 != n2 \|\| n1 < `0`)
267	return -`1`;
268	else
269	return len + n1;
270	}
271	default: assert(`0`); return `0`;
272	};
273	}
274
275
276	/*
277	** Computes the 'first set' of a pattern.
278	** The result is a conservative aproximation:
279	** match p ax -> x (for some x) ==> a belongs to first(p)
280	** or
281	** a not in first(p) ==> match p ax -> fail (for all x)
282	**
283	** The set 'follow' is the first set of what follows the
284	** pattern (full set if nothing follows it).
285	**
286	** The function returns 0 when this resulting set can be used for
287	** test instructions that avoid the pattern altogether.
288	** A non-zero return can happen for two reasons:
289	** 1) match p '' -> '' ==> return has bit 1 set
290	** (tests cannot be used because they would always fail for an empty input);
291	** 2) there is a match-time capture ==> return has bit 2 set
292	** (optimizations should not bypass match-time captures).
293	*/
294	static int getfirst (TTree tree, const* Charset follow, Charset firstset) {
295	tailcall:
296	switch (tree->tag) {
297	case TChar: case TSet: case TAny: {
298	tocharset(tree, firstset);
299	return `0`;
300	}
301	case TTrue: {
302	loopset(i, firstset->cs[i] = follow->cs[i]);
303	return `1`; / accepts the empty string /
304	}
305	case TFalse: {
306	loopset(i, firstset->cs[i] = `0`);
307	return `0`;
308	}
309	case TChoice: {
310	Charset csaux;
311	int e1 = getfirst(sib1(tree), follow, firstset);
312	int e2 = getfirst(sib2(tree), follow, &csaux);
313	loopset(i, firstset->cs[i] \|= csaux.cs[i]);
314	return e1 \| e2;
315	}
316	case TSeq: {
317	if (!nullable(sib1(tree))) {
318	/ when p1 is not nullable, p2 has nothing to contribute;*
319	return getfirst(sib1(tree), fullset, firstset); /*
320	tree = sib1(tree); follow = fullset; goto tailcall;
321	}
322	else { / FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) /
323	Charset csaux;
324	int e2 = getfirst(sib2(tree), follow, &csaux);
325	int e1 = getfirst(sib1(tree), &csaux, firstset);
326	if (e1 == `0`) return `0`; / 'e1' ensures that first can be used /
327	else if ((e1 \| e2) & `2`) / one of the children has a matchtime? /
328	return `2`; / pattern has a matchtime capture /
329	else return e2; / else depends on 'e2' /
330	}
331	}
332	case TRep: {
333	getfirst(sib1(tree), follow, firstset);
334	loopset(i, firstset->cs[i] \|= follow->cs[i]);
335	return `1`; / accept the empty string /
336	}
337	case TCapture: case TGrammar: case TRule: {
338	/ return getfirst(sib1(tree), follow, firstset); /
339	tree = sib1(tree); goto tailcall;
340	}
341	case TRunTime: { / function invalidates any follow info. /
342	int e = getfirst(sib1(tree), fullset, firstset);
343	if (e) return `2`; / function is not "protected"? /
344	else return `0`; / pattern inside capture ensures first can be used /
345	}
346	case TCall: {
347	/ return getfirst(sib2(tree), follow, firstset); /
348	tree = sib2(tree); goto tailcall;
349	}
350	case TAnd: {
351	int e = getfirst(sib1(tree), follow, firstset);
352	loopset(i, firstset->cs[i] &= follow->cs[i]);
353	return e;
354	}
355	case TNot: {
356	if (tocharset(sib1(tree), firstset)) {
357	cs_complement(firstset);
358	return `1`;
359	}
360	/ else go through /
361	}
362	case TBehind: { / instruction gives no new information /
363	/ call 'getfirst' only to check for math-time captures /
364	int e = getfirst(sib1(tree), follow, firstset);
365	loopset(i, firstset->cs[i] = follow->cs[i]); / uses follow /
366	return e \| `1`; / always can accept the empty string /
367	}
368	default: assert(`0`); return `0`;
369	}
370	}
371
372
373	/*
374	** If 'headfail(tree)' true, then 'tree' can fail only depending on the
375	** next character of the subject.
376	*/
377	static int headfail (TTree *tree) {
378	tailcall:
379	switch (tree->tag) {
380	case TChar: case TSet: case TAny: case TFalse:
381	return `1`;
382	case TTrue: case TRep: case TRunTime: case TNot:
383	case TBehind:
384	return `0`;
385	case TCapture: case TGrammar: case TRule: case TAnd:
386	tree = sib1(tree); goto tailcall; / return headfail(sib1(tree)); /
387	case TCall:
388	tree = sib2(tree); goto tailcall; / return headfail(sib2(tree)); /
389	case TSeq:
390	if (!nofail(sib2(tree))) return `0`;
391	/ else return headfail(sib1(tree)); /
392	tree = sib1(tree); goto tailcall;
393	case TChoice:
394	if (!headfail(sib1(tree))) return `0`;
395	/ else return headfail(sib2(tree)); /
396	tree = sib2(tree); goto tailcall;
397	default: assert(`0`); return `0`;
398	}
399	}
400
401
402	/*
403	** Check whether the code generation for the given tree can benefit
404	** from a follow set (to avoid computing the follow set when it is
405	** not needed)
406	*/
407	static int needfollow (TTree *tree) {
408	tailcall:
409	switch (tree->tag) {
410	case TChar: case TSet: case TAny:
411	case TFalse: case TTrue: case TAnd: case TNot:
412	case TRunTime: case TGrammar: case TCall: case TBehind:
413	return `0`;
414	case TChoice: case TRep:
415	return `1`;
416	case TCapture:
417	tree = sib1(tree); goto tailcall;
418	case TSeq:
419	tree = sib2(tree); goto tailcall;
420	default: assert(`0`); return `0`;
421	}
422	}
423
424	/ }====================================================== /
425
426
427
428	/*
429	** {======================================================
430	** Code generation
431	** =======================================================
432	*/
433
434
435	/*
436	** size of an instruction
437	*/
438	int sizei (const Instruction *i) {
439	switch((Opcode)i->i.code) {
440	case ISet: case ISpan: return CHARSETINSTSIZE;
441	case ITestSet: return CHARSETINSTSIZE + `1`;
442	case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
443	case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
444	return `2`;
445	default: return `1`;
446	}
447	}
448
449
450	/*
451	** state for the compiler
452	*/
453	typedef struct CompileState {
454	Pattern p; /* pattern being compiled /
455	int ncode; / next position in p->code to be filled /
456	lua_State *L;
457	} CompileState;
458
459
460	/*
461	** code generation is recursive; 'opt' indicates that the code is being
462	** generated as the last thing inside an optional pattern (so, if that
463	** code is optional too, it can reuse the 'IChoice' already in place for
464	** the outer pattern). 'tt' points to a previous test protecting this
465	** code (or NOINST). 'fl' is the follow set of the pattern.
466	*/
467	static void codegen (CompileState compst, TTree tree, int opt, int tt,
468	const Charset *fl);
469
470
471	void realloccode (lua_State L, Pattern p, int nsize) {
472	void *ud;
473	lua_Alloc f = lua_getallocf(L, &ud);
474	void newblock = f(ud, p->code, p->codesize sizeof(Instruction),
475	nsize * sizeof(Instruction));
476	if (newblock == NULL && nsize > `0`)
477	luaL_error(L, "not enough memory");
478	p->code = (Instruction *)newblock;
479	p->codesize = nsize;
480	}
481
482
483	static int nextinstruction (CompileState *compst) {
484	int size = compst->p->codesize;
485	if (compst->ncode >= size)
486	realloccode(compst->L, compst->p, size * `2`);
487	return compst->ncode++;
488	}
489
490
491	#define getinstr(cs,i) ((cs)->p->code[i])
492
493
494	static int addinstruction (CompileState compst, Opcode op, int* aux) {
495	int i = nextinstruction(compst);
496	getinstr(compst, i).i.code = op;
497	getinstr(compst, i).i.aux = aux;
498	return i;
499	}
500
501
502	/*
503	** Add an instruction followed by space for an offset (to be set later)
504	*/
505	static int addoffsetinst (CompileState *compst, Opcode op) {
506	int i = addinstruction(compst, op, `0`); / instruction /
507	addinstruction(compst, (Opcode)`0`, `0`); / open space for offset /
508	assert(op == ITestSet \|\| sizei(&getinstr(compst, i)) == `2`);
509	return i;
510	}
511
512
513	/*
514	** Set the offset of an instruction
515	*/
516	static void setoffset (CompileState compst, int* instruction, int offset) {
517	getinstr(compst, instruction + `1`).offset = offset;
518	}
519
520
521	/*
522	** Add a capture instruction:
523	** 'op' is the capture instruction; 'cap' the capture kind;
524	** 'key' the key into ktable; 'aux' is the optional capture offset
525	**
526	*/
527	static int addinstcap (CompileState compst, Opcode op, int* cap, int key,
528	int aux) {
529	int i = addinstruction(compst, op, joinkindoff(cap, aux));
530	getinstr(compst, i).i.key = key;
531	return i;
532	}
533
534
535	#define gethere(compst) ((compst)->ncode)
536
537	#define target(code,i) ((i) + code[i + 1].offset)
538
539
540	/*
541	** Patch 'instruction' to jump to 'target'
542	*/
543	static void jumptothere (CompileState compst, int* instruction, int target) {
544	if (instruction >= `0`)
545	setoffset(compst, instruction, target - instruction);
546	}
547
548
549	/*
550	** Patch 'instruction' to jump to current position
551	*/
552	static void jumptohere (CompileState compst, int* instruction) {
553	jumptothere(compst, instruction, gethere(compst));
554	}
555
556
557	/*
558	** Code an IChar instruction, or IAny if there is an equivalent
559	** test dominating it
560	*/
561	static void codechar (CompileState compst, int* c, int tt) {
562	if (tt >= `0` && getinstr(compst, tt).i.code == ITestChar &&
563	getinstr(compst, tt).i.aux == c)
564	addinstruction(compst, IAny, `0`);
565	else
566	addinstruction(compst, IChar, c);
567	}
568
569
570	/*
571	** Add a charset posfix to an instruction
572	*/
573	static void addcharset (CompileState compst, const* byte *cs) {
574	int p = gethere(compst);
575	int i;
576	for (i = `0`; i < (int)CHARSETINSTSIZE - `1`; i++)
577	nextinstruction(compst); / space for buffer /
578	/ fill buffer with charset /
579	loopset(j, getinstr(compst, p).buff[j] = cs[j]);
580	}
581
582
583	/*
584	** code a char set, optimizing unit sets for IChar, "complete"
585	** sets for IAny, and empty sets for IFail; also use an IAny
586	** when instruction is dominated by an equivalent test.
587	*/
588	static void codecharset (CompileState compst, const* byte cs, int* tt) {
589	int c = `0`; / (=) to avoid warnings /
590	Opcode op = charsettype(cs, &c);
591	switch (op) {
592	case IChar: codechar(compst, c, tt); break;
593	case ISet: { / non-trivial set? /
594	if (tt >= `0` && getinstr(compst, tt).i.code == ITestSet &&
595	cs_equal(cs, getinstr(compst, tt + `2`).buff))
596	addinstruction(compst, IAny, `0`);
597	else {
598	addinstruction(compst, ISet, `0`);
599	addcharset(compst, cs);
600	}
601	break;
602	}
603	default: addinstruction(compst, op, c); break;
604	}
605	}
606
607
608	/*
609	** code a test set, optimizing unit sets for ITestChar, "complete"
610	** sets for ITestAny, and empty sets for IJmp (always fails).
611	** 'e' is true iff test should accept the empty string. (Test
612	** instructions in the current VM never accept the empty string.)
613	*/
614	static int codetestset (CompileState compst, Charset cs, int e) {
615	if (e) return NOINST; / no test /
616	else {
617	int c = `0`;
618	Opcode op = charsettype(cs->cs, &c);
619	switch (op) {
620	case IFail: return addoffsetinst(compst, IJmp); / always jump /
621	case IAny: return addoffsetinst(compst, ITestAny);
622	case IChar: {
623	int i = addoffsetinst(compst, ITestChar);
624	getinstr(compst, i).i.aux = c;
625	return i;
626	}
627	case ISet: {
628	int i = addoffsetinst(compst, ITestSet);
629	addcharset(compst, cs->cs);
630	return i;
631	}
632	default: assert(`0`); return `0`;
633	}
634	}
635	}
636
637
638	/*
639	** Find the final destination of a sequence of jumps
640	*/
641	static int finaltarget (Instruction code, int* i) {
642	while (code[i].i.code == IJmp)
643	i = target(code, i);
644	return i;
645	}
646
647
648	/*
649	** final label (after traversing any jumps)
650	*/
651	static int finallabel (Instruction code, int* i) {
652	return finaltarget(code, target(code, i));
653	}
654
655
656	/*
657	** <behind(p)> == behind n; <p> (where n = fixedlen(p))
658	*/
659	static void codebehind (CompileState compst, TTree tree) {
660	if (tree->u.n > `0`)
661	addinstruction(compst, IBehind, tree->u.n);
662	codegen(compst, sib1(tree), `0`, NOINST, fullset);
663	}
664
665
666	/*
667	** Choice; optimizations:
668	** - when p1 is headfail or
669	** when first(p1) and first(p2) are disjoint, than
670	** a character not in first(p1) cannot go to p1, and a character
671	** in first(p1) cannot go to p2 (at it is not in first(p2)).
672	** (The optimization is not valid if p1 accepts the empty string,
673	** as then there is no character at all...)
674	** - when p2 is empty and opt is true; a IPartialCommit can reuse
675	** the Choice already active in the stack.
676	*/
677	static void codechoice (CompileState compst, TTree p1, TTree p2, int* opt,
678	const Charset *fl) {
679	int emptyp2 = (p2->tag == TTrue);
680	Charset cs1, cs2;
681	int e1 = getfirst(p1, fullset, &cs1);
682	if (headfail(p1) \|\|
683	(!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
684	/ <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: /
685	int test = codetestset(compst, &cs1, `0`);
686	int jmp = NOINST;
687	codegen(compst, p1, `0`, test, fl);
688	if (!emptyp2)
689	jmp = addoffsetinst(compst, IJmp);
690	jumptohere(compst, test);
691	codegen(compst, p2, opt, NOINST, fl);
692	jumptohere(compst, jmp);
693	}
694	else if (opt && emptyp2) {
695	/ p1? == IPartialCommit; p1 /
696	jumptohere(compst, addoffsetinst(compst, IPartialCommit));
697	codegen(compst, p1, `1`, NOINST, fullset);
698	}
699	else {
700	/ <p1 / p2> ==*
701	test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: /*
702	int pcommit;
703	int test = codetestset(compst, &cs1, e1);
704	int pchoice = addoffsetinst(compst, IChoice);
705	codegen(compst, p1, emptyp2, test, fullset);
706	pcommit = addoffsetinst(compst, ICommit);
707	jumptohere(compst, pchoice);
708	jumptohere(compst, test);
709	codegen(compst, p2, opt, NOINST, fl);
710	jumptohere(compst, pcommit);
711	}
712	}
713
714
715	/*
716	** And predicate
717	** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
718	** (valid only when 'p' has no captures)
719	*/
720	static void codeand (CompileState compst, TTree tree, int tt) {
721	int n = fixedlen(tree);
722	if (n >= `0` && n <= MAXBEHIND && !hascaptures(tree)) {
723	codegen(compst, tree, `0`, tt, fullset);
724	if (n > `0`)
725	addinstruction(compst, IBehind, n);
726	}
727	else { / default: Choice L1; p1; BackCommit L2; L1: Fail; L2: /
728	int pcommit;
729	int pchoice = addoffsetinst(compst, IChoice);
730	codegen(compst, tree, `0`, tt, fullset);
731	pcommit = addoffsetinst(compst, IBackCommit);
732	jumptohere(compst, pchoice);
733	addinstruction(compst, IFail, `0`);
734	jumptohere(compst, pcommit);
735	}
736	}
737
738
739	/*
740	** Captures: if pattern has fixed (and not too big) length, and it
741	** has no nested captures, use a single IFullCapture instruction
742	** after the match; otherwise, enclose the pattern with OpenCapture -
743	** CloseCapture.
744	*/
745	static void codecapture (CompileState compst, TTree tree, int tt,
746	const Charset *fl) {
747	int len = fixedlen(sib1(tree));
748	if (len >= `0` && len <= MAXOFF && !hascaptures(sib1(tree))) {
749	codegen(compst, sib1(tree), `0`, tt, fl);
750	addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
751	}
752	else {
753	addinstcap(compst, IOpenCapture, tree->cap, tree->key, `0`);
754	codegen(compst, sib1(tree), `0`, tt, fl);
755	addinstcap(compst, ICloseCapture, Cclose, `0`, `0`);
756	}
757	}
758
759
760	static void coderuntime (CompileState compst, TTree tree, int tt) {
761	addinstcap(compst, IOpenCapture, Cgroup, tree->key, `0`);
762	codegen(compst, sib1(tree), `0`, tt, fullset);
763	addinstcap(compst, ICloseRunTime, Cclose, `0`, `0`);
764	}
765
766
767	/*
768	** Repetion; optimizations:
769	** When pattern is a charset, can use special instruction ISpan.
770	** When pattern is head fail, or if it starts with characters that
771	** are disjoint from what follows the repetions, a simple test
772	** is enough (a fail inside the repetition would backtrack to fail
773	** again in the following pattern, so there is no need for a choice).
774	** When 'opt' is true, the repetion can reuse the Choice already
775	** active in the stack.
776	*/
777	static void coderep (CompileState compst, TTree tree, int opt,
778	const Charset *fl) {
779	Charset st;
780	if (tocharset(tree, &st)) {
781	addinstruction(compst, ISpan, `0`);
782	addcharset(compst, st.cs);
783	}
784	else {
785	int e1 = getfirst(tree, fullset, &st);
786	if (headfail(tree) \|\| (!e1 && cs_disjoint(&st, fl))) {
787	/ L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: /
788	int jmp;
789	int test = codetestset(compst, &st, `0`);
790	codegen(compst, tree, `0`, test, fullset);
791	jmp = addoffsetinst(compst, IJmp);
792	jumptohere(compst, test);
793	jumptothere(compst, jmp, test);
794	}
795	else {
796	/ test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: /
797	/ or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; /
798	int commit, l2;
799	int test = codetestset(compst, &st, e1);
800	int pchoice = NOINST;
801	if (opt)
802	jumptohere(compst, addoffsetinst(compst, IPartialCommit));
803	else
804	pchoice = addoffsetinst(compst, IChoice);
805	l2 = gethere(compst);
806	codegen(compst, tree, `0`, NOINST, fullset);
807	commit = addoffsetinst(compst, IPartialCommit);
808	jumptothere(compst, commit, l2);
809	jumptohere(compst, pchoice);
810	jumptohere(compst, test);
811	}
812	}
813	}
814
815
816	/*
817	** Not predicate; optimizations:
818	** In any case, if first test fails, 'not' succeeds, so it can jump to
819	** the end. If pattern is headfail, that is all (it cannot fail
820	** in other parts); this case includes 'not' of simple sets. Otherwise,
821	** use the default code (a choice plus a failtwice).
822	*/
823	static void codenot (CompileState compst, TTree tree) {
824	Charset st;
825	int e = getfirst(tree, fullset, &st);
826	int test = codetestset(compst, &st, e);
827	if (headfail(tree)) / test (fail(p1)) -> L1; fail; L1: /
828	addinstruction(compst, IFail, `0`);
829	else {
830	/ test(fail(p))-> L1; choice L1; <p>; failtwice; L1: /
831	int pchoice = addoffsetinst(compst, IChoice);
832	codegen(compst, tree, `0`, NOINST, fullset);
833	addinstruction(compst, IFailTwice, `0`);
834	jumptohere(compst, pchoice);
835	}
836	jumptohere(compst, test);
837	}
838
839
840	/*
841	** change open calls to calls, using list 'positions' to find
842	** correct offsets; also optimize tail calls
843	*/
844	static void correctcalls (CompileState compst, int* *positions,
845	int from, int to) {
846	int i;
847	Instruction *code = compst->p->code;
848	for (i = from; i < to; i += sizei(&code[i])) {
849	if (code[i].i.code == IOpenCall) {
850	int n = code[i].i.key; / rule number /
851	int rule = positions[n]; / rule position /
852	assert(rule == from \|\| code[rule - `1`].i.code == IRet);
853	if (code[finaltarget(code, i + `2`)].i.code == IRet) / call; ret ? /
854	code[i].i.code = IJmp; / tail call /
855	else
856	code[i].i.code = ICall;
857	jumptothere(compst, i, rule); / call jumps to respective rule /
858	}
859	}
860	assert(i == to);
861	}
862
863
864	/*
865	** Code for a grammar:
866	** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
867	*/
868	static void codegrammar (CompileState compst, TTree grammar) {
869	int positions[MAXRULES];
870	int rulenumber = `0`;
871	TTree *rule;
872	int firstcall = addoffsetinst(compst, ICall); / call initial rule /
873	int jumptoend = addoffsetinst(compst, IJmp); / jump to the end /
874	int start = gethere(compst); / here starts the initial rule /
875	jumptohere(compst, firstcall);
876	for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
877	positions[rulenumber++] = gethere(compst); / save rule position /
878	codegen(compst, sib1(rule), `0`, NOINST, fullset); / code rule /
879	addinstruction(compst, IRet, `0`);
880	}
881	assert(rule->tag == TTrue);
882	jumptohere(compst, jumptoend);
883	correctcalls(compst, positions, start, gethere(compst));
884	}
885
886
887	static void codecall (CompileState compst, TTree call) {
888	int c = addoffsetinst(compst, IOpenCall); / to be corrected later /
889	getinstr(compst, c).i.key = sib2(call)->cap; / rule number /
890	assert(sib2(call)->tag == TRule);
891	}
892
893
894	/*
895	** Code first child of a sequence
896	** (second child is called in-place to allow tail call)
897	** Return 'tt' for second child
898	*/
899	static int codeseq1 (CompileState compst, TTree p1, TTree *p2,
900	int tt, const Charset *fl) {
901	if (needfollow(p1)) {
902	Charset fl1;
903	getfirst(p2, fl, &fl1); / p1 follow is p2 first /
904	codegen(compst, p1, `0`, tt, &fl1);
905	}
906	else / use 'fullset' as follow /
907	codegen(compst, p1, `0`, tt, fullset);
908	if (fixedlen(p1) != `0`) / can 'p1' consume anything? /
909	return NOINST; / invalidate test /
910	else return tt; / else 'tt' still protects sib2 /
911	}
912
913
914	/*
915	** Main code-generation function: dispatch to auxiliar functions
916	** according to kind of tree. ('needfollow' should return true
917	** only for consructions that use 'fl'.)
918	*/
919	static void codegen (CompileState compst, TTree tree, int opt, int tt,
920	const Charset *fl) {
921	tailcall:
922	switch (tree->tag) {
923	case TChar: codechar(compst, tree->u.n, tt); break;
924	case TAny: addinstruction(compst, IAny, `0`); break;
925	case TSet: codecharset(compst, treebuffer(tree), tt); break;
926	case TTrue: break;
927	case TFalse: addinstruction(compst, IFail, `0`); break;
928	case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
929	case TRep: coderep(compst, sib1(tree), opt, fl); break;
930	case TBehind: codebehind(compst, tree); break;
931	case TNot: codenot(compst, sib1(tree)); break;
932	case TAnd: codeand(compst, sib1(tree), tt); break;
933	case TCapture: codecapture(compst, tree, tt, fl); break;
934	case TRunTime: coderuntime(compst, tree, tt); break;
935	case TGrammar: codegrammar(compst, tree); break;
936	case TCall: codecall(compst, tree); break;
937	case TSeq: {
938	tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); / code 'p1' /
939	/ codegen(compst, p2, opt, tt, fl); /
940	tree = sib2(tree); goto tailcall;
941	}
942	default: assert(`0`);
943	}
944	}
945
946
947	/*
948	** Optimize jumps and other jump-like instructions.
949	** * Update labels of instructions with labels to their final
950	** destinations (e.g., choice L1; ... L1: jmp L2: becomes
951	** choice L2)
952	** * Jumps to other instructions that do jumps become those
953	** instructions (e.g., jump to return becomes a return; jump
954	** to commit becomes a commit)
955	*/
956	static void peephole (CompileState *compst) {
957	Instruction *code = compst->p->code;
958	int i;
959	for (i = `0`; i < compst->ncode; i += sizei(&code[i])) {
960	redo:
961	switch (code[i].i.code) {
962	case IChoice: case ICall: case ICommit: case IPartialCommit:
963	case IBackCommit: case ITestChar: case ITestSet:
964	case ITestAny: { / instructions with labels /
965	jumptothere(compst, i, finallabel(code, i)); / optimize label /
966	break;
967	}
968	case IJmp: {
969	int ft = finaltarget(code, i);
970	switch (code[ft].i.code) { / jumping to what? /
971	case IRet: case IFail: case IFailTwice:
972	case IEnd: { / instructions with unconditional implicit jumps /
973	code[i] = code[ft]; / jump becomes that instruction /
974	code[i + `1`].i.code = IAny; / 'no-op' for target position /
975	break;
976	}
977	case ICommit: case IPartialCommit:
978	case IBackCommit: { / inst. with unconditional explicit jumps /
979	int fft = finallabel(code, ft);
980	code[i] = code[ft]; / jump becomes that instruction... /
981	jumptothere(compst, i, fft); / but must correct its offset /
982	goto redo; / reoptimize its label /
983	}
984	default: {
985	jumptothere(compst, i, ft); / optimize label /
986	break;
987	}
988	}
989	break;
990	}
991	default: break;
992	}
993	}
994	assert(code[i - `1`].i.code == IEnd);
995	}
996
997
998	/*
999	** Compile a pattern
1000	*/
1001	Instruction compile (lua_State L, Pattern *p) {
1002	CompileState compst;
1003	compst.p = p; compst.ncode = `0`; compst.L = L;
1004	realloccode(L, p, `2`); / minimum initial size /
1005	codegen(&compst, p->tree, `0`, NOINST, fullset);
1006	addinstruction(&compst, IEnd, `0`);
1007	realloccode(L, p, compst.ncode); / set final size /
1008	peephole(&compst);
1009	return p->code;
1010	}
1011
1012
1013	/ }====================================================== /
1014
1015

Browse the source code of TIC-80/vendor/lpeg/lpcode.c