llex.c source code [TIC-80/vendor/lua/llex.c]

1	/*
2	** $Id: llex.c,v 2.96.1.1 2017/04/19 17:20:42 roberto Exp $
3	** Lexical Analyzer
4	** See Copyright Notice in lua.h
5	*/
6
7	#define llex_c
8	#define LUA_CORE
9
10	#include "lprefix.h"
11
12
13	#include <locale.h>
14	#include <string.h>
15
16	#include "lua.h"
17
18	#include "lctype.h"
19	#include "ldebug.h"
20	#include "ldo.h"
21	#include "lgc.h"
22	#include "llex.h"
23	#include "lobject.h"
24	#include "lparser.h"
25	#include "lstate.h"
26	#include "lstring.h"
27	#include "ltable.h"
28	#include "lzio.h"
29
30
31
/* advance the lexer: read the next character from the input stream 'z'
   into 'current' (argument parenthesized so any pointer expression works) */
#define next(ls)	((ls)->current = zgetc((ls)->z))
33
34
35
/* true when the current character is a line-break character
   (argument parenthesized so any pointer expression works) */
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
37
38
/*
** Textual form of every token, indexed by (token - FIRST_RESERVED).
** Reserved words come first, then multi-character operators, then
** "<eof>" and the placeholder names used for value-carrying tokens.
*/
/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
    "<number>", "<integer>", "<name>", "<string>"
};
49
50
/* append the current character to the token buffer, then advance
   (argument parenthesized so any pointer expression works) */
#define save_and_next(ls)	(save((ls), (ls)->current), next(ls))
52
53
54	static l_noret lexerror (LexState ls, const* char msg, int* token);
55
56
57	static void save (LexState ls, int* c) {
58	Mbuffer *b = ls->buff;
59	if (luaZ_bufflen(b) + `1` > luaZ_sizebuffer(b)) {
60	size_t newsize;
61	if (luaZ_sizebuffer(b) >= MAX_SIZE/`2`)
62	lexerror(ls, "lexical element too long", `0`);
63	newsize = luaZ_sizebuffer(b) * `2`;
64	luaZ_resizebuffer(ls->L, b, newsize);
65	}
66	b->buffer[luaZ_bufflen(b)++] = cast(char, c);
67	}
68
69
70	void luaX_init (lua_State *L) {
71	int i;
72	TString e = luaS_newliteral(L, LUA_ENV); /* create env name /
73	luaC_fix(L, obj2gco(e)); / never collect this name /
74	for (i=`0`; i<NUM_RESERVED; i++) {
75	TString *ts = luaS_new(L, luaX_tokens[i]);
76	luaC_fix(L, obj2gco(ts)); / reserved words are never collected /
77	ts->extra = cast_byte(i+`1`); / reserved word /
78	}
79	}
80
81
82	const char luaX_token2str (LexState ls, int token) {
83	if (token < FIRST_RESERVED) { / single-byte symbols? /
84	lua_assert(token == cast_uchar(token));
85	return luaO_pushfstring(ls->L, "'%c'", token);
86	}
87	else {
88	const char *s = luaX_tokens[token - FIRST_RESERVED];
89	if (token < TK_EOS) / fixed format (symbols and reserved words)? /
90	return luaO_pushfstring(ls->L, "'%s'", s);
91	else / names, strings, and numerals /
92	return s;
93	}
94	}
95
96
97	static const char txtToken (LexState ls, int token) {
98	switch (token) {
99	case TK_NAME: case TK_STRING:
100	case TK_FLT: case TK_INT:
101	save(ls, `'\0'`);
102	return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
103	default:
104	return luaX_token2str(ls, token);
105	}
106	}
107
108
109	static l_noret lexerror (LexState ls, const* char msg, int* token) {
110	msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
111	if (token)
112	luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
113	luaD_throw(ls->L, LUA_ERRSYNTAX);
114	}
115
116
117	l_noret luaX_syntaxerror (LexState ls, const* char *msg) {
118	lexerror(ls, msg, ls->t.token);
119	}
120
121
122	/*
123	** creates a new string and anchors it in scanner's table so that
124	** it will not be collected until the end of the compilation
125	** (by that time it should be anchored somewhere)
126	*/
127	TString luaX_newstring (LexState ls, const char *str, size_t l) {
128	lua_State *L = ls->L;
129	TValue o; /* entry for 'str' /
130	TString ts = luaS_newlstr(L, str, l); /* create new string /
131	setsvalue2s(L, L->top++, ts); / temporarily anchor it in stack /
132	o = luaH_set(L, ls->h, L->top - `1`);
133	if (ttisnil(o)) { / not in use yet? /
134	/ boolean value does not need GC barrier;*
135	table has no metatable, so it does not need to invalidate cache /*
136	setbvalue(o, `1`); / t[string] = true /
137	luaC_checkGC(L);
138	}
139	else { / string already present /
140	ts = tsvalue(keyfromval(o)); / re-use value previously stored /
141	}
142	L->top--; / remove string from stack /
143	return ts;
144	}
145
146
147	/*
148	** increment line number and skips newline sequence (any of
149	** \n, \r, \n\r, or \r\n)
150	*/
151	static void inclinenumber (LexState *ls) {
152	int old = ls->current;
153	lua_assert(currIsNewline(ls));
154	next(ls); / skip '\n' or '\r' /
155	if (currIsNewline(ls) && ls->current != old)
156	next(ls); / skip '\n\r' or '\r\n' /
157	if (++ls->linenumber >= MAX_INT)
158	lexerror(ls, "chunk has too many lines", `0`);
159	}
160
161
162	void luaX_setinput (lua_State L, LexState ls, ZIO z, TString source,
163	int firstchar) {
164	ls->t.token = `0`;
165	ls->L = L;
166	ls->current = firstchar;
167	ls->lookahead.token = TK_EOS; / no look-ahead token /
168	ls->z = z;
169	ls->fs = NULL;
170	ls->linenumber = `1`;
171	ls->lastline = `1`;
172	ls->source = source;
173	ls->envn = luaS_newliteral(L, LUA_ENV); / get env name /
174	luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); / initialize buffer /
175	}
176
177
178
179	/*
180	** =======================================================
181	** LEXICAL ANALYZER
182	** =======================================================
183	*/
184
185
186	static int check_next1 (LexState ls, int* c) {
187	if (ls->current == c) {
188	next(ls);
189	return `1`;
190	}
191	else return `0`;
192	}
193
194
195	/*
196	** Check whether current char is in set 'set' (with two chars) and
197	** saves it
198	*/
199	static int check_next2 (LexState ls, const* char *set) {
200	lua_assert(set[`2`] == `'\0'`);
201	if (ls->current == set[`0`] \|\| ls->current == set[`1`]) {
202	save_and_next(ls);
203	return `1`;
204	}
205	else return `0`;
206	}
207
208
209	/ LUA_NUMBER /
210	/*
211	** this function is quite liberal in what it accepts, as 'luaO_str2num'
212	** will reject ill-formed numerals.
213	*/
214	static int read_numeral (LexState ls, SemInfo seminfo) {
215	TValue obj;
216	const char *expo = "Ee";
217	int first = ls->current;
218	lua_assert(lisdigit(ls->current));
219	save_and_next(ls);
220	if (first == `'0'` && check_next2(ls, "xX")) / hexadecimal? /
221	expo = "Pp";
222	for (;;) {
223	if (check_next2(ls, expo)) / exponent part? /
224	check_next2(ls, "-+"); / optional exponent sign /
225	if (lisxdigit(ls->current))
226	save_and_next(ls);
227	else if (ls->current == `'.'`)
228	save_and_next(ls);
229	else break;
230	}
231	save(ls, `'\0'`);
232	if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == `0`) / format error? /
233	lexerror(ls, "malformed number", TK_FLT);
234	if (ttisinteger(&obj)) {
235	seminfo->i = ivalue(&obj);
236	return TK_INT;
237	}
238	else {
239	lua_assert(ttisfloat(&obj));
240	seminfo->r = fltvalue(&obj);
241	return TK_FLT;
242	}
243	}
244
245
246	/*
247	** reads a sequence '[=[' or ']=]', leaving the last bracket.
248	** If sequence is well formed, return its number of '='s + 2; otherwise,
249	** return 1 if there is no '='s or 0 otherwise (an unfinished '[==...').
250	*/
251	static size_t skip_sep (LexState *ls) {
252	size_t count = `0`;
253	int s = ls->current;
254	lua_assert(s == `'['` \|\| s == `']'`);
255	save_and_next(ls);
256	while (ls->current == `'='`) {
257	save_and_next(ls);
258	count++;
259	}
260	return (ls->current == s) ? count + `2`
261	: (count == `0`) ? `1`
262	: `0`;
263
264	}
265
266
267	static void read_long_string (LexState ls, SemInfo seminfo, size_t sep) {
268	int line = ls->linenumber; / initial line (for error message) /
269	save_and_next(ls); / skip 2nd '[' /
270	if (currIsNewline(ls)) / string starts with a newline? /
271	inclinenumber(ls); / skip it /
272	for (;;) {
273	switch (ls->current) {
274	case EOZ: { / error /
275	const char *what = (seminfo ? "string" : "comment");
276	const char *msg = luaO_pushfstring(ls->L,
277	"unfinished long %s (starting at line %d)", what, line);
278	lexerror(ls, msg, TK_EOS);
279	break; / to avoid warnings /
280	}
281	case `']'`: {
282	if (skip_sep(ls) == sep) {
283	save_and_next(ls); / skip 2nd ']' /
284	goto endloop;
285	}
286	break;
287	}
288	case `'\n'`: case `'\r'`: {
289	save(ls, `'\n'`);
290	inclinenumber(ls);
291	if (!seminfo) luaZ_resetbuffer(ls->buff); / avoid wasting space /
292	break;
293	}
294	default: {
295	if (seminfo) save_and_next(ls);
296	else next(ls);
297	}
298	}
299	} endloop:
300	if (seminfo)
301	seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
302	luaZ_bufflen(ls->buff) - `2` * sep);
303	}
304
305
306	static void esccheck (LexState ls, int* c, const char *msg) {
307	if (!c) {
308	if (ls->current != EOZ)
309	save_and_next(ls); / add current to buffer for error message /
310	lexerror(ls, msg, TK_STRING);
311	}
312	}
313
314
315	static int gethexa (LexState *ls) {
316	save_and_next(ls);
317	esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
318	return luaO_hexavalue(ls->current);
319	}
320
321
322	static int readhexaesc (LexState *ls) {
323	int r = gethexa(ls);
324	r = (r << `4`) + gethexa(ls);
325	luaZ_buffremove(ls->buff, `2`); / remove saved chars from buffer /
326	return r;
327	}
328
329
330	static unsigned long readutf8esc (LexState *ls) {
331	unsigned long r;
332	int i = `4`; / chars to be removed: '\', 'u', '{', and first digit /
333	save_and_next(ls); / skip 'u' /
334	esccheck(ls, ls->current == `'{'`, "missing '{'");
335	r = gethexa(ls); / must have at least one digit /
336	while ((save_and_next(ls), lisxdigit(ls->current))) {
337	i++;
338	r = (r << `4`) + luaO_hexavalue(ls->current);
339	esccheck(ls, r <= `0x10FFFF`, "UTF-8 value too large");
340	}
341	esccheck(ls, ls->current == `'}'`, "missing '}'");
342	next(ls); / skip '}' /
343	luaZ_buffremove(ls->buff, i); / remove saved chars from buffer /
344	return r;
345	}
346
347
348	static void utf8esc (LexState *ls) {
349	char buff[UTF8BUFFSZ];
350	int n = luaO_utf8esc(buff, readutf8esc(ls));
351	for (; n > `0`; n--) / add 'buff' to string /
352	save(ls, buff[UTF8BUFFSZ - n]);
353	}
354
355
356	static int readdecesc (LexState *ls) {
357	int i;
358	int r = `0`; / result accumulator /
359	for (i = `0`; i < `3` && lisdigit(ls->current); i++) { / read up to 3 digits /
360	r = `10`*r + ls->current - `'0'`;
361	save_and_next(ls);
362	}
363	esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
364	luaZ_buffremove(ls->buff, i); / remove read digits from buffer /
365	return r;
366	}
367
368
369	static void read_string (LexState ls, int* del, SemInfo *seminfo) {
370	save_and_next(ls); / keep delimiter (for error messages) /
371	while (ls->current != del) {
372	switch (ls->current) {
373	case EOZ:
374	lexerror(ls, "unfinished string", TK_EOS);
375	break; / to avoid warnings /
376	case `'\n'`:
377	case `'\r'`:
378	lexerror(ls, "unfinished string", TK_STRING);
379	break; / to avoid warnings /
380	case `'\\'`: { / escape sequences /
381	int c; / final character to be saved /
382	save_and_next(ls); / keep '\\' for error messages /
383	switch (ls->current) {
384	case `'a'`: c = `'\a'`; goto read_save;
385	case `'b'`: c = `'\b'`; goto read_save;
386	case `'f'`: c = `'\f'`; goto read_save;
387	case `'n'`: c = `'\n'`; goto read_save;
388	case `'r'`: c = `'\r'`; goto read_save;
389	case `'t'`: c = `'\t'`; goto read_save;
390	case `'v'`: c = `'\v'`; goto read_save;
391	case `'x'`: c = readhexaesc(ls); goto read_save;
392	case `'u'`: utf8esc(ls); goto no_save;
393	case `'\n'`: case `'\r'`:
394	inclinenumber(ls); c = `'\n'`; goto only_save;
395	case `'\\'`: case `'\"'`: case `'\''`:
396	c = ls->current; goto read_save;
397	case EOZ: goto no_save; / will raise an error next loop /
398	case `'z'`: { / zap following span of spaces /
399	luaZ_buffremove(ls->buff, `1`); / remove '\\' /
400	next(ls); / skip the 'z' /
401	while (lisspace(ls->current)) {
402	if (currIsNewline(ls)) inclinenumber(ls);
403	else next(ls);
404	}
405	goto no_save;
406	}
407	default: {
408	esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
409	c = readdecesc(ls); / digital escape '\ddd' /
410	goto only_save;
411	}
412	}
413	read_save:
414	next(ls);
415	/ go through /
416	only_save:
417	luaZ_buffremove(ls->buff, `1`); / remove '\\' /
418	save(ls, c);
419	/ go through /
420	no_save: break;
421	}
422	default:
423	save_and_next(ls);
424	}
425	}
426	save_and_next(ls); / skip delimiter /
427	seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + `1`,
428	luaZ_bufflen(ls->buff) - `2`);
429	}
430
431
432	static int llex (LexState ls, SemInfo seminfo) {
433	luaZ_resetbuffer(ls->buff);
434	for (;;) {
435	switch (ls->current) {
436	case `'\n'`: case `'\r'`: { / line breaks /
437	inclinenumber(ls);
438	break;
439	}
440	case `' '`: case `'\f'`: case `'\t'`: case `'\v'`: { / spaces /
441	next(ls);
442	break;
443	}
444	case `'-'`: { / '-' or '--' (comment) /
445	next(ls);
446	if (ls->current != `'-'`) return `'-'`;
447	/ else is a comment /
448	next(ls);
449	if (ls->current == `'['`) { / long comment? /
450	size_t sep = skip_sep(ls);
451	luaZ_resetbuffer(ls->buff); / 'skip_sep' may dirty the buffer /
452	if (sep >= `2`) {
453	read_long_string(ls, NULL, sep); / skip long comment /
454	luaZ_resetbuffer(ls->buff); / previous call may dirty the buff. /
455	break;
456	}
457	}
458	/ else short comment /
459	while (!currIsNewline(ls) && ls->current != EOZ)
460	next(ls); / skip until end of line (or end of file) /
461	break;
462	}
463	case `'['`: { / long string or simply '[' /
464	size_t sep = skip_sep(ls);
465	if (sep >= `2`) {
466	read_long_string(ls, seminfo, sep);
467	return TK_STRING;
468	}
469	else if (sep == `0`) / '[=...' missing second bracket /
470	lexerror(ls, "invalid long string delimiter", TK_STRING);
471	return `'['`;
472	}
473	case `'='`: {
474	next(ls);
475	if (check_next1(ls, `'='`)) return TK_EQ;
476	else return `'='`;
477	}
478	case `'<'`: {
479	next(ls);
480	if (check_next1(ls, `'='`)) return TK_LE;
481	else if (check_next1(ls, `'<'`)) return TK_SHL;
482	else return `'<'`;
483	}
484	case `'>'`: {
485	next(ls);
486	if (check_next1(ls, `'='`)) return TK_GE;
487	else if (check_next1(ls, `'>'`)) return TK_SHR;
488	else return `'>'`;
489	}
490	case `'/'`: {
491	next(ls);
492	if (check_next1(ls, `'/'`)) return TK_IDIV;
493	else return `'/'`;
494	}
495	case `'~'`: {
496	next(ls);
497	if (check_next1(ls, `'='`)) return TK_NE;
498	else return `'~'`;
499	}
500	case `':'`: {
501	next(ls);
502	if (check_next1(ls, `':'`)) return TK_DBCOLON;
503	else return `':'`;
504	}
505	case `'"'`: case `'\''`: { / short literal strings /
506	read_string(ls, ls->current, seminfo);
507	return TK_STRING;
508	}
509	case `'.'`: { / '.', '..', '...', or number /
510	save_and_next(ls);
511	if (check_next1(ls, `'.'`)) {
512	if (check_next1(ls, `'.'`))
513	return TK_DOTS; / '...' /
514	else return TK_CONCAT; / '..' /
515	}
516	else if (!lisdigit(ls->current)) return `'.'`;
517	else return read_numeral(ls, seminfo);
518	}
519	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
520	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`: {
521	return read_numeral(ls, seminfo);
522	}
523	case EOZ: {
524	return TK_EOS;
525	}
526	default: {
527	if (lislalpha(ls->current)) { / identifier or reserved word? /
528	TString *ts;
529	do {
530	save_and_next(ls);
531	} while (lislalnum(ls->current));
532	ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
533	luaZ_bufflen(ls->buff));
534	seminfo->ts = ts;
535	if (isreserved(ts)) / reserved word? /
536	return ts->extra - `1` + FIRST_RESERVED;
537	else {
538	return TK_NAME;
539	}
540	}
541	else { / single-char tokens (+ - / ...) /
542	int c = ls->current;
543	next(ls);
544	return c;
545	}
546	}
547	}
548	}
549	}
550
551
552	void luaX_next (LexState *ls) {
553	ls->lastline = ls->linenumber;
554	if (ls->lookahead.token != TK_EOS) { / is there a look-ahead token? /
555	ls->t = ls->lookahead; / use this one /
556	ls->lookahead.token = TK_EOS; / and discharge it /
557	}
558	else
559	ls->t.token = llex(ls, &ls->t.seminfo); / read next token /
560	}
561
562
563	int luaX_lookahead (LexState *ls) {
564	lua_assert(ls->lookahead.token == TK_EOS);
565	ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
566	return ls->lookahead.token;
567	}
568
569

Browse the source code of TIC-80/vendor/lua/llex.c