llex.c source code [Aseprite/third_party/lua/llex.c]

1	/*
2	** $Id: llex.c $
3	** Lexical Analyzer
4	** See Copyright Notice in lua.h
5	*/
6
7	#define llex_c
8	#define LUA_CORE
9
10	#include "lprefix.h"
11
12
13	#include <locale.h>
14	#include <string.h>
15
16	#include "lua.h"
17
18	#include "lctype.h"
19	#include "ldebug.h"
20	#include "ldo.h"
21	#include "lgc.h"
22	#include "llex.h"
23	#include "lobject.h"
24	#include "lparser.h"
25	#include "lstate.h"
26	#include "lstring.h"
27	#include "ltable.h"
28	#include "lzio.h"
29
30
31
/*
** Advance the lexer by one character: read the next character from the
** input stream 'ls->z' into 'ls->current'. The macro's value is that
** character.
*/
#define next(ls)	(ls->current = zgetc(ls->z))


/* True when the current character is one of the line-break characters. */
#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
37
38
/*
** Textual form of every multi-character token, indexed by
** (token - FIRST_RESERVED). Reserved words come first, then the
** multi-character operators, then the generic token classes.
*/
/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
    "<number>", "<integer>", "<name>", "<string>"
};
49
50
51	#define save_and_next(ls) (save(ls, ls->current), next(ls))
52
53
54	static l_noret lexerror (LexState ls, const* char msg, int* token);
55
56
57	static void save (LexState ls, int* c) {
58	Mbuffer *b = ls->buff;
59	if (luaZ_bufflen(b) + `1` > luaZ_sizebuffer(b)) {
60	size_t newsize;
61	if (luaZ_sizebuffer(b) >= MAX_SIZE/`2`)
62	lexerror(ls, "lexical element too long", `0`);
63	newsize = luaZ_sizebuffer(b) * `2`;
64	luaZ_resizebuffer(ls->L, b, newsize);
65	}
66	b->buffer[luaZ_bufflen(b)++] = cast_char(c);
67	}
68
69
70	void luaX_init (lua_State *L) {
71	int i;
72	TString e = luaS_newliteral(L, LUA_ENV); /* create env name /
73	luaC_fix(L, obj2gco(e)); / never collect this name /
74	for (i=`0`; i<NUM_RESERVED; i++) {
75	TString *ts = luaS_new(L, luaX_tokens[i]);
76	luaC_fix(L, obj2gco(ts)); / reserved words are never collected /
77	ts->extra = cast_byte(i+`1`); / reserved word /
78	}
79	}
80
81
82	const char luaX_token2str (LexState ls, int token) {
83	if (token < FIRST_RESERVED) { / single-byte symbols? /
84	if (lisprint(token))
85	return luaO_pushfstring(ls->L, "'%c'", token);
86	else / control character /
87	return luaO_pushfstring(ls->L, "'<\\%d>'", token);
88	}
89	else {
90	const char *s = luaX_tokens[token - FIRST_RESERVED];
91	if (token < TK_EOS) / fixed format (symbols and reserved words)? /
92	return luaO_pushfstring(ls->L, "'%s'", s);
93	else / names, strings, and numerals /
94	return s;
95	}
96	}
97
98
99	static const char txtToken (LexState ls, int token) {
100	switch (token) {
101	case TK_NAME: case TK_STRING:
102	case TK_FLT: case TK_INT:
103	save(ls, `'\0'`);
104	return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
105	default:
106	return luaX_token2str(ls, token);
107	}
108	}
109
110
111	static l_noret lexerror (LexState ls, const* char msg, int* token) {
112	msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
113	if (token)
114	luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
115	luaD_throw(ls->L, LUA_ERRSYNTAX);
116	}
117
118
119	l_noret luaX_syntaxerror (LexState ls, const* char *msg) {
120	lexerror(ls, msg, ls->t.token);
121	}
122
123
124	/*
125	** Creates a new string and anchors it in scanner's table so that it
126	** will not be collected until the end of the compilation; by that time
127	** it should be anchored somewhere. It also internalizes long strings,
128	** ensuring there is only one copy of each unique string. The table
129	** here is used as a set: the string enters as the key, while its value
130	** is irrelevant. We use the string itself as the value only because it
131	** is a TValue readly available. Later, the code generation can change
132	** this value.
133	*/
134	TString luaX_newstring (LexState ls, const char *str, size_t l) {
135	lua_State *L = ls->L;
136	TString ts = luaS_newlstr(L, str, l); /* create new string /
137	const TValue *o = luaH_getstr(ls->h, ts);
138	if (!ttisnil(o)) / string already present? /
139	ts = keystrval(nodefromval(o)); / get saved copy /
140	else { / not in use yet /
141	TValue stv = s2v(L->top++); /* reserve stack space for string /
142	setsvalue(L, stv, ts); / temporarily anchor the string /
143	luaH_finishset(L, ls->h, stv, o, stv); / t[string] = string /
144	/ table is not a metatable, so it does not need to invalidate cache /
145	luaC_checkGC(L);
146	L->top--; / remove string from stack /
147	}
148	return ts;
149	}
150
151
152	/*
153	** increment line number and skips newline sequence (any of
154	** \n, \r, \n\r, or \r\n)
155	*/
156	static void inclinenumber (LexState *ls) {
157	int old = ls->current;
158	lua_assert(currIsNewline(ls));
159	next(ls); / skip '\n' or '\r' /
160	if (currIsNewline(ls) && ls->current != old)
161	next(ls); / skip '\n\r' or '\r\n' /
162	if (++ls->linenumber >= MAX_INT)
163	lexerror(ls, "chunk has too many lines", `0`);
164	}
165
166
167	void luaX_setinput (lua_State L, LexState ls, ZIO z, TString source,
168	int firstchar) {
169	ls->t.token = `0`;
170	ls->L = L;
171	ls->current = firstchar;
172	ls->lookahead.token = TK_EOS; / no look-ahead token /
173	ls->z = z;
174	ls->fs = NULL;
175	ls->linenumber = `1`;
176	ls->lastline = `1`;
177	ls->source = source;
178	ls->envn = luaS_newliteral(L, LUA_ENV); / get env name /
179	luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); / initialize buffer /
180	}
181
182
183
184	/*
185	** =======================================================
186	** LEXICAL ANALYZER
187	** =======================================================
188	*/
189
190
191	static int check_next1 (LexState ls, int* c) {
192	if (ls->current == c) {
193	next(ls);
194	return `1`;
195	}
196	else return `0`;
197	}
198
199
200	/*
201	** Check whether current char is in set 'set' (with two chars) and
202	** saves it
203	*/
204	static int check_next2 (LexState ls, const* char *set) {
205	lua_assert(set[`2`] == `'\0'`);
206	if (ls->current == set[`0`] \|\| ls->current == set[`1`]) {
207	save_and_next(ls);
208	return `1`;
209	}
210	else return `0`;
211	}
212
213
214	/ LUA_NUMBER /
215	/*
216	** This function is quite liberal in what it accepts, as 'luaO_str2num'
217	** will reject ill-formed numerals. Roughly, it accepts the following
218	** pattern:
219	**
220	** %d(%x\|%.\|([Ee][+-]?))* \| 0[Xx](%x\|%.\|([Pp][+-]?))*
221	**
222	** The only tricky part is to accept [+-] only after a valid exponent
223	** mark, to avoid reading '3-4' or '0xe+1' as a single number.
224	**
225	** The caller might have already read an initial dot.
226	*/
227	static int read_numeral (LexState ls, SemInfo seminfo) {
228	TValue obj;
229	const char *expo = "Ee";
230	int first = ls->current;
231	lua_assert(lisdigit(ls->current));
232	save_and_next(ls);
233	if (first == `'0'` && check_next2(ls, "xX")) / hexadecimal? /
234	expo = "Pp";
235	for (;;) {
236	if (check_next2(ls, expo)) / exponent mark? /
237	check_next2(ls, "-+"); / optional exponent sign /
238	else if (lisxdigit(ls->current) \|\| ls->current == `'.'`) / '%x\|%.' /
239	save_and_next(ls);
240	else break;
241	}
242	if (lislalpha(ls->current)) / is numeral touching a letter? /
243	save_and_next(ls); / force an error /
244	save(ls, `'\0'`);
245	if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == `0`) / format error? /
246	lexerror(ls, "malformed number", TK_FLT);
247	if (ttisinteger(&obj)) {
248	seminfo->i = ivalue(&obj);
249	return TK_INT;
250	}
251	else {
252	lua_assert(ttisfloat(&obj));
253	seminfo->r = fltvalue(&obj);
254	return TK_FLT;
255	}
256	}
257
258
259	/*
260	** read a sequence '[=[' or ']=]', leaving the last bracket. If
261	** sequence is well formed, return its number of '='s + 2; otherwise,
262	** return 1 if it is a single bracket (no '='s and no 2nd bracket);
263	** otherwise (an unfinished '[==...') return 0.
264	*/
265	static size_t skip_sep (LexState *ls) {
266	size_t count = `0`;
267	int s = ls->current;
268	lua_assert(s == `'['` \|\| s == `']'`);
269	save_and_next(ls);
270	while (ls->current == `'='`) {
271	save_and_next(ls);
272	count++;
273	}
274	return (ls->current == s) ? count + `2`
275	: (count == `0`) ? `1`
276	: `0`;
277	}
278
279
280	static void read_long_string (LexState ls, SemInfo seminfo, size_t sep) {
281	int line = ls->linenumber; / initial line (for error message) /
282	save_and_next(ls); / skip 2nd '[' /
283	if (currIsNewline(ls)) / string starts with a newline? /
284	inclinenumber(ls); / skip it /
285	for (;;) {
286	switch (ls->current) {
287	case EOZ: { / error /
288	const char *what = (seminfo ? "string" : "comment");
289	const char *msg = luaO_pushfstring(ls->L,
290	"unfinished long %s (starting at line %d)", what, line);
291	lexerror(ls, msg, TK_EOS);
292	break; / to avoid warnings /
293	}
294	case `']'`: {
295	if (skip_sep(ls) == sep) {
296	save_and_next(ls); / skip 2nd ']' /
297	goto endloop;
298	}
299	break;
300	}
301	case `'\n'`: case `'\r'`: {
302	save(ls, `'\n'`);
303	inclinenumber(ls);
304	if (!seminfo) luaZ_resetbuffer(ls->buff); / avoid wasting space /
305	break;
306	}
307	default: {
308	if (seminfo) save_and_next(ls);
309	else next(ls);
310	}
311	}
312	} endloop:
313	if (seminfo)
314	seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
315	luaZ_bufflen(ls->buff) - `2` * sep);
316	}
317
318
319	static void esccheck (LexState ls, int* c, const char *msg) {
320	if (!c) {
321	if (ls->current != EOZ)
322	save_and_next(ls); / add current to buffer for error message /
323	lexerror(ls, msg, TK_STRING);
324	}
325	}
326
327
328	static int gethexa (LexState *ls) {
329	save_and_next(ls);
330	esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
331	return luaO_hexavalue(ls->current);
332	}
333
334
335	static int readhexaesc (LexState *ls) {
336	int r = gethexa(ls);
337	r = (r << `4`) + gethexa(ls);
338	luaZ_buffremove(ls->buff, `2`); / remove saved chars from buffer /
339	return r;
340	}
341
342
343	static unsigned long readutf8esc (LexState *ls) {
344	unsigned long r;
345	int i = `4`; / chars to be removed: '\', 'u', '{', and first digit /
346	save_and_next(ls); / skip 'u' /
347	esccheck(ls, ls->current == `'{'`, "missing '{'");
348	r = gethexa(ls); / must have at least one digit /
349	while (cast_void(save_and_next(ls)), lisxdigit(ls->current)) {
350	i++;
351	esccheck(ls, r <= (`0x7FFFFFFFu` >> `4`), "UTF-8 value too large");
352	r = (r << `4`) + luaO_hexavalue(ls->current);
353	}
354	esccheck(ls, ls->current == `'}'`, "missing '}'");
355	next(ls); / skip '}' /
356	luaZ_buffremove(ls->buff, i); / remove saved chars from buffer /
357	return r;
358	}
359
360
361	static void utf8esc (LexState *ls) {
362	char buff[UTF8BUFFSZ];
363	int n = luaO_utf8esc(buff, readutf8esc(ls));
364	for (; n > `0`; n--) / add 'buff' to string /
365	save(ls, buff[UTF8BUFFSZ - n]);
366	}
367
368
369	static int readdecesc (LexState *ls) {
370	int i;
371	int r = `0`; / result accumulator /
372	for (i = `0`; i < `3` && lisdigit(ls->current); i++) { / read up to 3 digits /
373	r = `10`*r + ls->current - `'0'`;
374	save_and_next(ls);
375	}
376	esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
377	luaZ_buffremove(ls->buff, i); / remove read digits from buffer /
378	return r;
379	}
380
381
382	static void read_string (LexState ls, int* del, SemInfo *seminfo) {
383	save_and_next(ls); / keep delimiter (for error messages) /
384	while (ls->current != del) {
385	switch (ls->current) {
386	case EOZ:
387	lexerror(ls, "unfinished string", TK_EOS);
388	break; / to avoid warnings /
389	case `'\n'`:
390	case `'\r'`:
391	lexerror(ls, "unfinished string", TK_STRING);
392	break; / to avoid warnings /
393	case `'\\'`: { / escape sequences /
394	int c; / final character to be saved /
395	save_and_next(ls); / keep '\\' for error messages /
396	switch (ls->current) {
397	case `'a'`: c = `'\a'`; goto read_save;
398	case `'b'`: c = `'\b'`; goto read_save;
399	case `'f'`: c = `'\f'`; goto read_save;
400	case `'n'`: c = `'\n'`; goto read_save;
401	case `'r'`: c = `'\r'`; goto read_save;
402	case `'t'`: c = `'\t'`; goto read_save;
403	case `'v'`: c = `'\v'`; goto read_save;
404	case `'x'`: c = readhexaesc(ls); goto read_save;
405	case `'u'`: utf8esc(ls); goto no_save;
406	case `'\n'`: case `'\r'`:
407	inclinenumber(ls); c = `'\n'`; goto only_save;
408	case `'\\'`: case `'\"'`: case `'\''`:
409	c = ls->current; goto read_save;
410	case EOZ: goto no_save; / will raise an error next loop /
411	case `'z'`: { / zap following span of spaces /
412	luaZ_buffremove(ls->buff, `1`); / remove '\\' /
413	next(ls); / skip the 'z' /
414	while (lisspace(ls->current)) {
415	if (currIsNewline(ls)) inclinenumber(ls);
416	else next(ls);
417	}
418	goto no_save;
419	}
420	default: {
421	esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
422	c = readdecesc(ls); / digital escape '\ddd' /
423	goto only_save;
424	}
425	}
426	read_save:
427	next(ls);
428	/ go through /
429	only_save:
430	luaZ_buffremove(ls->buff, `1`); / remove '\\' /
431	save(ls, c);
432	/ go through /
433	no_save: break;
434	}
435	default:
436	save_and_next(ls);
437	}
438	}
439	save_and_next(ls); / skip delimiter /
440	seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + `1`,
441	luaZ_bufflen(ls->buff) - `2`);
442	}
443
444
445	static int llex (LexState ls, SemInfo seminfo) {
446	luaZ_resetbuffer(ls->buff);
447	for (;;) {
448	switch (ls->current) {
449	case `'\n'`: case `'\r'`: { / line breaks /
450	inclinenumber(ls);
451	break;
452	}
453	case `' '`: case `'\f'`: case `'\t'`: case `'\v'`: { / spaces /
454	next(ls);
455	break;
456	}
457	case `'-'`: { / '-' or '--' (comment) /
458	next(ls);
459	if (ls->current != `'-'`) return `'-'`;
460	/ else is a comment /
461	next(ls);
462	if (ls->current == `'['`) { / long comment? /
463	size_t sep = skip_sep(ls);
464	luaZ_resetbuffer(ls->buff); / 'skip_sep' may dirty the buffer /
465	if (sep >= `2`) {
466	read_long_string(ls, NULL, sep); / skip long comment /
467	luaZ_resetbuffer(ls->buff); / previous call may dirty the buff. /
468	break;
469	}
470	}
471	/ else short comment /
472	while (!currIsNewline(ls) && ls->current != EOZ)
473	next(ls); / skip until end of line (or end of file) /
474	break;
475	}
476	case `'['`: { / long string or simply '[' /
477	size_t sep = skip_sep(ls);
478	if (sep >= `2`) {
479	read_long_string(ls, seminfo, sep);
480	return TK_STRING;
481	}
482	else if (sep == `0`) / '[=...' missing second bracket? /
483	lexerror(ls, "invalid long string delimiter", TK_STRING);
484	return `'['`;
485	}
486	case `'='`: {
487	next(ls);
488	if (check_next1(ls, `'='`)) return TK_EQ; / '==' /
489	else return `'='`;
490	}
491	case `'<'`: {
492	next(ls);
493	if (check_next1(ls, `'='`)) return TK_LE; / '<=' /
494	else if (check_next1(ls, `'<'`)) return TK_SHL; / '<<' /
495	else return `'<'`;
496	}
497	case `'>'`: {
498	next(ls);
499	if (check_next1(ls, `'='`)) return TK_GE; / '>=' /
500	else if (check_next1(ls, `'>'`)) return TK_SHR; / '>>' /
501	else return `'>'`;
502	}
503	case `'/'`: {
504	next(ls);
505	if (check_next1(ls, `'/'`)) return TK_IDIV; / '//' /
506	else return `'/'`;
507	}
508	case `'~'`: {
509	next(ls);
510	if (check_next1(ls, `'='`)) return TK_NE; / '~=' /
511	else return `'~'`;
512	}
513	case `':'`: {
514	next(ls);
515	if (check_next1(ls, `':'`)) return TK_DBCOLON; / '::' /
516	else return `':'`;
517	}
518	case `'"'`: case `'\''`: { / short literal strings /
519	read_string(ls, ls->current, seminfo);
520	return TK_STRING;
521	}
522	case `'.'`: { / '.', '..', '...', or number /
523	save_and_next(ls);
524	if (check_next1(ls, `'.'`)) {
525	if (check_next1(ls, `'.'`))
526	return TK_DOTS; / '...' /
527	else return TK_CONCAT; / '..' /
528	}
529	else if (!lisdigit(ls->current)) return `'.'`;
530	else return read_numeral(ls, seminfo);
531	}
532	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
533	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`: {
534	return read_numeral(ls, seminfo);
535	}
536	case EOZ: {
537	return TK_EOS;
538	}
539	default: {
540	if (lislalpha(ls->current)) { / identifier or reserved word? /
541	TString *ts;
542	do {
543	save_and_next(ls);
544	} while (lislalnum(ls->current));
545	ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
546	luaZ_bufflen(ls->buff));
547	seminfo->ts = ts;
548	if (isreserved(ts)) / reserved word? /
549	return ts->extra - `1` + FIRST_RESERVED;
550	else {
551	return TK_NAME;
552	}
553	}
554	else { / single-char tokens ('+', '', '%', '{', '}', ...) /*
555	int c = ls->current;
556	next(ls);
557	return c;
558	}
559	}
560	}
561	}
562	}
563
564
565	void luaX_next (LexState *ls) {
566	ls->lastline = ls->linenumber;
567	if (ls->lookahead.token != TK_EOS) { / is there a look-ahead token? /
568	ls->t = ls->lookahead; / use this one /
569	ls->lookahead.token = TK_EOS; / and discharge it /
570	}
571	else
572	ls->t.token = llex(ls, &ls->t.seminfo); / read next token /
573	}
574
575
576	int luaX_lookahead (LexState *ls) {
577	lua_assert(ls->lookahead.token == TK_EOS);
578	ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
579	return ls->lookahead.token;
580	}
581
582

Browse the source code of Aseprite/third_party/lua/llex.c