/* lj_lex.c source code [Aerospike/modules/luajit/src/lj_lex.c] */

/*
** Lexical analyzer.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
*/

9	#define lj_lex_c
10	#define LUA_CORE
11
12	#include "lj_obj.h"
13	#include "lj_gc.h"
14	#include "lj_err.h"
15	#include "lj_str.h"
16	#if LJ_HASFFI
17	#include "lj_tab.h"
18	#include "lj_ctype.h"
19	#include "lj_cdata.h"
20	#include "lualib.h"
21	#endif
22	#include "lj_state.h"
23	#include "lj_lex.h"
24	#include "lj_parse.h"
25	#include "lj_char.h"
26	#include "lj_strscan.h"
27
28	/ Lua lexer token names. /
29	static const char *const tokennames[] = {
30	#define TKSTR1(name) #name,
31	#define TKSTR2(name, sym) #sym,
32	TKDEF(TKSTR1, TKSTR2)
33	#undef TKSTR1
34	#undef TKSTR2
35	NULL
36	};
37
/* -- Buffer handling ----------------------------------------------------- */

/* Convert a (possibly signed) char to a non-negative int in 0..255. */
#define char2int(c)		((int)(uint8_t)(c))
/* Advance to the next input character and store it in ls->current.
** Takes a byte from the buffered chunk while ls->n is positive, otherwise
** asks fillbuf() for more input. Note: evaluates `ls' more than once.
*/
#define next(ls) \
  ((ls)->current = ((ls)->n--) > 0 ? char2int(*(ls)->p++) : fillbuf(ls))
/* Append the current character to the token buffer, then advance. */
#define save_and_next(ls)	(save(ls, (ls)->current), next(ls))
/* True if the current character is '\n' or '\r'. */
#define currIsNewline(ls) \
  ((ls)->current == '\n' || (ls)->current == '\r')
/* Pseudo-character stored in ls->current once the input is exhausted. */
#define END_OF_STREAM		(-1)

47	static int fillbuf(LexState *ls)
48	{
49	size_t sz;
50	const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
51	if (buf == NULL \|\| sz == `0`) return END_OF_STREAM;
52	ls->n = (MSize)sz - `1`;
53	ls->p = buf;
54	return char2int(*(ls->p++));
55	}
56
57	static LJ_NOINLINE void save_grow(LexState ls, int* c)
58	{
59	MSize newsize;
60	if (ls->sb.sz >= LJ_MAX_STR/`2`)
61	lj_lex_error(ls, `0`, LJ_ERR_XELEM);
62	newsize = ls->sb.sz * `2`;
63	lj_str_resizebuf(ls->L, &ls->sb, newsize);
64	ls->sb.buf[ls->sb.n++] = (char)c;
65	}
66
67	static LJ_AINLINE void save(LexState ls, int* c)
68	{
69	if (LJ_UNLIKELY(ls->sb.n + `1` > ls->sb.sz))
70	save_grow(ls, c);
71	else
72	ls->sb.buf[ls->sb.n++] = (char)c;
73	}
74
75	static void inclinenumber(LexState *ls)
76	{
77	int old = ls->current;
78	lua_assert(currIsNewline(ls));
79	next(ls); / skip `\n' or `\r' /
80	if (currIsNewline(ls) && ls->current != old)
81	next(ls); / skip `\n\r' or `\r\n' /
82	if (++ls->linenumber >= LJ_MAX_LINE)
83	lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
84	}
85
/* -- Scanner for terminals ----------------------------------------------- */

88	/ Parse a number literal. /
89	static void lex_number(LexState ls, TValue tv)
90	{
91	StrScanFmt fmt;
92	int c, xp = `'e'`;
93	lua_assert(lj_char_isdigit(ls->current));
94	if ((c = ls->current) == `'0'`) {
95	save_and_next(ls);
96	if ((ls->current \| `0x20`) == `'x'`) xp = `'p'`;
97	}
98	while (lj_char_isident(ls->current) \|\| ls->current == `'.'` \|\|
99	((ls->current == `'-'` \|\| ls->current == `'+'`) && (c \| `0x20`) == xp)) {
100	c = ls->current;
101	save_and_next(ls);
102	}
103	save(ls, `'\0'`);
104	fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
105	(LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) \|
106	(LJ_HASFFI ? (STRSCAN_OPT_LL\|STRSCAN_OPT_IMAG) : `0`));
107	if (LJ_DUALNUM && fmt == STRSCAN_INT) {
108	setitype(tv, LJ_TISNUM);
109	} else if (fmt == STRSCAN_NUM) {
110	/ Already in correct format. /
111	#if LJ_HASFFI
112	} else if (fmt != STRSCAN_ERROR) {
113	lua_State *L = ls->L;
114	GCcdata *cd;
115	lua_assert(fmt == STRSCAN_I64 \|\| fmt == STRSCAN_U64 \|\| fmt == STRSCAN_IMAG);
116	if (!ctype_ctsG(G(L))) {
117	ptrdiff_t oldtop = savestack(L, L->top);
118	luaopen_ffi(L); / Load FFI library on-demand. /
119	L->top = restorestack(L, oldtop);
120	}
121	if (fmt == STRSCAN_IMAG) {
122	cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, `2`*sizeof(double));
123	((double *)cdataptr(cd))[`0`] = `0`;
124	((double *)cdataptr(cd))[`1`] = numV(tv);
125	} else {
126	cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, `8`);
127	(uint64_t )cdataptr(cd) = tv->u64;
128	}
129	lj_parse_keepcdata(ls, tv, cd);
130	#endif
131	} else {
132	lua_assert(fmt == STRSCAN_ERROR);
133	lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
134	}
135	}
136
137	static int skip_sep(LexState *ls)
138	{
139	int count = `0`;
140	int s = ls->current;
141	lua_assert(s == `'['` \|\| s == `']'`);
142	save_and_next(ls);
143	while (ls->current == `'='`) {
144	save_and_next(ls);
145	count++;
146	}
147	return (ls->current == s) ? count : (-count) - `1`;
148	}
149
150	static void read_long_string(LexState ls, TValue tv, int sep)
151	{
152	save_and_next(ls); / skip 2nd `[' /
153	if (currIsNewline(ls)) / string starts with a newline? /
154	inclinenumber(ls); / skip it /
155	for (;;) {
156	switch (ls->current) {
157	case END_OF_STREAM:
158	lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
159	break;
160	case `']'`:
161	if (skip_sep(ls) == sep) {
162	save_and_next(ls); / skip 2nd `]' /
163	goto endloop;
164	}
165	break;
166	case `'\n'`:
167	case `'\r'`:
168	save(ls, `'\n'`);
169	inclinenumber(ls);
170	if (!tv) lj_str_resetbuf(&ls->sb); / avoid wasting space /
171	break;
172	default:
173	if (tv) save_and_next(ls);
174	else next(ls);
175	break;
176	}
177	} endloop:
178	if (tv) {
179	GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (`2` + (MSize)sep),
180	ls->sb.n - `2`*(`2` + (MSize)sep));
181	setstrV(ls->L, tv, str);
182	}
183	}
184
185	static void read_string(LexState ls, int* delim, TValue *tv)
186	{
187	save_and_next(ls);
188	while (ls->current != delim) {
189	switch (ls->current) {
190	case END_OF_STREAM:
191	lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
192	continue;
193	case `'\n'`:
194	case `'\r'`:
195	lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
196	continue;
197	case `'\\'`: {
198	int c = next(ls); / Skip the '\\'. /
199	switch (c) {
200	case `'a'`: c = `'\a'`; break;
201	case `'b'`: c = `'\b'`; break;
202	case `'f'`: c = `'\f'`; break;
203	case `'n'`: c = `'\n'`; break;
204	case `'r'`: c = `'\r'`; break;
205	case `'t'`: c = `'\t'`; break;
206	case `'v'`: c = `'\v'`; break;
207	case `'x'`: / Hexadecimal escape '\xXX'. /
208	c = (next(ls) & `15u`) << `4`;
209	if (!lj_char_isdigit(ls->current)) {
210	if (!lj_char_isxdigit(ls->current)) goto err_xesc;
211	c += `9` << `4`;
212	}
213	c += (next(ls) & `15u`);
214	if (!lj_char_isdigit(ls->current)) {
215	if (!lj_char_isxdigit(ls->current)) goto err_xesc;
216	c += `9`;
217	}
218	break;
219	case `'z'`: / Skip whitespace. /
220	next(ls);
221	while (lj_char_isspace(ls->current))
222	if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
223	continue;
224	case `'\n'`: case `'\r'`: save(ls, `'\n'`); inclinenumber(ls); continue;
225	case `'\\'`: case `'\"'`: case `'\''`: break;
226	case END_OF_STREAM: continue;
227	default:
228	if (!lj_char_isdigit(c))
229	goto err_xesc;
230	c -= `'0'`; / Decimal escape '\ddd'. /
231	if (lj_char_isdigit(next(ls))) {
232	c = c*`10` + (ls->current - `'0'`);
233	if (lj_char_isdigit(next(ls))) {
234	c = c*`10` + (ls->current - `'0'`);
235	if (c > `255`) {
236	err_xesc:
237	lj_lex_error(ls, TK_string, LJ_ERR_XESC);
238	}
239	next(ls);
240	}
241	}
242	save(ls, c);
243	continue;
244	}
245	save(ls, c);
246	next(ls);
247	continue;
248	}
249	default:
250	save_and_next(ls);
251	break;
252	}
253	}
254	save_and_next(ls); / skip delimiter /
255	setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + `1`, ls->sb.n - `2`));
256	}
257
/* -- Main lexical scanner ------------------------------------------------ */

260	static int llex(LexState ls, TValue tv)
261	{
262	lj_str_resetbuf(&ls->sb);
263	for (;;) {
264	if (lj_char_isident(ls->current)) {
265	GCstr *s;
266	if (lj_char_isdigit(ls->current)) { / Numeric literal. /
267	lex_number(ls, tv);
268	return TK_number;
269	}
270	/ Identifier or reserved word. /
271	do {
272	save_and_next(ls);
273	} while (lj_char_isident(ls->current));
274	s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
275	setstrV(ls->L, tv, s);
276	if (s->reserved > `0`) / Reserved word? /
277	return TK_OFS + s->reserved;
278	return TK_name;
279	}
280	switch (ls->current) {
281	case `'\n'`:
282	case `'\r'`:
283	inclinenumber(ls);
284	continue;
285	case `' '`:
286	case `'\t'`:
287	case `'\v'`:
288	case `'\f'`:
289	next(ls);
290	continue;
291	case `'-'`:
292	next(ls);
293	if (ls->current != `'-'`) return `'-'`;
294	/ else is a comment /
295	next(ls);
296	if (ls->current == `'['`) {
297	int sep = skip_sep(ls);
298	lj_str_resetbuf(&ls->sb); / `skip_sep' may dirty the buffer /
299	if (sep >= `0`) {
300	read_long_string(ls, NULL, sep); / long comment /
301	lj_str_resetbuf(&ls->sb);
302	continue;
303	}
304	}
305	/ else short comment /
306	while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
307	next(ls);
308	continue;
309	case `'['`: {
310	int sep = skip_sep(ls);
311	if (sep >= `0`) {
312	read_long_string(ls, tv, sep);
313	return TK_string;
314	} else if (sep == -`1`) {
315	return `'['`;
316	} else {
317	lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
318	continue;
319	}
320	}
321	case `'='`:
322	next(ls);
323	if (ls->current != `'='`) return `'='`; else { next(ls); return TK_eq; }
324	case `'<'`:
325	next(ls);
326	if (ls->current != `'='`) return `'<'`; else { next(ls); return TK_le; }
327	case `'>'`:
328	next(ls);
329	if (ls->current != `'='`) return `'>'`; else { next(ls); return TK_ge; }
330	case `'~'`:
331	next(ls);
332	if (ls->current != `'='`) return `'~'`; else { next(ls); return TK_ne; }
333	case `':'`:
334	next(ls);
335	if (ls->current != `':'`) return `':'`; else { next(ls); return TK_label; }
336	case `'"'`:
337	case `'\''`:
338	read_string(ls, ls->current, tv);
339	return TK_string;
340	case `'.'`:
341	save_and_next(ls);
342	if (ls->current == `'.'`) {
343	next(ls);
344	if (ls->current == `'.'`) {
345	next(ls);
346	return TK_dots; / ... /
347	}
348	return TK_concat; / .. /
349	} else if (!lj_char_isdigit(ls->current)) {
350	return `'.'`;
351	} else {
352	lex_number(ls, tv);
353	return TK_number;
354	}
355	case END_OF_STREAM:
356	return TK_eof;
357	default: {
358	int c = ls->current;
359	next(ls);
360	return c; / Single-char tokens (+ - / ...). /
361	}
362	}
363	}
364	}
365
/* -- Lexer API ----------------------------------------------------------- */

368	/ Setup lexer state. /
369	int lj_lex_setup(lua_State L, LexState ls)
370	{
371	int header = `0`;
372	ls->L = L;
373	ls->fs = NULL;
374	ls->n = `0`;
375	ls->p = NULL;
376	ls->vstack = NULL;
377	ls->sizevstack = `0`;
378	ls->vtop = `0`;
379	ls->bcstack = NULL;
380	ls->sizebcstack = `0`;
381	ls->lookahead = TK_eof; / No look-ahead token. /
382	ls->linenumber = `1`;
383	ls->lastline = `1`;
384	lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
385	next(ls); / Read-ahead first char. /
386	if (ls->current == `0xef` && ls->n >= `2` && char2int(ls->p[`0`]) == `0xbb` &&
387	char2int(ls->p[`1`]) == `0xbf`) { / Skip UTF-8 BOM (if buffered). /
388	ls->n -= `2`;
389	ls->p += `2`;
390	next(ls);
391	header = `1`;
392	}
393	if (ls->current == `'#'`) { / Skip POSIX #! header line. /
394	do {
395	next(ls);
396	if (ls->current == END_OF_STREAM) return `0`;
397	} while (!currIsNewline(ls));
398	inclinenumber(ls);
399	header = `1`;
400	}
401	if (ls->current == LUA_SIGNATURE[`0`]) { / Bytecode dump. /
402	if (header) {
403	/*
404	** Loading bytecode with an extra header is disabled for security
405	** reasons. This may circumvent the usual check for bytecode vs.
406	** Lua code by looking at the first char. Since this is a potential
407	** security violation no attempt is made to echo the chunkname either.
408	*/
409	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_BCBAD));
410	lj_err_throw(L, LUA_ERRSYNTAX);
411	}
412	return `1`;
413	}
414	return `0`;
415	}
416
417	/ Cleanup lexer state. /
418	void lj_lex_cleanup(lua_State L, LexState ls)
419	{
420	global_State *g = G(L);
421	lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
422	lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
423	lj_str_freebuf(g, &ls->sb);
424	}
425
426	void lj_lex_next(LexState *ls)
427	{
428	ls->lastline = ls->linenumber;
429	if (LJ_LIKELY(ls->lookahead == TK_eof)) { / No lookahead token? /
430	ls->token = llex(ls, &ls->tokenval); / Get next token. /
431	} else { / Otherwise return lookahead token. /
432	ls->token = ls->lookahead;
433	ls->lookahead = TK_eof;
434	ls->tokenval = ls->lookaheadval;
435	}
436	}
437
438	LexToken lj_lex_lookahead(LexState *ls)
439	{
440	lua_assert(ls->lookahead == TK_eof);
441	ls->lookahead = llex(ls, &ls->lookaheadval);
442	return ls->lookahead;
443	}
444
445	const char lj_lex_token2str(LexState ls, LexToken token)
446	{
447	if (token > TK_OFS)
448	return tokennames[token-TK_OFS-`1`];
449	else if (!lj_char_iscntrl(token))
450	return lj_str_pushf(ls->L, "%c", token);
451	else
452	return lj_str_pushf(ls->L, "char(%d)", token);
453	}
454
455	void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
456	{
457	const char *tok;
458	va_list argp;
459	if (token == `0`) {
460	tok = NULL;
461	} else if (token == TK_name \|\| token == TK_string \|\| token == TK_number) {
462	save(ls, `'\0'`);
463	tok = ls->sb.buf;
464	} else {
465	tok = lj_lex_token2str(ls, token);
466	}
467	va_start(argp, em);
468	lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp);
469	va_end(argp);
470	}
471
472	void lj_lex_init(lua_State *L)
473	{
474	uint32_t i;
475	for (i = `0`; i < TK_RESERVED; i++) {
476	GCstr *s = lj_str_newz(L, tokennames[i]);
477	fixstring(s); / Reserved words are never collected. /
478	s->reserved = (uint8_t)(i+`1`);
479	}
480	}
481
482

/* Browse the source code of Aerospike/modules/luajit/src/lj_lex.c */