1/*
2** Bytecode reader.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_bcread_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_tab.h"
15#include "lj_bc.h"
16#if LJ_HASFFI
17#include "lj_ctype.h"
18#include "lj_cdata.h"
19#include "lualib.h"
20#endif
21#include "lj_lex.h"
22#include "lj_bcdump.h"
23#include "lj_state.h"
24#include "lj_strfmt.h"
25
/*
** The bytecode reader reuses two lexer fields for its own purposes:
**   ls->level    stores the flags value read from the dump header.
**   ls->lastline stores a saved Lua stack position (see bcread_savetop).
*/
#define bcread_flags(ls)	(ls)->level
/* True if the BE flag of the dump differs from the host setting (LJ_BE). */
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
/* Turn the value stored by bcread_savetop() back into a stack pointer. */
#define bcread_oldtop(L, ls)	restorestack((L), (ls)->lastline)
/* Store a stack position in the reused lexer field via savestack(). */
#define bcread_savetop(L, ls, top) \
  (ls)->lastline = (BCLine)savestack((L), (top))
33
34/* -- Input buffer handling ----------------------------------------------- */
35
36/* Throw reader error. */
37static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38{
39 lua_State *L = ls->L;
40 const char *name = ls->chunkarg;
41 if (*name == BCDUMP_HEAD1) name = "(binary)";
42 else if (*name == '@' || *name == '=') name++;
43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
44 lj_err_throw(L, LUA_ERRSYNTAX);
45}
46
47/* Refill buffer. */
48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
49{
50 lj_assertLS(len != 0, "empty refill");
51 if (len > LJ_MAX_BUF || ls->c < 0)
52 bcread_error(ls, LJ_ERR_BCBAD);
53 do {
54 const char *buf;
55 size_t sz;
56 char *p = sbufB(&ls->sb);
57 MSize n = (MSize)(ls->pe - ls->p);
58 if (n) { /* Copy remainder to buffer. */
59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
60 lj_assertLS(ls->pe == sbufP(&ls->sb), "bad buffer pointer");
61 if (ls->p != p) memmove(p, ls->p, n);
62 } else { /* Copy from buffer provided by reader. */
63 p = lj_buf_need(&ls->sb, len);
64 memcpy(p, ls->p, n);
65 }
66 ls->p = p;
67 ls->pe = p + n;
68 }
69 setsbufP(&ls->sb, p + n);
70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
71 if (buf == NULL || sz == 0) { /* EOF? */
72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
73 ls->c = -1; /* Only bad if we get called again. */
74 break;
75 }
76 if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
77 if (n) { /* Append to buffer. */
78 n += (MSize)sz;
79 p = lj_buf_need(&ls->sb, n < len ? len : n);
80 memcpy(sbufP(&ls->sb), buf, sz);
81 setsbufP(&ls->sb, p + n);
82 ls->p = p;
83 ls->pe = p + n;
84 } else { /* Return buffer provided by reader. */
85 ls->p = buf;
86 ls->pe = buf + sz;
87 }
88 } while ((MSize)(ls->pe - ls->p) < len);
89}
90
91/* Need a certain number of bytes. */
92static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
93{
94 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
95 bcread_fill(ls, len, 1);
96}
97
98/* Want to read up to a certain number of bytes, but may need less. */
99static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
100{
101 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
102 bcread_fill(ls, len, 0);
103}
104
105/* Return memory block from buffer. */
106static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
107{
108 uint8_t *p = (uint8_t *)ls->p;
109 ls->p += len;
110 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
111 return p;
112}
113
114/* Copy memory block from buffer. */
115static void bcread_block(LexState *ls, void *q, MSize len)
116{
117 memcpy(q, bcread_mem(ls, len), len);
118}
119
120/* Read byte from buffer. */
121static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
122{
123 lj_assertLS(ls->p < ls->pe, "buffer read overflow");
124 return (uint32_t)(uint8_t)*ls->p++;
125}
126
127/* Read ULEB128 value from buffer. */
128static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
129{
130 uint32_t v = lj_buf_ruleb128(&ls->p);
131 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
132 return v;
133}
134
135/* Read top 32 bits of 33 bit ULEB128 value from buffer. */
136static uint32_t bcread_uleb128_33(LexState *ls)
137{
138 const uint8_t *p = (const uint8_t *)ls->p;
139 uint32_t v = (*p++ >> 1);
140 if (LJ_UNLIKELY(v >= 0x40)) {
141 int sh = -1;
142 v &= 0x3f;
143 do {
144 v |= ((*p & 0x7f) << (sh += 7));
145 } while (*p++ >= 0x80);
146 }
147 ls->p = (char *)p;
148 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
149 return v;
150}
151
152/* -- Bytecode reader ----------------------------------------------------- */
153
154/* Read debug info of a prototype. */
155static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
156{
157 void *lineinfo = (void *)proto_lineinfo(pt);
158 bcread_block(ls, lineinfo, sizedbg);
159 /* Swap lineinfo if the endianess differs. */
160 if (bcread_swap(ls) && pt->numline >= 256) {
161 MSize i, n = pt->sizebc-1;
162 if (pt->numline < 65536) {
163 uint16_t *p = (uint16_t *)lineinfo;
164 for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
165 } else {
166 uint32_t *p = (uint32_t *)lineinfo;
167 for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
168 }
169 }
170}
171
172/* Find pointer to varinfo. */
173static const void *bcread_varinfo(GCproto *pt)
174{
175 const uint8_t *p = proto_uvinfo(pt);
176 MSize n = pt->sizeuv;
177 if (n) while (*p++ || --n) ;
178 return p;
179}
180
181/* Read a single constant key/value of a template table. */
182static void bcread_ktabk(LexState *ls, TValue *o)
183{
184 MSize tp = bcread_uleb128(ls);
185 if (tp >= BCDUMP_KTAB_STR) {
186 MSize len = tp - BCDUMP_KTAB_STR;
187 const char *p = (const char *)bcread_mem(ls, len);
188 setstrV(ls->L, o, lj_str_new(ls->L, p, len));
189 } else if (tp == BCDUMP_KTAB_INT) {
190 setintV(o, (int32_t)bcread_uleb128(ls));
191 } else if (tp == BCDUMP_KTAB_NUM) {
192 o->u32.lo = bcread_uleb128(ls);
193 o->u32.hi = bcread_uleb128(ls);
194 } else {
195 lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
196 setpriV(o, ~tp);
197 }
198}
199
200/* Read a template table. */
201static GCtab *bcread_ktab(LexState *ls)
202{
203 MSize narray = bcread_uleb128(ls);
204 MSize nhash = bcread_uleb128(ls);
205 GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
206 if (narray) { /* Read array entries. */
207 MSize i;
208 TValue *o = tvref(t->array);
209 for (i = 0; i < narray; i++, o++)
210 bcread_ktabk(ls, o);
211 }
212 if (nhash) { /* Read hash entries. */
213 MSize i;
214 for (i = 0; i < nhash; i++) {
215 TValue key;
216 bcread_ktabk(ls, &key);
217 lj_assertLS(!tvisnil(&key), "nil key");
218 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
219 }
220 }
221 return t;
222}
223
224/* Read GC constants of a prototype. */
225static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
226{
227 MSize i;
228 GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
229 for (i = 0; i < sizekgc; i++, kr++) {
230 MSize tp = bcread_uleb128(ls);
231 if (tp >= BCDUMP_KGC_STR) {
232 MSize len = tp - BCDUMP_KGC_STR;
233 const char *p = (const char *)bcread_mem(ls, len);
234 setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
235 } else if (tp == BCDUMP_KGC_TAB) {
236 setgcref(*kr, obj2gco(bcread_ktab(ls)));
237#if LJ_HASFFI
238 } else if (tp != BCDUMP_KGC_CHILD) {
239 CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
240 tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
241 CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
242 GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
243 TValue *p = (TValue *)cdataptr(cd);
244 setgcref(*kr, obj2gco(cd));
245 p[0].u32.lo = bcread_uleb128(ls);
246 p[0].u32.hi = bcread_uleb128(ls);
247 if (tp == BCDUMP_KGC_COMPLEX) {
248 p[1].u32.lo = bcread_uleb128(ls);
249 p[1].u32.hi = bcread_uleb128(ls);
250 }
251#endif
252 } else {
253 lua_State *L = ls->L;
254 lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
255 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
256 bcread_error(ls, LJ_ERR_BCBAD);
257 L->top--;
258 setgcref(*kr, obj2gco(protoV(L->top)));
259 }
260 }
261}
262
263/* Read number constants of a prototype. */
264static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
265{
266 MSize i;
267 TValue *o = mref(pt->k, TValue);
268 for (i = 0; i < sizekn; i++, o++) {
269 int isnum = (ls->p[0] & 1);
270 uint32_t lo = bcread_uleb128_33(ls);
271 if (isnum) {
272 o->u32.lo = lo;
273 o->u32.hi = bcread_uleb128(ls);
274 } else {
275 setintV(o, lo);
276 }
277 }
278}
279
280/* Read bytecode instructions. */
281static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
282{
283 BCIns *bc = proto_bc(pt);
284 bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
285 pt->framesize, 0);
286 bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
287 /* Swap bytecode instructions if the endianess differs. */
288 if (bcread_swap(ls)) {
289 MSize i;
290 for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);
291 }
292}
293
294/* Read upvalue refs. */
295static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
296{
297 if (sizeuv) {
298 uint16_t *uv = proto_uv(pt);
299 bcread_block(ls, uv, sizeuv*2);
300 /* Swap upvalue refs if the endianess differs. */
301 if (bcread_swap(ls)) {
302 MSize i;
303 for (i = 0; i < sizeuv; i++)
304 uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
305 }
306 }
307}
308
309/* Read a prototype. */
310GCproto *lj_bcread_proto(LexState *ls)
311{
312 GCproto *pt;
313 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
314 MSize ofsk, ofsuv, ofsdbg;
315 MSize sizedbg = 0;
316 BCLine firstline = 0, numline = 0;
317
318 /* Read prototype header. */
319 flags = bcread_byte(ls);
320 numparams = bcread_byte(ls);
321 framesize = bcread_byte(ls);
322 sizeuv = bcread_byte(ls);
323 sizekgc = bcread_uleb128(ls);
324 sizekn = bcread_uleb128(ls);
325 sizebc = bcread_uleb128(ls) + 1;
326 if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
327 sizedbg = bcread_uleb128(ls);
328 if (sizedbg) {
329 firstline = bcread_uleb128(ls);
330 numline = bcread_uleb128(ls);
331 }
332 }
333
334 /* Calculate total size of prototype including all colocated arrays. */
335 sizept = (MSize)sizeof(GCproto) +
336 sizebc*(MSize)sizeof(BCIns) +
337 sizekgc*(MSize)sizeof(GCRef);
338 sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);
339 ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
340 ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;
341 ofsdbg = sizept; sizept += sizedbg;
342
343 /* Allocate prototype object and initialize its fields. */
344 pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
345 pt->gct = ~LJ_TPROTO;
346 pt->numparams = (uint8_t)numparams;
347 pt->framesize = (uint8_t)framesize;
348 pt->sizebc = sizebc;
349 setmref(pt->k, (char *)pt + ofsk);
350 setmref(pt->uv, (char *)pt + ofsuv);
351 pt->sizekgc = 0; /* Set to zero until fully initialized. */
352 pt->sizekn = sizekn;
353 pt->sizept = sizept;
354 pt->sizeuv = (uint8_t)sizeuv;
355 pt->flags = (uint8_t)flags;
356 pt->trace = 0;
357 setgcref(pt->chunkname, obj2gco(ls->chunkname));
358
359 /* Close potentially uninitialized gap between bc and kgc. */
360 *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;
361
362 /* Read bytecode instructions and upvalue refs. */
363 bcread_bytecode(ls, pt, sizebc);
364 bcread_uv(ls, pt, sizeuv);
365
366 /* Read constants. */
367 bcread_kgc(ls, pt, sizekgc);
368 pt->sizekgc = sizekgc;
369 bcread_knum(ls, pt, sizekn);
370
371 /* Read and initialize debug info. */
372 pt->firstline = firstline;
373 pt->numline = numline;
374 if (sizedbg) {
375 MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);
376 setmref(pt->lineinfo, (char *)pt + ofsdbg);
377 setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
378 bcread_dbg(ls, pt, sizedbg);
379 setmref(pt->varinfo, bcread_varinfo(pt));
380 } else {
381 setmref(pt->lineinfo, NULL);
382 setmref(pt->uvinfo, NULL);
383 setmref(pt->varinfo, NULL);
384 }
385 return pt;
386}
387
388/* Read and check header of bytecode dump. */
389static int bcread_header(LexState *ls)
390{
391 uint32_t flags;
392 bcread_want(ls, 3+5+5);
393 if (bcread_byte(ls) != BCDUMP_HEAD2 ||
394 bcread_byte(ls) != BCDUMP_HEAD3 ||
395 bcread_byte(ls) != BCDUMP_VERSION) return 0;
396 bcread_flags(ls) = flags = bcread_uleb128(ls);
397 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
398 if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
399 if ((flags & BCDUMP_F_FFI)) {
400#if LJ_HASFFI
401 lua_State *L = ls->L;
402 if (!ctype_ctsG(G(L))) {
403 ptrdiff_t oldtop = savestack(L, L->top);
404 luaopen_ffi(L); /* Load FFI library on-demand. */
405 L->top = restorestack(L, oldtop);
406 }
407#else
408 return 0;
409#endif
410 }
411 if ((flags & BCDUMP_F_STRIP)) {
412 ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
413 } else {
414 MSize len = bcread_uleb128(ls);
415 bcread_need(ls, len);
416 ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
417 }
418 return 1; /* Ok. */
419}
420
421/* Read a bytecode dump. */
422GCproto *lj_bcread(LexState *ls)
423{
424 lua_State *L = ls->L;
425 lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
426 bcread_savetop(L, ls, L->top);
427 lj_buf_reset(&ls->sb);
428 /* Check for a valid bytecode dump header. */
429 if (!bcread_header(ls))
430 bcread_error(ls, LJ_ERR_BCFMT);
431 for (;;) { /* Process all prototypes in the bytecode dump. */
432 GCproto *pt;
433 MSize len;
434 const char *startp;
435 /* Read length. */
436 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
437 ls->p++;
438 break;
439 }
440 bcread_want(ls, 5);
441 len = bcread_uleb128(ls);
442 if (!len) break; /* EOF */
443 bcread_need(ls, len);
444 startp = ls->p;
445 pt = lj_bcread_proto(ls);
446 if (ls->p != startp + len)
447 bcread_error(ls, LJ_ERR_BCBAD);
448 setprotoV(L, L->top, pt);
449 incr_top(L);
450 }
451 if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
452 bcread_error(ls, LJ_ERR_BCBAD);
453 /* Pop off last prototype. */
454 L->top--;
455 return protoV(L->top);
456}
457
458