1/*
2** Bytecode reader.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_bcread_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h"
14#include "lj_bc.h"
15#if LJ_HASFFI
16#include "lj_ctype.h"
17#include "lj_cdata.h"
18#include "lualib.h"
19#endif
20#include "lj_lex.h"
21#include "lj_bcdump.h"
22#include "lj_state.h"
23
/*
** Reuse some lexer fields for our own purposes:
** - ls->level holds the flags read from the bytecode dump header.
** - ls->lastline holds the Lua stack top saved on entry (via savestack()).
** All macro arguments are parenthesized, so any pointer expression
** (e.g. &state) can be passed safely.
*/
#define bcread_flags(ls)	((ls)->level)
/* True if the endianess of the dump differs from the host. */
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
/* Recover the stack top saved by bcread_savetop(). */
#define bcread_oldtop(L, ls)	restorestack((L), (ls)->lastline)
/* Remember the given stack top in the lexer state. */
#define bcread_savetop(L, ls, top) \
  ((ls)->lastline = (BCLine)savestack((L), (top)))
31
32/* -- Input buffer handling ----------------------------------------------- */
33
34/* Throw reader error. */
35static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
36{
37 lua_State *L = ls->L;
38 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX);
43}
44
45/* Resize input buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{
59 lua_assert(len != 0);
60 if (len > LJ_MAX_MEM || ls->current < 0)
61 bcread_error(ls, LJ_ERR_BCBAD);
62 do {
63 const char *buf;
64 size_t size;
65 if (ls->n) { /* Copy remainder to buffer. */
66 if (ls->sb.n) { /* Move down in buffer. */
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
68 if (ls->n != ls->sb.n)
69 memmove(ls->sb.buf, ls->p, ls->n);
70 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len);
72 memcpy(ls->sb.buf, ls->p, ls->n);
73 }
74 ls->p = ls->sb.buf;
75 }
76 ls->sb.n = ls->n;
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */
81 break;
82 }
83 if (ls->sb.n) { /* Append to buffer. */
84 MSize n = ls->sb.n + (MSize)size;
85 bcread_resize(ls, n < len ? len : n);
86 memcpy(ls->sb.buf + ls->sb.n, buf, size);
87 ls->n = ls->sb.n = n;
88 ls->p = ls->sb.buf;
89 } else { /* Return buffer provided by reader. */
90 ls->n = (MSize)size;
91 ls->p = buf;
92 }
93 } while (ls->n < len);
94}
95
96/* Need a certain number of bytes. */
97static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
98{
99 if (LJ_UNLIKELY(ls->n < len))
100 bcread_fill(ls, len, 1);
101}
102
103/* Want to read up to a certain number of bytes, but may need less. */
104static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
105{
106 if (LJ_UNLIKELY(ls->n < len))
107 bcread_fill(ls, len, 0);
108}
109
/*
** Account for consumed input: decrement the remaining byte count by one
** (bcread_dec) or by len (bcread_consume).
** The availability condition is only passed to check_exp() -- presumably
** an assertion that is compiled out in release builds (TODO confirm) --
** so callers must make sure enough data is buffered.
** Arguments are parenthesized, so any pointer expression can be passed.
*/
#define bcread_dec(ls)		check_exp((ls)->n > 0, (ls)->n--)
#define bcread_consume(ls, len)	check_exp((ls)->n >= (len), (ls)->n -= (len))
112
113/* Return memory block from buffer. */
114static uint8_t *bcread_mem(LexState *ls, MSize len)
115{
116 uint8_t *p = (uint8_t *)ls->p;
117 bcread_consume(ls, len);
118 ls->p = (char *)p + len;
119 return p;
120}
121
122/* Copy memory block from buffer. */
123static void bcread_block(LexState *ls, void *q, MSize len)
124{
125 memcpy(q, bcread_mem(ls, len), len);
126}
127
128/* Read byte from buffer. */
129static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
130{
131 bcread_dec(ls);
132 return (uint32_t)(uint8_t)*ls->p++;
133}
134
135/* Read ULEB128 value from buffer. */
136static uint32_t bcread_uleb128(LexState *ls)
137{
138 const uint8_t *p = (const uint8_t *)ls->p;
139 uint32_t v = *p++;
140 if (LJ_UNLIKELY(v >= 0x80)) {
141 int sh = 0;
142 v &= 0x7f;
143 do {
144 v |= ((*p & 0x7f) << (sh += 7));
145 bcread_dec(ls);
146 } while (*p++ >= 0x80);
147 }
148 bcread_dec(ls);
149 ls->p = (char *)p;
150 return v;
151}
152
153/* Read top 32 bits of 33 bit ULEB128 value from buffer. */
154static uint32_t bcread_uleb128_33(LexState *ls)
155{
156 const uint8_t *p = (const uint8_t *)ls->p;
157 uint32_t v = (*p++ >> 1);
158 if (LJ_UNLIKELY(v >= 0x40)) {
159 int sh = -1;
160 v &= 0x3f;
161 do {
162 v |= ((*p & 0x7f) << (sh += 7));
163 bcread_dec(ls);
164 } while (*p++ >= 0x80);
165 }
166 bcread_dec(ls);
167 ls->p = (char *)p;
168 return v;
169}
170
171/* -- Bytecode reader ----------------------------------------------------- */
172
173/* Read debug info of a prototype. */
174static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
175{
176 void *lineinfo = (void *)proto_lineinfo(pt);
177 bcread_block(ls, lineinfo, sizedbg);
178 /* Swap lineinfo if the endianess differs. */
179 if (bcread_swap(ls) && pt->numline >= 256) {
180 MSize i, n = pt->sizebc-1;
181 if (pt->numline < 65536) {
182 uint16_t *p = (uint16_t *)lineinfo;
183 for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
184 } else {
185 uint32_t *p = (uint32_t *)lineinfo;
186 for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
187 }
188 }
189}
190
191/* Find pointer to varinfo. */
192static const void *bcread_varinfo(GCproto *pt)
193{
194 const uint8_t *p = proto_uvinfo(pt);
195 MSize n = pt->sizeuv;
196 if (n) while (*p++ || --n) ;
197 return p;
198}
199
200/* Read a single constant key/value of a template table. */
201static void bcread_ktabk(LexState *ls, TValue *o)
202{
203 MSize tp = bcread_uleb128(ls);
204 if (tp >= BCDUMP_KTAB_STR) {
205 MSize len = tp - BCDUMP_KTAB_STR;
206 const char *p = (const char *)bcread_mem(ls, len);
207 setstrV(ls->L, o, lj_str_new(ls->L, p, len));
208 } else if (tp == BCDUMP_KTAB_INT) {
209 setintV(o, (int32_t)bcread_uleb128(ls));
210 } else if (tp == BCDUMP_KTAB_NUM) {
211 o->u32.lo = bcread_uleb128(ls);
212 o->u32.hi = bcread_uleb128(ls);
213 } else {
214 lua_assert(tp <= BCDUMP_KTAB_TRUE);
215 setitype(o, ~tp);
216 }
217}
218
219/* Read a template table. */
220static GCtab *bcread_ktab(LexState *ls)
221{
222 MSize narray = bcread_uleb128(ls);
223 MSize nhash = bcread_uleb128(ls);
224 GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
225 if (narray) { /* Read array entries. */
226 MSize i;
227 TValue *o = tvref(t->array);
228 for (i = 0; i < narray; i++, o++)
229 bcread_ktabk(ls, o);
230 }
231 if (nhash) { /* Read hash entries. */
232 MSize i;
233 for (i = 0; i < nhash; i++) {
234 TValue key;
235 bcread_ktabk(ls, &key);
236 lua_assert(!tvisnil(&key));
237 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
238 }
239 }
240 return t;
241}
242
243/* Read GC constants of a prototype. */
244static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
245{
246 MSize i;
247 GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
248 for (i = 0; i < sizekgc; i++, kr++) {
249 MSize tp = bcread_uleb128(ls);
250 if (tp >= BCDUMP_KGC_STR) {
251 MSize len = tp - BCDUMP_KGC_STR;
252 const char *p = (const char *)bcread_mem(ls, len);
253 setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
254 } else if (tp == BCDUMP_KGC_TAB) {
255 setgcref(*kr, obj2gco(bcread_ktab(ls)));
256#if LJ_HASFFI
257 } else if (tp != BCDUMP_KGC_CHILD) {
258 CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
259 tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
260 CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
261 GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
262 TValue *p = (TValue *)cdataptr(cd);
263 setgcref(*kr, obj2gco(cd));
264 p[0].u32.lo = bcread_uleb128(ls);
265 p[0].u32.hi = bcread_uleb128(ls);
266 if (tp == BCDUMP_KGC_COMPLEX) {
267 p[1].u32.lo = bcread_uleb128(ls);
268 p[1].u32.hi = bcread_uleb128(ls);
269 }
270#endif
271 } else {
272 lua_State *L = ls->L;
273 lua_assert(tp == BCDUMP_KGC_CHILD);
274 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
275 bcread_error(ls, LJ_ERR_BCBAD);
276 L->top--;
277 setgcref(*kr, obj2gco(protoV(L->top)));
278 }
279 }
280}
281
282/* Read number constants of a prototype. */
283static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
284{
285 MSize i;
286 TValue *o = mref(pt->k, TValue);
287 for (i = 0; i < sizekn; i++, o++) {
288 int isnum = (ls->p[0] & 1);
289 uint32_t lo = bcread_uleb128_33(ls);
290 if (isnum) {
291 o->u32.lo = lo;
292 o->u32.hi = bcread_uleb128(ls);
293 } else {
294 setintV(o, lo);
295 }
296 }
297}
298
299/* Read bytecode instructions. */
300static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
301{
302 BCIns *bc = proto_bc(pt);
303 bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
304 pt->framesize, 0);
305 bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
306 /* Swap bytecode instructions if the endianess differs. */
307 if (bcread_swap(ls)) {
308 MSize i;
309 for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);
310 }
311}
312
313/* Read upvalue refs. */
314static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
315{
316 if (sizeuv) {
317 uint16_t *uv = proto_uv(pt);
318 bcread_block(ls, uv, sizeuv*2);
319 /* Swap upvalue refs if the endianess differs. */
320 if (bcread_swap(ls)) {
321 MSize i;
322 for (i = 0; i < sizeuv; i++)
323 uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
324 }
325 }
326}
327
328/* Read a prototype. */
329static GCproto *bcread_proto(LexState *ls)
330{
331 GCproto *pt;
332 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
333 MSize ofsk, ofsuv, ofsdbg;
334 MSize sizedbg = 0;
335 BCLine firstline = 0, numline = 0;
336 MSize len, startn;
337
338 /* Read length. */
339 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
340 ls->n--; ls->p++;
341 return NULL;
342 }
343 bcread_want(ls, 5);
344 len = bcread_uleb128(ls);
345 if (!len) return NULL; /* EOF */
346 bcread_need(ls, len);
347 startn = ls->n;
348
349 /* Read prototype header. */
350 flags = bcread_byte(ls);
351 numparams = bcread_byte(ls);
352 framesize = bcread_byte(ls);
353 sizeuv = bcread_byte(ls);
354 sizekgc = bcread_uleb128(ls);
355 sizekn = bcread_uleb128(ls);
356 sizebc = bcread_uleb128(ls) + 1;
357 if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
358 sizedbg = bcread_uleb128(ls);
359 if (sizedbg) {
360 firstline = bcread_uleb128(ls);
361 numline = bcread_uleb128(ls);
362 }
363 }
364
365 /* Calculate total size of prototype including all colocated arrays. */
366 sizept = (MSize)sizeof(GCproto) +
367 sizebc*(MSize)sizeof(BCIns) +
368 sizekgc*(MSize)sizeof(GCRef);
369 sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);
370 ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
371 ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;
372 ofsdbg = sizept; sizept += sizedbg;
373
374 /* Allocate prototype object and initialize its fields. */
375 pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
376 pt->gct = ~LJ_TPROTO;
377 pt->numparams = (uint8_t)numparams;
378 pt->framesize = (uint8_t)framesize;
379 pt->sizebc = sizebc;
380 setmref(pt->k, (char *)pt + ofsk);
381 setmref(pt->uv, (char *)pt + ofsuv);
382 pt->sizekgc = 0; /* Set to zero until fully initialized. */
383 pt->sizekn = sizekn;
384 pt->sizept = sizept;
385 pt->sizeuv = (uint8_t)sizeuv;
386 pt->flags = (uint8_t)flags;
387 pt->trace = 0;
388 setgcref(pt->chunkname, obj2gco(ls->chunkname));
389
390 /* Close potentially uninitialized gap between bc and kgc. */
391 *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;
392
393 /* Read bytecode instructions and upvalue refs. */
394 bcread_bytecode(ls, pt, sizebc);
395 bcread_uv(ls, pt, sizeuv);
396
397 /* Read constants. */
398 bcread_kgc(ls, pt, sizekgc);
399 pt->sizekgc = sizekgc;
400 bcread_knum(ls, pt, sizekn);
401
402 /* Read and initialize debug info. */
403 pt->firstline = firstline;
404 pt->numline = numline;
405 if (sizedbg) {
406 MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);
407 setmref(pt->lineinfo, (char *)pt + ofsdbg);
408 setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
409 bcread_dbg(ls, pt, sizedbg);
410 setmref(pt->varinfo, bcread_varinfo(pt));
411 } else {
412 setmref(pt->lineinfo, NULL);
413 setmref(pt->uvinfo, NULL);
414 setmref(pt->varinfo, NULL);
415 }
416
417 if (len != startn - ls->n)
418 bcread_error(ls, LJ_ERR_BCBAD);
419 return pt;
420}
421
422/* Read and check header of bytecode dump. */
423static int bcread_header(LexState *ls)
424{
425 uint32_t flags;
426 bcread_want(ls, 3+5+5);
427 if (bcread_byte(ls) != BCDUMP_HEAD2 ||
428 bcread_byte(ls) != BCDUMP_HEAD3 ||
429 bcread_byte(ls) != BCDUMP_VERSION) return 0;
430 bcread_flags(ls) = flags = bcread_uleb128(ls);
431 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
432 if ((flags & BCDUMP_F_FFI)) {
433#if LJ_HASFFI
434 lua_State *L = ls->L;
435 if (!ctype_ctsG(G(L))) {
436 ptrdiff_t oldtop = savestack(L, L->top);
437 luaopen_ffi(L); /* Load FFI library on-demand. */
438 L->top = restorestack(L, oldtop);
439 }
440#else
441 return 0;
442#endif
443 }
444 if ((flags & BCDUMP_F_STRIP)) {
445 ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
446 } else {
447 MSize len = bcread_uleb128(ls);
448 bcread_need(ls, len);
449 ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
450 }
451 return 1; /* Ok. */
452}
453
454/* Read a bytecode dump. */
455GCproto *lj_bcread(LexState *ls)
456{
457 lua_State *L = ls->L;
458 lua_assert(ls->current == BCDUMP_HEAD1);
459 bcread_savetop(L, ls, L->top);
460 lj_str_resetbuf(&ls->sb);
461 /* Check for a valid bytecode dump header. */
462 if (!bcread_header(ls))
463 bcread_error(ls, LJ_ERR_BCFMT);
464 for (;;) { /* Process all prototypes in the bytecode dump. */
465 GCproto *pt = bcread_proto(ls);
466 if (!pt) break;
467 setprotoV(L, L->top, pt);
468 incr_top(L);
469 }
470 if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls))
471 bcread_error(ls, LJ_ERR_BCBAD);
472 /* Pop off last prototype. */
473 L->top--;
474 return protoV(L->top);
475}
476
477