1 | /* |
2 | ** Bytecode reader. |
3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #define lj_bcread_c |
7 | #define LUA_CORE |
8 | |
9 | #include "lj_obj.h" |
10 | #include "lj_gc.h" |
11 | #include "lj_err.h" |
12 | #include "lj_str.h" |
13 | #include "lj_tab.h" |
14 | #include "lj_bc.h" |
15 | #if LJ_HASFFI |
16 | #include "lj_ctype.h" |
17 | #include "lj_cdata.h" |
18 | #include "lualib.h" |
19 | #endif |
20 | #include "lj_lex.h" |
21 | #include "lj_bcdump.h" |
22 | #include "lj_state.h" |
23 | |
/*
** Reuse some lexer fields for our own purposes:
**   ls->level    holds the flags word read from the dump header.
**   ls->lastline holds the savestack() value of the stack top on entry.
** All macro arguments are parenthesized so that arbitrary expressions can
** be passed safely. bcread_flags() still expands to an lvalue.
*/
#define bcread_flags(ls)	((ls)->level)
/* True if the byte order of the dump differs from the host byte order. */
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
#define bcread_oldtop(L, ls)	restorestack((L), (ls)->lastline)
#define bcread_savetop(L, ls, top) \
  ((ls)->lastline = (BCLine)savestack((L), (top)))
31 | |
32 | /* -- Input buffer handling ----------------------------------------------- */ |
33 | |
34 | /* Throw reader error. */ |
35 | static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) |
36 | { |
37 | lua_State *L = ls->L; |
38 | const char *name = ls->chunkarg; |
39 | if (*name == BCDUMP_HEAD1) name = "(binary)" ; |
40 | else if (*name == '@' || *name == '=') name++; |
41 | lj_str_pushf(L, "%s: %s" , name, err2msg(em)); |
42 | lj_err_throw(L, LUA_ERRSYNTAX); |
43 | } |
44 | |
45 | /* Resize input buffer. */ |
46 | static void bcread_resize(LexState *ls, MSize len) |
47 | { |
48 | if (ls->sb.sz < len) { |
49 | MSize sz = ls->sb.sz * 2; |
50 | while (len > sz) sz = sz * 2; |
51 | lj_str_resizebuf(ls->L, &ls->sb, sz); |
52 | /* Caveat: this may change ls->sb.buf which may affect ls->p. */ |
53 | } |
54 | } |
55 | |
56 | /* Refill buffer if needed. */ |
57 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) |
58 | { |
59 | lua_assert(len != 0); |
60 | if (len > LJ_MAX_MEM || ls->current < 0) |
61 | bcread_error(ls, LJ_ERR_BCBAD); |
62 | do { |
63 | const char *buf; |
64 | size_t size; |
65 | if (ls->n) { /* Copy remainder to buffer. */ |
66 | if (ls->sb.n) { /* Move down in buffer. */ |
67 | lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); |
68 | if (ls->n != ls->sb.n) |
69 | memmove(ls->sb.buf, ls->p, ls->n); |
70 | } else { /* Copy from buffer provided by reader. */ |
71 | bcread_resize(ls, len); |
72 | memcpy(ls->sb.buf, ls->p, ls->n); |
73 | } |
74 | ls->p = ls->sb.buf; |
75 | } |
76 | ls->sb.n = ls->n; |
77 | buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ |
78 | if (buf == NULL || size == 0) { /* EOF? */ |
79 | if (need) bcread_error(ls, LJ_ERR_BCBAD); |
80 | ls->current = -1; /* Only bad if we get called again. */ |
81 | break; |
82 | } |
83 | if (ls->sb.n) { /* Append to buffer. */ |
84 | MSize n = ls->sb.n + (MSize)size; |
85 | bcread_resize(ls, n < len ? len : n); |
86 | memcpy(ls->sb.buf + ls->sb.n, buf, size); |
87 | ls->n = ls->sb.n = n; |
88 | ls->p = ls->sb.buf; |
89 | } else { /* Return buffer provided by reader. */ |
90 | ls->n = (MSize)size; |
91 | ls->p = buf; |
92 | } |
93 | } while (ls->n < len); |
94 | } |
95 | |
96 | /* Need a certain number of bytes. */ |
97 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) |
98 | { |
99 | if (LJ_UNLIKELY(ls->n < len)) |
100 | bcread_fill(ls, len, 1); |
101 | } |
102 | |
103 | /* Want to read up to a certain number of bytes, but may need less. */ |
104 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) |
105 | { |
106 | if (LJ_UNLIKELY(ls->n < len)) |
107 | bcread_fill(ls, len, 0); |
108 | } |
109 | |
/*
** Account for consumed input: decrement the count of available bytes by one
** (bcread_dec) or by len (bcread_consume). The first check_exp() argument is
** the precondition that enough bytes are available.
** Arguments are parenthesized so that arbitrary expressions can be passed.
*/
#define bcread_dec(ls)		check_exp((ls)->n > 0, (ls)->n--)
#define bcread_consume(ls, len) \
  check_exp((ls)->n >= (len), (ls)->n -= (len))
112 | |
113 | /* Return memory block from buffer. */ |
114 | static uint8_t *bcread_mem(LexState *ls, MSize len) |
115 | { |
116 | uint8_t *p = (uint8_t *)ls->p; |
117 | bcread_consume(ls, len); |
118 | ls->p = (char *)p + len; |
119 | return p; |
120 | } |
121 | |
122 | /* Copy memory block from buffer. */ |
123 | static void bcread_block(LexState *ls, void *q, MSize len) |
124 | { |
125 | memcpy(q, bcread_mem(ls, len), len); |
126 | } |
127 | |
128 | /* Read byte from buffer. */ |
129 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) |
130 | { |
131 | bcread_dec(ls); |
132 | return (uint32_t)(uint8_t)*ls->p++; |
133 | } |
134 | |
135 | /* Read ULEB128 value from buffer. */ |
136 | static uint32_t bcread_uleb128(LexState *ls) |
137 | { |
138 | const uint8_t *p = (const uint8_t *)ls->p; |
139 | uint32_t v = *p++; |
140 | if (LJ_UNLIKELY(v >= 0x80)) { |
141 | int sh = 0; |
142 | v &= 0x7f; |
143 | do { |
144 | v |= ((*p & 0x7f) << (sh += 7)); |
145 | bcread_dec(ls); |
146 | } while (*p++ >= 0x80); |
147 | } |
148 | bcread_dec(ls); |
149 | ls->p = (char *)p; |
150 | return v; |
151 | } |
152 | |
153 | /* Read top 32 bits of 33 bit ULEB128 value from buffer. */ |
154 | static uint32_t bcread_uleb128_33(LexState *ls) |
155 | { |
156 | const uint8_t *p = (const uint8_t *)ls->p; |
157 | uint32_t v = (*p++ >> 1); |
158 | if (LJ_UNLIKELY(v >= 0x40)) { |
159 | int sh = -1; |
160 | v &= 0x3f; |
161 | do { |
162 | v |= ((*p & 0x7f) << (sh += 7)); |
163 | bcread_dec(ls); |
164 | } while (*p++ >= 0x80); |
165 | } |
166 | bcread_dec(ls); |
167 | ls->p = (char *)p; |
168 | return v; |
169 | } |
170 | |
171 | /* -- Bytecode reader ----------------------------------------------------- */ |
172 | |
173 | /* Read debug info of a prototype. */ |
174 | static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg) |
175 | { |
176 | void *lineinfo = (void *)proto_lineinfo(pt); |
177 | bcread_block(ls, lineinfo, sizedbg); |
178 | /* Swap lineinfo if the endianess differs. */ |
179 | if (bcread_swap(ls) && pt->numline >= 256) { |
180 | MSize i, n = pt->sizebc-1; |
181 | if (pt->numline < 65536) { |
182 | uint16_t *p = (uint16_t *)lineinfo; |
183 | for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8)); |
184 | } else { |
185 | uint32_t *p = (uint32_t *)lineinfo; |
186 | for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]); |
187 | } |
188 | } |
189 | } |
190 | |
191 | /* Find pointer to varinfo. */ |
192 | static const void *bcread_varinfo(GCproto *pt) |
193 | { |
194 | const uint8_t *p = proto_uvinfo(pt); |
195 | MSize n = pt->sizeuv; |
196 | if (n) while (*p++ || --n) ; |
197 | return p; |
198 | } |
199 | |
200 | /* Read a single constant key/value of a template table. */ |
201 | static void bcread_ktabk(LexState *ls, TValue *o) |
202 | { |
203 | MSize tp = bcread_uleb128(ls); |
204 | if (tp >= BCDUMP_KTAB_STR) { |
205 | MSize len = tp - BCDUMP_KTAB_STR; |
206 | const char *p = (const char *)bcread_mem(ls, len); |
207 | setstrV(ls->L, o, lj_str_new(ls->L, p, len)); |
208 | } else if (tp == BCDUMP_KTAB_INT) { |
209 | setintV(o, (int32_t)bcread_uleb128(ls)); |
210 | } else if (tp == BCDUMP_KTAB_NUM) { |
211 | o->u32.lo = bcread_uleb128(ls); |
212 | o->u32.hi = bcread_uleb128(ls); |
213 | } else { |
214 | lua_assert(tp <= BCDUMP_KTAB_TRUE); |
215 | setitype(o, ~tp); |
216 | } |
217 | } |
218 | |
219 | /* Read a template table. */ |
220 | static GCtab *bcread_ktab(LexState *ls) |
221 | { |
222 | MSize narray = bcread_uleb128(ls); |
223 | MSize nhash = bcread_uleb128(ls); |
224 | GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash)); |
225 | if (narray) { /* Read array entries. */ |
226 | MSize i; |
227 | TValue *o = tvref(t->array); |
228 | for (i = 0; i < narray; i++, o++) |
229 | bcread_ktabk(ls, o); |
230 | } |
231 | if (nhash) { /* Read hash entries. */ |
232 | MSize i; |
233 | for (i = 0; i < nhash; i++) { |
234 | TValue key; |
235 | bcread_ktabk(ls, &key); |
236 | lua_assert(!tvisnil(&key)); |
237 | bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); |
238 | } |
239 | } |
240 | return t; |
241 | } |
242 | |
243 | /* Read GC constants of a prototype. */ |
244 | static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) |
245 | { |
246 | MSize i; |
247 | GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc; |
248 | for (i = 0; i < sizekgc; i++, kr++) { |
249 | MSize tp = bcread_uleb128(ls); |
250 | if (tp >= BCDUMP_KGC_STR) { |
251 | MSize len = tp - BCDUMP_KGC_STR; |
252 | const char *p = (const char *)bcread_mem(ls, len); |
253 | setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len))); |
254 | } else if (tp == BCDUMP_KGC_TAB) { |
255 | setgcref(*kr, obj2gco(bcread_ktab(ls))); |
256 | #if LJ_HASFFI |
257 | } else if (tp != BCDUMP_KGC_CHILD) { |
258 | CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE : |
259 | tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64; |
260 | CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8; |
261 | GCcdata *cd = lj_cdata_new_(ls->L, id, sz); |
262 | TValue *p = (TValue *)cdataptr(cd); |
263 | setgcref(*kr, obj2gco(cd)); |
264 | p[0].u32.lo = bcread_uleb128(ls); |
265 | p[0].u32.hi = bcread_uleb128(ls); |
266 | if (tp == BCDUMP_KGC_COMPLEX) { |
267 | p[1].u32.lo = bcread_uleb128(ls); |
268 | p[1].u32.hi = bcread_uleb128(ls); |
269 | } |
270 | #endif |
271 | } else { |
272 | lua_State *L = ls->L; |
273 | lua_assert(tp == BCDUMP_KGC_CHILD); |
274 | if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ |
275 | bcread_error(ls, LJ_ERR_BCBAD); |
276 | L->top--; |
277 | setgcref(*kr, obj2gco(protoV(L->top))); |
278 | } |
279 | } |
280 | } |
281 | |
282 | /* Read number constants of a prototype. */ |
283 | static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn) |
284 | { |
285 | MSize i; |
286 | TValue *o = mref(pt->k, TValue); |
287 | for (i = 0; i < sizekn; i++, o++) { |
288 | int isnum = (ls->p[0] & 1); |
289 | uint32_t lo = bcread_uleb128_33(ls); |
290 | if (isnum) { |
291 | o->u32.lo = lo; |
292 | o->u32.hi = bcread_uleb128(ls); |
293 | } else { |
294 | setintV(o, lo); |
295 | } |
296 | } |
297 | } |
298 | |
299 | /* Read bytecode instructions. */ |
300 | static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) |
301 | { |
302 | BCIns *bc = proto_bc(pt); |
303 | bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, |
304 | pt->framesize, 0); |
305 | bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); |
306 | /* Swap bytecode instructions if the endianess differs. */ |
307 | if (bcread_swap(ls)) { |
308 | MSize i; |
309 | for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]); |
310 | } |
311 | } |
312 | |
313 | /* Read upvalue refs. */ |
314 | static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv) |
315 | { |
316 | if (sizeuv) { |
317 | uint16_t *uv = proto_uv(pt); |
318 | bcread_block(ls, uv, sizeuv*2); |
319 | /* Swap upvalue refs if the endianess differs. */ |
320 | if (bcread_swap(ls)) { |
321 | MSize i; |
322 | for (i = 0; i < sizeuv; i++) |
323 | uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8)); |
324 | } |
325 | } |
326 | } |
327 | |
328 | /* Read a prototype. */ |
329 | static GCproto *bcread_proto(LexState *ls) |
330 | { |
331 | GCproto *pt; |
332 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; |
333 | MSize ofsk, ofsuv, ofsdbg; |
334 | MSize sizedbg = 0; |
335 | BCLine firstline = 0, numline = 0; |
336 | MSize len, startn; |
337 | |
338 | /* Read length. */ |
339 | if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */ |
340 | ls->n--; ls->p++; |
341 | return NULL; |
342 | } |
343 | bcread_want(ls, 5); |
344 | len = bcread_uleb128(ls); |
345 | if (!len) return NULL; /* EOF */ |
346 | bcread_need(ls, len); |
347 | startn = ls->n; |
348 | |
349 | /* Read prototype header. */ |
350 | flags = bcread_byte(ls); |
351 | numparams = bcread_byte(ls); |
352 | framesize = bcread_byte(ls); |
353 | sizeuv = bcread_byte(ls); |
354 | sizekgc = bcread_uleb128(ls); |
355 | sizekn = bcread_uleb128(ls); |
356 | sizebc = bcread_uleb128(ls) + 1; |
357 | if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) { |
358 | sizedbg = bcread_uleb128(ls); |
359 | if (sizedbg) { |
360 | firstline = bcread_uleb128(ls); |
361 | numline = bcread_uleb128(ls); |
362 | } |
363 | } |
364 | |
365 | /* Calculate total size of prototype including all colocated arrays. */ |
366 | sizept = (MSize)sizeof(GCproto) + |
367 | sizebc*(MSize)sizeof(BCIns) + |
368 | sizekgc*(MSize)sizeof(GCRef); |
369 | sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1); |
370 | ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue); |
371 | ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2; |
372 | ofsdbg = sizept; sizept += sizedbg; |
373 | |
374 | /* Allocate prototype object and initialize its fields. */ |
375 | pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept); |
376 | pt->gct = ~LJ_TPROTO; |
377 | pt->numparams = (uint8_t)numparams; |
378 | pt->framesize = (uint8_t)framesize; |
379 | pt->sizebc = sizebc; |
380 | setmref(pt->k, (char *)pt + ofsk); |
381 | setmref(pt->uv, (char *)pt + ofsuv); |
382 | pt->sizekgc = 0; /* Set to zero until fully initialized. */ |
383 | pt->sizekn = sizekn; |
384 | pt->sizept = sizept; |
385 | pt->sizeuv = (uint8_t)sizeuv; |
386 | pt->flags = (uint8_t)flags; |
387 | pt->trace = 0; |
388 | setgcref(pt->chunkname, obj2gco(ls->chunkname)); |
389 | |
390 | /* Close potentially uninitialized gap between bc and kgc. */ |
391 | *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0; |
392 | |
393 | /* Read bytecode instructions and upvalue refs. */ |
394 | bcread_bytecode(ls, pt, sizebc); |
395 | bcread_uv(ls, pt, sizeuv); |
396 | |
397 | /* Read constants. */ |
398 | bcread_kgc(ls, pt, sizekgc); |
399 | pt->sizekgc = sizekgc; |
400 | bcread_knum(ls, pt, sizekn); |
401 | |
402 | /* Read and initialize debug info. */ |
403 | pt->firstline = firstline; |
404 | pt->numline = numline; |
405 | if (sizedbg) { |
406 | MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2); |
407 | setmref(pt->lineinfo, (char *)pt + ofsdbg); |
408 | setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli); |
409 | bcread_dbg(ls, pt, sizedbg); |
410 | setmref(pt->varinfo, bcread_varinfo(pt)); |
411 | } else { |
412 | setmref(pt->lineinfo, NULL); |
413 | setmref(pt->uvinfo, NULL); |
414 | setmref(pt->varinfo, NULL); |
415 | } |
416 | |
417 | if (len != startn - ls->n) |
418 | bcread_error(ls, LJ_ERR_BCBAD); |
419 | return pt; |
420 | } |
421 | |
422 | /* Read and check header of bytecode dump. */ |
423 | static int (LexState *ls) |
424 | { |
425 | uint32_t flags; |
426 | bcread_want(ls, 3+5+5); |
427 | if (bcread_byte(ls) != BCDUMP_HEAD2 || |
428 | bcread_byte(ls) != BCDUMP_HEAD3 || |
429 | bcread_byte(ls) != BCDUMP_VERSION) return 0; |
430 | bcread_flags(ls) = flags = bcread_uleb128(ls); |
431 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; |
432 | if ((flags & BCDUMP_F_FFI)) { |
433 | #if LJ_HASFFI |
434 | lua_State *L = ls->L; |
435 | if (!ctype_ctsG(G(L))) { |
436 | ptrdiff_t oldtop = savestack(L, L->top); |
437 | luaopen_ffi(L); /* Load FFI library on-demand. */ |
438 | L->top = restorestack(L, oldtop); |
439 | } |
440 | #else |
441 | return 0; |
442 | #endif |
443 | } |
444 | if ((flags & BCDUMP_F_STRIP)) { |
445 | ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); |
446 | } else { |
447 | MSize len = bcread_uleb128(ls); |
448 | bcread_need(ls, len); |
449 | ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len); |
450 | } |
451 | return 1; /* Ok. */ |
452 | } |
453 | |
454 | /* Read a bytecode dump. */ |
455 | GCproto *lj_bcread(LexState *ls) |
456 | { |
457 | lua_State *L = ls->L; |
458 | lua_assert(ls->current == BCDUMP_HEAD1); |
459 | bcread_savetop(L, ls, L->top); |
460 | lj_str_resetbuf(&ls->sb); |
461 | /* Check for a valid bytecode dump header. */ |
462 | if (!bcread_header(ls)) |
463 | bcread_error(ls, LJ_ERR_BCFMT); |
464 | for (;;) { /* Process all prototypes in the bytecode dump. */ |
465 | GCproto *pt = bcread_proto(ls); |
466 | if (!pt) break; |
467 | setprotoV(L, L->top, pt); |
468 | incr_top(L); |
469 | } |
470 | if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls)) |
471 | bcread_error(ls, LJ_ERR_BCBAD); |
472 | /* Pop off last prototype. */ |
473 | L->top--; |
474 | return protoV(L->top); |
475 | } |
476 | |
477 | |