| 1 | /* |
| 2 | ** Bytecode reader. |
| 3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
| 4 | */ |
| 5 | |
| 6 | #define lj_bcread_c |
| 7 | #define LUA_CORE |
| 8 | |
| 9 | #include "lj_obj.h" |
| 10 | #include "lj_gc.h" |
| 11 | #include "lj_err.h" |
| 12 | #include "lj_str.h" |
| 13 | #include "lj_tab.h" |
| 14 | #include "lj_bc.h" |
| 15 | #if LJ_HASFFI |
| 16 | #include "lj_ctype.h" |
| 17 | #include "lj_cdata.h" |
| 18 | #include "lualib.h" |
| 19 | #endif |
| 20 | #include "lj_lex.h" |
| 21 | #include "lj_bcdump.h" |
| 22 | #include "lj_state.h" |
| 23 | |
/* Reuse some lexer fields for our own purposes:
** ls->level holds the flags of the bytecode dump and
** ls->lastline holds the saved stack top (as a stack offset).
** The macro arguments are parenthesized, so any pointer expression is ok.
*/
#define bcread_flags(ls)	((ls)->level)
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
#define bcread_oldtop(L, ls)	restorestack(L, (ls)->lastline)
#define bcread_savetop(L, ls, top) \
  ((ls)->lastline = (BCLine)savestack(L, (top)))
| 31 | |
| 32 | /* -- Input buffer handling ----------------------------------------------- */ |
| 33 | |
| 34 | /* Throw reader error. */ |
| 35 | static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) |
| 36 | { |
| 37 | lua_State *L = ls->L; |
| 38 | const char *name = ls->chunkarg; |
| 39 | if (*name == BCDUMP_HEAD1) name = "(binary)" ; |
| 40 | else if (*name == '@' || *name == '=') name++; |
| 41 | lj_str_pushf(L, "%s: %s" , name, err2msg(em)); |
| 42 | lj_err_throw(L, LUA_ERRSYNTAX); |
| 43 | } |
| 44 | |
| 45 | /* Resize input buffer. */ |
| 46 | static void bcread_resize(LexState *ls, MSize len) |
| 47 | { |
| 48 | if (ls->sb.sz < len) { |
| 49 | MSize sz = ls->sb.sz * 2; |
| 50 | while (len > sz) sz = sz * 2; |
| 51 | lj_str_resizebuf(ls->L, &ls->sb, sz); |
| 52 | /* Caveat: this may change ls->sb.buf which may affect ls->p. */ |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | /* Refill buffer if needed. */ |
| 57 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) |
| 58 | { |
| 59 | lua_assert(len != 0); |
| 60 | if (len > LJ_MAX_MEM || ls->current < 0) |
| 61 | bcread_error(ls, LJ_ERR_BCBAD); |
| 62 | do { |
| 63 | const char *buf; |
| 64 | size_t size; |
| 65 | if (ls->n) { /* Copy remainder to buffer. */ |
| 66 | if (ls->sb.n) { /* Move down in buffer. */ |
| 67 | lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); |
| 68 | if (ls->n != ls->sb.n) |
| 69 | memmove(ls->sb.buf, ls->p, ls->n); |
| 70 | } else { /* Copy from buffer provided by reader. */ |
| 71 | bcread_resize(ls, len); |
| 72 | memcpy(ls->sb.buf, ls->p, ls->n); |
| 73 | } |
| 74 | ls->p = ls->sb.buf; |
| 75 | } |
| 76 | ls->sb.n = ls->n; |
| 77 | buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ |
| 78 | if (buf == NULL || size == 0) { /* EOF? */ |
| 79 | if (need) bcread_error(ls, LJ_ERR_BCBAD); |
| 80 | ls->current = -1; /* Only bad if we get called again. */ |
| 81 | break; |
| 82 | } |
| 83 | if (ls->sb.n) { /* Append to buffer. */ |
| 84 | MSize n = ls->sb.n + (MSize)size; |
| 85 | bcread_resize(ls, n < len ? len : n); |
| 86 | memcpy(ls->sb.buf + ls->sb.n, buf, size); |
| 87 | ls->n = ls->sb.n = n; |
| 88 | ls->p = ls->sb.buf; |
| 89 | } else { /* Return buffer provided by reader. */ |
| 90 | ls->n = (MSize)size; |
| 91 | ls->p = buf; |
| 92 | } |
| 93 | } while (ls->n < len); |
| 94 | } |
| 95 | |
| 96 | /* Need a certain number of bytes. */ |
| 97 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) |
| 98 | { |
| 99 | if (LJ_UNLIKELY(ls->n < len)) |
| 100 | bcread_fill(ls, len, 1); |
| 101 | } |
| 102 | |
| 103 | /* Want to read up to a certain number of bytes, but may need less. */ |
| 104 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) |
| 105 | { |
| 106 | if (LJ_UNLIKELY(ls->n < len)) |
| 107 | bcread_fill(ls, len, 0); |
| 108 | } |
| 109 | |
/* Account for consumed input: reduce the count of available bytes (ls->n)
** by one resp. by len. Only the count is adjusted here, not ls->p.
** The macro arguments are parenthesized, so any pointer expression is ok.
*/
#define bcread_dec(ls)		check_exp((ls)->n > 0, (ls)->n--)
#define bcread_consume(ls, len) \
  check_exp((ls)->n >= (len), (ls)->n -= (len))
| 112 | |
| 113 | /* Return memory block from buffer. */ |
| 114 | static uint8_t *bcread_mem(LexState *ls, MSize len) |
| 115 | { |
| 116 | uint8_t *p = (uint8_t *)ls->p; |
| 117 | bcread_consume(ls, len); |
| 118 | ls->p = (char *)p + len; |
| 119 | return p; |
| 120 | } |
| 121 | |
| 122 | /* Copy memory block from buffer. */ |
| 123 | static void bcread_block(LexState *ls, void *q, MSize len) |
| 124 | { |
| 125 | memcpy(q, bcread_mem(ls, len), len); |
| 126 | } |
| 127 | |
| 128 | /* Read byte from buffer. */ |
| 129 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) |
| 130 | { |
| 131 | bcread_dec(ls); |
| 132 | return (uint32_t)(uint8_t)*ls->p++; |
| 133 | } |
| 134 | |
| 135 | /* Read ULEB128 value from buffer. */ |
| 136 | static uint32_t bcread_uleb128(LexState *ls) |
| 137 | { |
| 138 | const uint8_t *p = (const uint8_t *)ls->p; |
| 139 | uint32_t v = *p++; |
| 140 | if (LJ_UNLIKELY(v >= 0x80)) { |
| 141 | int sh = 0; |
| 142 | v &= 0x7f; |
| 143 | do { |
| 144 | v |= ((*p & 0x7f) << (sh += 7)); |
| 145 | bcread_dec(ls); |
| 146 | } while (*p++ >= 0x80); |
| 147 | } |
| 148 | bcread_dec(ls); |
| 149 | ls->p = (char *)p; |
| 150 | return v; |
| 151 | } |
| 152 | |
| 153 | /* Read top 32 bits of 33 bit ULEB128 value from buffer. */ |
| 154 | static uint32_t bcread_uleb128_33(LexState *ls) |
| 155 | { |
| 156 | const uint8_t *p = (const uint8_t *)ls->p; |
| 157 | uint32_t v = (*p++ >> 1); |
| 158 | if (LJ_UNLIKELY(v >= 0x40)) { |
| 159 | int sh = -1; |
| 160 | v &= 0x3f; |
| 161 | do { |
| 162 | v |= ((*p & 0x7f) << (sh += 7)); |
| 163 | bcread_dec(ls); |
| 164 | } while (*p++ >= 0x80); |
| 165 | } |
| 166 | bcread_dec(ls); |
| 167 | ls->p = (char *)p; |
| 168 | return v; |
| 169 | } |
| 170 | |
| 171 | /* -- Bytecode reader ----------------------------------------------------- */ |
| 172 | |
| 173 | /* Read debug info of a prototype. */ |
| 174 | static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg) |
| 175 | { |
| 176 | void *lineinfo = (void *)proto_lineinfo(pt); |
| 177 | bcread_block(ls, lineinfo, sizedbg); |
| 178 | /* Swap lineinfo if the endianess differs. */ |
| 179 | if (bcread_swap(ls) && pt->numline >= 256) { |
| 180 | MSize i, n = pt->sizebc-1; |
| 181 | if (pt->numline < 65536) { |
| 182 | uint16_t *p = (uint16_t *)lineinfo; |
| 183 | for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8)); |
| 184 | } else { |
| 185 | uint32_t *p = (uint32_t *)lineinfo; |
| 186 | for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]); |
| 187 | } |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | /* Find pointer to varinfo. */ |
| 192 | static const void *bcread_varinfo(GCproto *pt) |
| 193 | { |
| 194 | const uint8_t *p = proto_uvinfo(pt); |
| 195 | MSize n = pt->sizeuv; |
| 196 | if (n) while (*p++ || --n) ; |
| 197 | return p; |
| 198 | } |
| 199 | |
| 200 | /* Read a single constant key/value of a template table. */ |
| 201 | static void bcread_ktabk(LexState *ls, TValue *o) |
| 202 | { |
| 203 | MSize tp = bcread_uleb128(ls); |
| 204 | if (tp >= BCDUMP_KTAB_STR) { |
| 205 | MSize len = tp - BCDUMP_KTAB_STR; |
| 206 | const char *p = (const char *)bcread_mem(ls, len); |
| 207 | setstrV(ls->L, o, lj_str_new(ls->L, p, len)); |
| 208 | } else if (tp == BCDUMP_KTAB_INT) { |
| 209 | setintV(o, (int32_t)bcread_uleb128(ls)); |
| 210 | } else if (tp == BCDUMP_KTAB_NUM) { |
| 211 | o->u32.lo = bcread_uleb128(ls); |
| 212 | o->u32.hi = bcread_uleb128(ls); |
| 213 | } else { |
| 214 | lua_assert(tp <= BCDUMP_KTAB_TRUE); |
| 215 | setitype(o, ~tp); |
| 216 | } |
| 217 | } |
| 218 | |
| 219 | /* Read a template table. */ |
| 220 | static GCtab *bcread_ktab(LexState *ls) |
| 221 | { |
| 222 | MSize narray = bcread_uleb128(ls); |
| 223 | MSize nhash = bcread_uleb128(ls); |
| 224 | GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash)); |
| 225 | if (narray) { /* Read array entries. */ |
| 226 | MSize i; |
| 227 | TValue *o = tvref(t->array); |
| 228 | for (i = 0; i < narray; i++, o++) |
| 229 | bcread_ktabk(ls, o); |
| 230 | } |
| 231 | if (nhash) { /* Read hash entries. */ |
| 232 | MSize i; |
| 233 | for (i = 0; i < nhash; i++) { |
| 234 | TValue key; |
| 235 | bcread_ktabk(ls, &key); |
| 236 | lua_assert(!tvisnil(&key)); |
| 237 | bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); |
| 238 | } |
| 239 | } |
| 240 | return t; |
| 241 | } |
| 242 | |
| 243 | /* Read GC constants of a prototype. */ |
| 244 | static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) |
| 245 | { |
| 246 | MSize i; |
| 247 | GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc; |
| 248 | for (i = 0; i < sizekgc; i++, kr++) { |
| 249 | MSize tp = bcread_uleb128(ls); |
| 250 | if (tp >= BCDUMP_KGC_STR) { |
| 251 | MSize len = tp - BCDUMP_KGC_STR; |
| 252 | const char *p = (const char *)bcread_mem(ls, len); |
| 253 | setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len))); |
| 254 | } else if (tp == BCDUMP_KGC_TAB) { |
| 255 | setgcref(*kr, obj2gco(bcread_ktab(ls))); |
| 256 | #if LJ_HASFFI |
| 257 | } else if (tp != BCDUMP_KGC_CHILD) { |
| 258 | CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE : |
| 259 | tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64; |
| 260 | CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8; |
| 261 | GCcdata *cd = lj_cdata_new_(ls->L, id, sz); |
| 262 | TValue *p = (TValue *)cdataptr(cd); |
| 263 | setgcref(*kr, obj2gco(cd)); |
| 264 | p[0].u32.lo = bcread_uleb128(ls); |
| 265 | p[0].u32.hi = bcread_uleb128(ls); |
| 266 | if (tp == BCDUMP_KGC_COMPLEX) { |
| 267 | p[1].u32.lo = bcread_uleb128(ls); |
| 268 | p[1].u32.hi = bcread_uleb128(ls); |
| 269 | } |
| 270 | #endif |
| 271 | } else { |
| 272 | lua_State *L = ls->L; |
| 273 | lua_assert(tp == BCDUMP_KGC_CHILD); |
| 274 | if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ |
| 275 | bcread_error(ls, LJ_ERR_BCBAD); |
| 276 | L->top--; |
| 277 | setgcref(*kr, obj2gco(protoV(L->top))); |
| 278 | } |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | /* Read number constants of a prototype. */ |
| 283 | static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn) |
| 284 | { |
| 285 | MSize i; |
| 286 | TValue *o = mref(pt->k, TValue); |
| 287 | for (i = 0; i < sizekn; i++, o++) { |
| 288 | int isnum = (ls->p[0] & 1); |
| 289 | uint32_t lo = bcread_uleb128_33(ls); |
| 290 | if (isnum) { |
| 291 | o->u32.lo = lo; |
| 292 | o->u32.hi = bcread_uleb128(ls); |
| 293 | } else { |
| 294 | setintV(o, lo); |
| 295 | } |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | /* Read bytecode instructions. */ |
| 300 | static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) |
| 301 | { |
| 302 | BCIns *bc = proto_bc(pt); |
| 303 | bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, |
| 304 | pt->framesize, 0); |
| 305 | bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); |
| 306 | /* Swap bytecode instructions if the endianess differs. */ |
| 307 | if (bcread_swap(ls)) { |
| 308 | MSize i; |
| 309 | for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]); |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | /* Read upvalue refs. */ |
| 314 | static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv) |
| 315 | { |
| 316 | if (sizeuv) { |
| 317 | uint16_t *uv = proto_uv(pt); |
| 318 | bcread_block(ls, uv, sizeuv*2); |
| 319 | /* Swap upvalue refs if the endianess differs. */ |
| 320 | if (bcread_swap(ls)) { |
| 321 | MSize i; |
| 322 | for (i = 0; i < sizeuv; i++) |
| 323 | uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8)); |
| 324 | } |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | /* Read a prototype. */ |
| 329 | static GCproto *bcread_proto(LexState *ls) |
| 330 | { |
| 331 | GCproto *pt; |
| 332 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; |
| 333 | MSize ofsk, ofsuv, ofsdbg; |
| 334 | MSize sizedbg = 0; |
| 335 | BCLine firstline = 0, numline = 0; |
| 336 | MSize len, startn; |
| 337 | |
| 338 | /* Read length. */ |
| 339 | if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */ |
| 340 | ls->n--; ls->p++; |
| 341 | return NULL; |
| 342 | } |
| 343 | bcread_want(ls, 5); |
| 344 | len = bcread_uleb128(ls); |
| 345 | if (!len) return NULL; /* EOF */ |
| 346 | bcread_need(ls, len); |
| 347 | startn = ls->n; |
| 348 | |
| 349 | /* Read prototype header. */ |
| 350 | flags = bcread_byte(ls); |
| 351 | numparams = bcread_byte(ls); |
| 352 | framesize = bcread_byte(ls); |
| 353 | sizeuv = bcread_byte(ls); |
| 354 | sizekgc = bcread_uleb128(ls); |
| 355 | sizekn = bcread_uleb128(ls); |
| 356 | sizebc = bcread_uleb128(ls) + 1; |
| 357 | if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) { |
| 358 | sizedbg = bcread_uleb128(ls); |
| 359 | if (sizedbg) { |
| 360 | firstline = bcread_uleb128(ls); |
| 361 | numline = bcread_uleb128(ls); |
| 362 | } |
| 363 | } |
| 364 | |
| 365 | /* Calculate total size of prototype including all colocated arrays. */ |
| 366 | sizept = (MSize)sizeof(GCproto) + |
| 367 | sizebc*(MSize)sizeof(BCIns) + |
| 368 | sizekgc*(MSize)sizeof(GCRef); |
| 369 | sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1); |
| 370 | ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue); |
| 371 | ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2; |
| 372 | ofsdbg = sizept; sizept += sizedbg; |
| 373 | |
| 374 | /* Allocate prototype object and initialize its fields. */ |
| 375 | pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept); |
| 376 | pt->gct = ~LJ_TPROTO; |
| 377 | pt->numparams = (uint8_t)numparams; |
| 378 | pt->framesize = (uint8_t)framesize; |
| 379 | pt->sizebc = sizebc; |
| 380 | setmref(pt->k, (char *)pt + ofsk); |
| 381 | setmref(pt->uv, (char *)pt + ofsuv); |
| 382 | pt->sizekgc = 0; /* Set to zero until fully initialized. */ |
| 383 | pt->sizekn = sizekn; |
| 384 | pt->sizept = sizept; |
| 385 | pt->sizeuv = (uint8_t)sizeuv; |
| 386 | pt->flags = (uint8_t)flags; |
| 387 | pt->trace = 0; |
| 388 | setgcref(pt->chunkname, obj2gco(ls->chunkname)); |
| 389 | |
| 390 | /* Close potentially uninitialized gap between bc and kgc. */ |
| 391 | *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0; |
| 392 | |
| 393 | /* Read bytecode instructions and upvalue refs. */ |
| 394 | bcread_bytecode(ls, pt, sizebc); |
| 395 | bcread_uv(ls, pt, sizeuv); |
| 396 | |
| 397 | /* Read constants. */ |
| 398 | bcread_kgc(ls, pt, sizekgc); |
| 399 | pt->sizekgc = sizekgc; |
| 400 | bcread_knum(ls, pt, sizekn); |
| 401 | |
| 402 | /* Read and initialize debug info. */ |
| 403 | pt->firstline = firstline; |
| 404 | pt->numline = numline; |
| 405 | if (sizedbg) { |
| 406 | MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2); |
| 407 | setmref(pt->lineinfo, (char *)pt + ofsdbg); |
| 408 | setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli); |
| 409 | bcread_dbg(ls, pt, sizedbg); |
| 410 | setmref(pt->varinfo, bcread_varinfo(pt)); |
| 411 | } else { |
| 412 | setmref(pt->lineinfo, NULL); |
| 413 | setmref(pt->uvinfo, NULL); |
| 414 | setmref(pt->varinfo, NULL); |
| 415 | } |
| 416 | |
| 417 | if (len != startn - ls->n) |
| 418 | bcread_error(ls, LJ_ERR_BCBAD); |
| 419 | return pt; |
| 420 | } |
| 421 | |
| 422 | /* Read and check header of bytecode dump. */ |
| 423 | static int (LexState *ls) |
| 424 | { |
| 425 | uint32_t flags; |
| 426 | bcread_want(ls, 3+5+5); |
| 427 | if (bcread_byte(ls) != BCDUMP_HEAD2 || |
| 428 | bcread_byte(ls) != BCDUMP_HEAD3 || |
| 429 | bcread_byte(ls) != BCDUMP_VERSION) return 0; |
| 430 | bcread_flags(ls) = flags = bcread_uleb128(ls); |
| 431 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; |
| 432 | if ((flags & BCDUMP_F_FFI)) { |
| 433 | #if LJ_HASFFI |
| 434 | lua_State *L = ls->L; |
| 435 | if (!ctype_ctsG(G(L))) { |
| 436 | ptrdiff_t oldtop = savestack(L, L->top); |
| 437 | luaopen_ffi(L); /* Load FFI library on-demand. */ |
| 438 | L->top = restorestack(L, oldtop); |
| 439 | } |
| 440 | #else |
| 441 | return 0; |
| 442 | #endif |
| 443 | } |
| 444 | if ((flags & BCDUMP_F_STRIP)) { |
| 445 | ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); |
| 446 | } else { |
| 447 | MSize len = bcread_uleb128(ls); |
| 448 | bcread_need(ls, len); |
| 449 | ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len); |
| 450 | } |
| 451 | return 1; /* Ok. */ |
| 452 | } |
| 453 | |
| 454 | /* Read a bytecode dump. */ |
| 455 | GCproto *lj_bcread(LexState *ls) |
| 456 | { |
| 457 | lua_State *L = ls->L; |
| 458 | lua_assert(ls->current == BCDUMP_HEAD1); |
| 459 | bcread_savetop(L, ls, L->top); |
| 460 | lj_str_resetbuf(&ls->sb); |
| 461 | /* Check for a valid bytecode dump header. */ |
| 462 | if (!bcread_header(ls)) |
| 463 | bcread_error(ls, LJ_ERR_BCFMT); |
| 464 | for (;;) { /* Process all prototypes in the bytecode dump. */ |
| 465 | GCproto *pt = bcread_proto(ls); |
| 466 | if (!pt) break; |
| 467 | setprotoV(L, L->top, pt); |
| 468 | incr_top(L); |
| 469 | } |
| 470 | if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls)) |
| 471 | bcread_error(ls, LJ_ERR_BCBAD); |
| 472 | /* Pop off last prototype. */ |
| 473 | L->top--; |
| 474 | return protoV(L->top); |
| 475 | } |
| 476 | |
| 477 | |