| 1 | /* |
| 2 | ** Bytecode reader. |
| 3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
| 4 | */ |
| 5 | |
| 6 | #define lj_bcread_c |
| 7 | #define LUA_CORE |
| 8 | |
| 9 | #include "lj_obj.h" |
| 10 | #include "lj_gc.h" |
| 11 | #include "lj_err.h" |
| 12 | #include "lj_buf.h" |
| 13 | #include "lj_str.h" |
| 14 | #include "lj_tab.h" |
| 15 | #include "lj_bc.h" |
| 16 | #if LJ_HASFFI |
| 17 | #include "lj_ctype.h" |
| 18 | #include "lj_cdata.h" |
| 19 | #include "lualib.h" |
| 20 | #endif |
| 21 | #include "lj_lex.h" |
| 22 | #include "lj_bcdump.h" |
| 23 | #include "lj_state.h" |
| 24 | #include "lj_strfmt.h" |
| 25 | |
/*
** The bytecode reader reuses two lexer state fields:
**   ls->level     holds the dump flags read from the header.
**   ls->lastline  holds the saved stack offset of the Lua stack top.
*/
#define bcread_flags(ls)	((ls)->level)
/* True if the endianness flag of the dump differs from the host's. */
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
/* Recover the stack slot that was recorded with bcread_savetop(). */
#define bcread_oldtop(L, ls)	restorestack(L, (ls)->lastline)
/* Record a stack slot as an offset, stored in the lastline field. */
#define bcread_savetop(L, ls, top) \
  ((ls)->lastline = (BCLine)savestack(L, (top)))
| 33 | |
| 34 | /* -- Input buffer handling ----------------------------------------------- */ |
| 35 | |
| 36 | /* Throw reader error. */ |
| 37 | static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) |
| 38 | { |
| 39 | lua_State *L = ls->L; |
| 40 | const char *name = ls->chunkarg; |
| 41 | if (*name == BCDUMP_HEAD1) name = "(binary)" ; |
| 42 | else if (*name == '@' || *name == '=') name++; |
| 43 | lj_strfmt_pushf(L, "%s: %s" , name, err2msg(em)); |
| 44 | lj_err_throw(L, LUA_ERRSYNTAX); |
| 45 | } |
| 46 | |
| 47 | /* Refill buffer. */ |
| 48 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) |
| 49 | { |
| 50 | lj_assertLS(len != 0, "empty refill" ); |
| 51 | if (len > LJ_MAX_BUF || ls->c < 0) |
| 52 | bcread_error(ls, LJ_ERR_BCBAD); |
| 53 | do { |
| 54 | const char *buf; |
| 55 | size_t sz; |
| 56 | char *p = sbufB(&ls->sb); |
| 57 | MSize n = (MSize)(ls->pe - ls->p); |
| 58 | if (n) { /* Copy remainder to buffer. */ |
| 59 | if (sbuflen(&ls->sb)) { /* Move down in buffer. */ |
| 60 | lj_assertLS(ls->pe == sbufP(&ls->sb), "bad buffer pointer" ); |
| 61 | if (ls->p != p) memmove(p, ls->p, n); |
| 62 | } else { /* Copy from buffer provided by reader. */ |
| 63 | p = lj_buf_need(&ls->sb, len); |
| 64 | memcpy(p, ls->p, n); |
| 65 | } |
| 66 | ls->p = p; |
| 67 | ls->pe = p + n; |
| 68 | } |
| 69 | setsbufP(&ls->sb, p + n); |
| 70 | buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */ |
| 71 | if (buf == NULL || sz == 0) { /* EOF? */ |
| 72 | if (need) bcread_error(ls, LJ_ERR_BCBAD); |
| 73 | ls->c = -1; /* Only bad if we get called again. */ |
| 74 | break; |
| 75 | } |
| 76 | if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); |
| 77 | if (n) { /* Append to buffer. */ |
| 78 | n += (MSize)sz; |
| 79 | p = lj_buf_need(&ls->sb, n < len ? len : n); |
| 80 | memcpy(sbufP(&ls->sb), buf, sz); |
| 81 | setsbufP(&ls->sb, p + n); |
| 82 | ls->p = p; |
| 83 | ls->pe = p + n; |
| 84 | } else { /* Return buffer provided by reader. */ |
| 85 | ls->p = buf; |
| 86 | ls->pe = buf + sz; |
| 87 | } |
| 88 | } while ((MSize)(ls->pe - ls->p) < len); |
| 89 | } |
| 90 | |
| 91 | /* Need a certain number of bytes. */ |
| 92 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) |
| 93 | { |
| 94 | if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) |
| 95 | bcread_fill(ls, len, 1); |
| 96 | } |
| 97 | |
| 98 | /* Want to read up to a certain number of bytes, but may need less. */ |
| 99 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) |
| 100 | { |
| 101 | if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) |
| 102 | bcread_fill(ls, len, 0); |
| 103 | } |
| 104 | |
| 105 | /* Return memory block from buffer. */ |
| 106 | static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) |
| 107 | { |
| 108 | uint8_t *p = (uint8_t *)ls->p; |
| 109 | ls->p += len; |
| 110 | lj_assertLS(ls->p <= ls->pe, "buffer read overflow" ); |
| 111 | return p; |
| 112 | } |
| 113 | |
| 114 | /* Copy memory block from buffer. */ |
| 115 | static void bcread_block(LexState *ls, void *q, MSize len) |
| 116 | { |
| 117 | memcpy(q, bcread_mem(ls, len), len); |
| 118 | } |
| 119 | |
| 120 | /* Read byte from buffer. */ |
| 121 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) |
| 122 | { |
| 123 | lj_assertLS(ls->p < ls->pe, "buffer read overflow" ); |
| 124 | return (uint32_t)(uint8_t)*ls->p++; |
| 125 | } |
| 126 | |
| 127 | /* Read ULEB128 value from buffer. */ |
| 128 | static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) |
| 129 | { |
| 130 | uint32_t v = lj_buf_ruleb128(&ls->p); |
| 131 | lj_assertLS(ls->p <= ls->pe, "buffer read overflow" ); |
| 132 | return v; |
| 133 | } |
| 134 | |
| 135 | /* Read top 32 bits of 33 bit ULEB128 value from buffer. */ |
| 136 | static uint32_t bcread_uleb128_33(LexState *ls) |
| 137 | { |
| 138 | const uint8_t *p = (const uint8_t *)ls->p; |
| 139 | uint32_t v = (*p++ >> 1); |
| 140 | if (LJ_UNLIKELY(v >= 0x40)) { |
| 141 | int sh = -1; |
| 142 | v &= 0x3f; |
| 143 | do { |
| 144 | v |= ((*p & 0x7f) << (sh += 7)); |
| 145 | } while (*p++ >= 0x80); |
| 146 | } |
| 147 | ls->p = (char *)p; |
| 148 | lj_assertLS(ls->p <= ls->pe, "buffer read overflow" ); |
| 149 | return v; |
| 150 | } |
| 151 | |
| 152 | /* -- Bytecode reader ----------------------------------------------------- */ |
| 153 | |
| 154 | /* Read debug info of a prototype. */ |
| 155 | static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg) |
| 156 | { |
| 157 | void *lineinfo = (void *)proto_lineinfo(pt); |
| 158 | bcread_block(ls, lineinfo, sizedbg); |
| 159 | /* Swap lineinfo if the endianess differs. */ |
| 160 | if (bcread_swap(ls) && pt->numline >= 256) { |
| 161 | MSize i, n = pt->sizebc-1; |
| 162 | if (pt->numline < 65536) { |
| 163 | uint16_t *p = (uint16_t *)lineinfo; |
| 164 | for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8)); |
| 165 | } else { |
| 166 | uint32_t *p = (uint32_t *)lineinfo; |
| 167 | for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]); |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | /* Find pointer to varinfo. */ |
| 173 | static const void *bcread_varinfo(GCproto *pt) |
| 174 | { |
| 175 | const uint8_t *p = proto_uvinfo(pt); |
| 176 | MSize n = pt->sizeuv; |
| 177 | if (n) while (*p++ || --n) ; |
| 178 | return p; |
| 179 | } |
| 180 | |
| 181 | /* Read a single constant key/value of a template table. */ |
| 182 | static void bcread_ktabk(LexState *ls, TValue *o) |
| 183 | { |
| 184 | MSize tp = bcread_uleb128(ls); |
| 185 | if (tp >= BCDUMP_KTAB_STR) { |
| 186 | MSize len = tp - BCDUMP_KTAB_STR; |
| 187 | const char *p = (const char *)bcread_mem(ls, len); |
| 188 | setstrV(ls->L, o, lj_str_new(ls->L, p, len)); |
| 189 | } else if (tp == BCDUMP_KTAB_INT) { |
| 190 | setintV(o, (int32_t)bcread_uleb128(ls)); |
| 191 | } else if (tp == BCDUMP_KTAB_NUM) { |
| 192 | o->u32.lo = bcread_uleb128(ls); |
| 193 | o->u32.hi = bcread_uleb128(ls); |
| 194 | } else { |
| 195 | lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d" , tp); |
| 196 | setpriV(o, ~tp); |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | /* Read a template table. */ |
| 201 | static GCtab *bcread_ktab(LexState *ls) |
| 202 | { |
| 203 | MSize narray = bcread_uleb128(ls); |
| 204 | MSize nhash = bcread_uleb128(ls); |
| 205 | GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash)); |
| 206 | if (narray) { /* Read array entries. */ |
| 207 | MSize i; |
| 208 | TValue *o = tvref(t->array); |
| 209 | for (i = 0; i < narray; i++, o++) |
| 210 | bcread_ktabk(ls, o); |
| 211 | } |
| 212 | if (nhash) { /* Read hash entries. */ |
| 213 | MSize i; |
| 214 | for (i = 0; i < nhash; i++) { |
| 215 | TValue key; |
| 216 | bcread_ktabk(ls, &key); |
| 217 | lj_assertLS(!tvisnil(&key), "nil key" ); |
| 218 | bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); |
| 219 | } |
| 220 | } |
| 221 | return t; |
| 222 | } |
| 223 | |
| 224 | /* Read GC constants of a prototype. */ |
| 225 | static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) |
| 226 | { |
| 227 | MSize i; |
| 228 | GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc; |
| 229 | for (i = 0; i < sizekgc; i++, kr++) { |
| 230 | MSize tp = bcread_uleb128(ls); |
| 231 | if (tp >= BCDUMP_KGC_STR) { |
| 232 | MSize len = tp - BCDUMP_KGC_STR; |
| 233 | const char *p = (const char *)bcread_mem(ls, len); |
| 234 | setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len))); |
| 235 | } else if (tp == BCDUMP_KGC_TAB) { |
| 236 | setgcref(*kr, obj2gco(bcread_ktab(ls))); |
| 237 | #if LJ_HASFFI |
| 238 | } else if (tp != BCDUMP_KGC_CHILD) { |
| 239 | CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE : |
| 240 | tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64; |
| 241 | CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8; |
| 242 | GCcdata *cd = lj_cdata_new_(ls->L, id, sz); |
| 243 | TValue *p = (TValue *)cdataptr(cd); |
| 244 | setgcref(*kr, obj2gco(cd)); |
| 245 | p[0].u32.lo = bcread_uleb128(ls); |
| 246 | p[0].u32.hi = bcread_uleb128(ls); |
| 247 | if (tp == BCDUMP_KGC_COMPLEX) { |
| 248 | p[1].u32.lo = bcread_uleb128(ls); |
| 249 | p[1].u32.hi = bcread_uleb128(ls); |
| 250 | } |
| 251 | #endif |
| 252 | } else { |
| 253 | lua_State *L = ls->L; |
| 254 | lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d" , tp); |
| 255 | if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ |
| 256 | bcread_error(ls, LJ_ERR_BCBAD); |
| 257 | L->top--; |
| 258 | setgcref(*kr, obj2gco(protoV(L->top))); |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | /* Read number constants of a prototype. */ |
| 264 | static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn) |
| 265 | { |
| 266 | MSize i; |
| 267 | TValue *o = mref(pt->k, TValue); |
| 268 | for (i = 0; i < sizekn; i++, o++) { |
| 269 | int isnum = (ls->p[0] & 1); |
| 270 | uint32_t lo = bcread_uleb128_33(ls); |
| 271 | if (isnum) { |
| 272 | o->u32.lo = lo; |
| 273 | o->u32.hi = bcread_uleb128(ls); |
| 274 | } else { |
| 275 | setintV(o, lo); |
| 276 | } |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | /* Read bytecode instructions. */ |
| 281 | static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) |
| 282 | { |
| 283 | BCIns *bc = proto_bc(pt); |
| 284 | bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, |
| 285 | pt->framesize, 0); |
| 286 | bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); |
| 287 | /* Swap bytecode instructions if the endianess differs. */ |
| 288 | if (bcread_swap(ls)) { |
| 289 | MSize i; |
| 290 | for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]); |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | /* Read upvalue refs. */ |
| 295 | static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv) |
| 296 | { |
| 297 | if (sizeuv) { |
| 298 | uint16_t *uv = proto_uv(pt); |
| 299 | bcread_block(ls, uv, sizeuv*2); |
| 300 | /* Swap upvalue refs if the endianess differs. */ |
| 301 | if (bcread_swap(ls)) { |
| 302 | MSize i; |
| 303 | for (i = 0; i < sizeuv; i++) |
| 304 | uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8)); |
| 305 | } |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | /* Read a prototype. */ |
| 310 | GCproto *lj_bcread_proto(LexState *ls) |
| 311 | { |
| 312 | GCproto *pt; |
| 313 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; |
| 314 | MSize ofsk, ofsuv, ofsdbg; |
| 315 | MSize sizedbg = 0; |
| 316 | BCLine firstline = 0, numline = 0; |
| 317 | |
| 318 | /* Read prototype header. */ |
| 319 | flags = bcread_byte(ls); |
| 320 | numparams = bcread_byte(ls); |
| 321 | framesize = bcread_byte(ls); |
| 322 | sizeuv = bcread_byte(ls); |
| 323 | sizekgc = bcread_uleb128(ls); |
| 324 | sizekn = bcread_uleb128(ls); |
| 325 | sizebc = bcread_uleb128(ls) + 1; |
| 326 | if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) { |
| 327 | sizedbg = bcread_uleb128(ls); |
| 328 | if (sizedbg) { |
| 329 | firstline = bcread_uleb128(ls); |
| 330 | numline = bcread_uleb128(ls); |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | /* Calculate total size of prototype including all colocated arrays. */ |
| 335 | sizept = (MSize)sizeof(GCproto) + |
| 336 | sizebc*(MSize)sizeof(BCIns) + |
| 337 | sizekgc*(MSize)sizeof(GCRef); |
| 338 | sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1); |
| 339 | ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue); |
| 340 | ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2; |
| 341 | ofsdbg = sizept; sizept += sizedbg; |
| 342 | |
| 343 | /* Allocate prototype object and initialize its fields. */ |
| 344 | pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept); |
| 345 | pt->gct = ~LJ_TPROTO; |
| 346 | pt->numparams = (uint8_t)numparams; |
| 347 | pt->framesize = (uint8_t)framesize; |
| 348 | pt->sizebc = sizebc; |
| 349 | setmref(pt->k, (char *)pt + ofsk); |
| 350 | setmref(pt->uv, (char *)pt + ofsuv); |
| 351 | pt->sizekgc = 0; /* Set to zero until fully initialized. */ |
| 352 | pt->sizekn = sizekn; |
| 353 | pt->sizept = sizept; |
| 354 | pt->sizeuv = (uint8_t)sizeuv; |
| 355 | pt->flags = (uint8_t)flags; |
| 356 | pt->trace = 0; |
| 357 | setgcref(pt->chunkname, obj2gco(ls->chunkname)); |
| 358 | |
| 359 | /* Close potentially uninitialized gap between bc and kgc. */ |
| 360 | *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0; |
| 361 | |
| 362 | /* Read bytecode instructions and upvalue refs. */ |
| 363 | bcread_bytecode(ls, pt, sizebc); |
| 364 | bcread_uv(ls, pt, sizeuv); |
| 365 | |
| 366 | /* Read constants. */ |
| 367 | bcread_kgc(ls, pt, sizekgc); |
| 368 | pt->sizekgc = sizekgc; |
| 369 | bcread_knum(ls, pt, sizekn); |
| 370 | |
| 371 | /* Read and initialize debug info. */ |
| 372 | pt->firstline = firstline; |
| 373 | pt->numline = numline; |
| 374 | if (sizedbg) { |
| 375 | MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2); |
| 376 | setmref(pt->lineinfo, (char *)pt + ofsdbg); |
| 377 | setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli); |
| 378 | bcread_dbg(ls, pt, sizedbg); |
| 379 | setmref(pt->varinfo, bcread_varinfo(pt)); |
| 380 | } else { |
| 381 | setmref(pt->lineinfo, NULL); |
| 382 | setmref(pt->uvinfo, NULL); |
| 383 | setmref(pt->varinfo, NULL); |
| 384 | } |
| 385 | return pt; |
| 386 | } |
| 387 | |
| 388 | /* Read and check header of bytecode dump. */ |
| 389 | static int (LexState *ls) |
| 390 | { |
| 391 | uint32_t flags; |
| 392 | bcread_want(ls, 3+5+5); |
| 393 | if (bcread_byte(ls) != BCDUMP_HEAD2 || |
| 394 | bcread_byte(ls) != BCDUMP_HEAD3 || |
| 395 | bcread_byte(ls) != BCDUMP_VERSION) return 0; |
| 396 | bcread_flags(ls) = flags = bcread_uleb128(ls); |
| 397 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; |
| 398 | if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; |
| 399 | if ((flags & BCDUMP_F_FFI)) { |
| 400 | #if LJ_HASFFI |
| 401 | lua_State *L = ls->L; |
| 402 | if (!ctype_ctsG(G(L))) { |
| 403 | ptrdiff_t oldtop = savestack(L, L->top); |
| 404 | luaopen_ffi(L); /* Load FFI library on-demand. */ |
| 405 | L->top = restorestack(L, oldtop); |
| 406 | } |
| 407 | #else |
| 408 | return 0; |
| 409 | #endif |
| 410 | } |
| 411 | if ((flags & BCDUMP_F_STRIP)) { |
| 412 | ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); |
| 413 | } else { |
| 414 | MSize len = bcread_uleb128(ls); |
| 415 | bcread_need(ls, len); |
| 416 | ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len); |
| 417 | } |
| 418 | return 1; /* Ok. */ |
| 419 | } |
| 420 | |
| 421 | /* Read a bytecode dump. */ |
| 422 | GCproto *lj_bcread(LexState *ls) |
| 423 | { |
| 424 | lua_State *L = ls->L; |
| 425 | lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header" ); |
| 426 | bcread_savetop(L, ls, L->top); |
| 427 | lj_buf_reset(&ls->sb); |
| 428 | /* Check for a valid bytecode dump header. */ |
| 429 | if (!bcread_header(ls)) |
| 430 | bcread_error(ls, LJ_ERR_BCFMT); |
| 431 | for (;;) { /* Process all prototypes in the bytecode dump. */ |
| 432 | GCproto *pt; |
| 433 | MSize len; |
| 434 | const char *startp; |
| 435 | /* Read length. */ |
| 436 | if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */ |
| 437 | ls->p++; |
| 438 | break; |
| 439 | } |
| 440 | bcread_want(ls, 5); |
| 441 | len = bcread_uleb128(ls); |
| 442 | if (!len) break; /* EOF */ |
| 443 | bcread_need(ls, len); |
| 444 | startp = ls->p; |
| 445 | pt = lj_bcread_proto(ls); |
| 446 | if (ls->p != startp + len) |
| 447 | bcread_error(ls, LJ_ERR_BCBAD); |
| 448 | setprotoV(L, L->top, pt); |
| 449 | incr_top(L); |
| 450 | } |
| 451 | if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) |
| 452 | bcread_error(ls, LJ_ERR_BCBAD); |
| 453 | /* Pop off last prototype. */ |
| 454 | L->top--; |
| 455 | return protoV(L->top); |
| 456 | } |
| 457 | |
| 458 | |