| 1 | /* |
| 2 | ** LuaJIT VM builder. |
| 3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
| 4 | ** |
| 5 | ** This is a tool to build the hand-tuned assembler code required for |
| 6 | ** LuaJIT's bytecode interpreter. It supports a variety of output formats |
| 7 | ** to feed different toolchains (see usage() below). |
| 8 | ** |
| 9 | ** This tool is not particularly optimized because it's only used while |
| 10 | ** _building_ LuaJIT. There's no point in distributing or installing it. |
| 11 | ** Only the object code generated by this tool is linked into LuaJIT. |
| 12 | ** |
| 13 | ** Caveat: some memory is not free'd, error handling is lazy. |
| 14 | ** It's a one-shot tool -- any effort fixing this would be wasted. |
| 15 | */ |
| 16 | |
| 17 | #include "buildvm.h" |
| 18 | #include "lj_obj.h" |
| 19 | #include "lj_gc.h" |
| 20 | #include "lj_bc.h" |
| 21 | #include "lj_ir.h" |
| 22 | #include "lj_ircall.h" |
| 23 | #include "lj_frame.h" |
| 24 | #include "lj_dispatch.h" |
| 25 | #if LJ_HASFFI |
| 26 | #include "lj_ctype.h" |
| 27 | #include "lj_ccall.h" |
| 28 | #endif |
| 29 | #include "luajit.h" |
| 30 | |
| 31 | #if defined(_WIN32) |
| 32 | #include <fcntl.h> |
| 33 | #include <io.h> |
| 34 | #endif |
| 35 | |
| 36 | /* ------------------------------------------------------------------------ */ |
| 37 | |
/*
** DynASM glue definitions.
** Dst expands to the identifier ctx, so any function that uses Dst needs
** a BuildCtx pointer with exactly that name in scope (see build_code()).
** Dst_DECL is the matching parameter declaration and Dst_REF the D field
** of the build context.
** NOTE(review): how DynASM consumes these names is defined by the DynASM
** headers included below, not by this file -- confirm there.
*/
#define Dst ctx
#define Dst_DECL BuildCtx *ctx
#define Dst_REF (ctx->D)
/* NOTE(review): presumably turns on DynASM's consistency checks -- confirm. */
#define DASM_CHECKS 1
| 43 | |
| 44 | #include "../dynasm/dasm_proto.h" |
| 45 | |
/* Glue macros for DynASM. */
/* Forward declaration, the definition follows further down in this file. */
static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);

/* Route DASM_EXTERN to collect_reloc(), passing all arguments unchanged. */
#define DASM_EXTERN(ctx, addr, idx, type) \
  collect_reloc(ctx, addr, idx, type)
| 51 | |
| 52 | /* ------------------------------------------------------------------------ */ |
| 53 | |
| 54 | /* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */ |
| 55 | #define DASM_ALIGNED_WRITES 1 |
| 56 | |
| 57 | /* Embed architecture-specific DynASM encoder. */ |
| 58 | #if LJ_TARGET_X86ORX64 |
| 59 | #include "../dynasm/dasm_x86.h" |
| 60 | #elif LJ_TARGET_ARM |
| 61 | #include "../dynasm/dasm_arm.h" |
| 62 | #elif LJ_TARGET_ARM64 |
| 63 | #include "../dynasm/dasm_arm64.h" |
| 64 | #elif LJ_TARGET_PPC |
| 65 | #include "../dynasm/dasm_ppc.h" |
| 66 | #elif LJ_TARGET_MIPS |
| 67 | #include "../dynasm/dasm_mips.h" |
| 68 | #else |
| 69 | #error "No support for this architecture (yet)" |
| 70 | #endif |
| 71 | |
| 72 | /* Embed generated architecture-specific backend. */ |
| 73 | #include "buildvm_arch.h" |
| 74 | |
| 75 | /* ------------------------------------------------------------------------ */ |
| 76 | |
| 77 | void owrite(BuildCtx *ctx, const void *ptr, size_t sz) |
| 78 | { |
| 79 | if (fwrite(ptr, 1, sz, ctx->fp) != sz) { |
| 80 | fprintf(stderr, "Error: cannot write to output file: %s\n" , |
| 81 | strerror(errno)); |
| 82 | exit(1); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | /* ------------------------------------------------------------------------ */ |
| 87 | |
| 88 | /* Emit code as raw bytes. Only used for DynASM debugging. */ |
| 89 | static void emit_raw(BuildCtx *ctx) |
| 90 | { |
| 91 | owrite(ctx, ctx->code, ctx->codesz); |
| 92 | } |
| 93 | |
| 94 | /* -- Build machine code -------------------------------------------------- */ |
| 95 | |
| 96 | static const char *sym_decorate(BuildCtx *ctx, |
| 97 | const char *prefix, const char *suffix) |
| 98 | { |
| 99 | char name[256]; |
| 100 | char *p; |
| 101 | #if LJ_64 |
| 102 | const char *symprefix = ctx->mode == BUILD_machasm ? "_" : "" ; |
| 103 | #elif LJ_TARGET_XBOX360 |
| 104 | const char *symprefix = "" ; |
| 105 | #else |
| 106 | const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : "" ; |
| 107 | #endif |
| 108 | sprintf(name, "%s%s%s" , symprefix, prefix, suffix); |
| 109 | p = strchr(name, '@'); |
| 110 | if (p) { |
| 111 | #if LJ_TARGET_X86ORX64 |
| 112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) |
| 113 | name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ |
| 114 | else |
| 115 | *p = '\0'; |
| 116 | #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE |
| 117 | /* Keep @plt etc. */ |
| 118 | #else |
| 119 | *p = '\0'; |
| 120 | #endif |
| 121 | } |
| 122 | p = (char *)malloc(strlen(name)+1); /* MSVC doesn't like strdup. */ |
| 123 | strcpy(p, name); |
| 124 | return p; |
| 125 | } |
| 126 | |
/*
** Number of usable entries in extnames[]: the array size minus one.
** NOTE(review): the excluded last element is presumably a terminator --
** confirm against the generated buildvm_arch.h.
*/
#define NRELOCSYM (sizeof(extnames)/sizeof(extnames[0])-1)

/*
** Maps an extnames[] index to its slot in ctx->relocsym.
** build_code() resets every entry to -1, collect_reloc() assigns a slot
** the first time an index is seen.
*/
static int relocmap[NRELOCSYM];
| 130 | |
| 131 | /* Collect external relocations. */ |
| 132 | static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type) |
| 133 | { |
| 134 | if (ctx->nreloc >= BUILD_MAX_RELOC) { |
| 135 | fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n" ); |
| 136 | exit(1); |
| 137 | } |
| 138 | if (relocmap[idx] < 0) { |
| 139 | relocmap[idx] = ctx->nrelocsym; |
| 140 | ctx->relocsym[ctx->nrelocsym] = sym_decorate(ctx, "" , extnames[idx]); |
| 141 | ctx->nrelocsym++; |
| 142 | } |
| 143 | ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code); |
| 144 | ctx->reloc[ctx->nreloc].sym = relocmap[idx]; |
| 145 | ctx->reloc[ctx->nreloc].type = type; |
| 146 | ctx->nreloc++; |
| 147 | #if LJ_TARGET_XBOX360 |
| 148 | return (int)(ctx->code - addr) + 4; /* Encode symbol offset of .text. */ |
| 149 | #else |
| 150 | return 0; /* Encode symbol offset of 0. */ |
| 151 | #endif |
| 152 | } |
| 153 | |
| 154 | /* Naive insertion sort. Performance doesn't matter here. */ |
| 155 | static void sym_insert(BuildCtx *ctx, int32_t ofs, |
| 156 | const char *prefix, const char *suffix) |
| 157 | { |
| 158 | ptrdiff_t i = ctx->nsym++; |
| 159 | while (i > 0) { |
| 160 | if (ctx->sym[i-1].ofs <= ofs) |
| 161 | break; |
| 162 | ctx->sym[i] = ctx->sym[i-1]; |
| 163 | i--; |
| 164 | } |
| 165 | ctx->sym[i].ofs = ofs; |
| 166 | ctx->sym[i].name = sym_decorate(ctx, prefix, suffix); |
| 167 | } |
| 168 | |
| 169 | /* Build the machine code. */ |
| 170 | static int build_code(BuildCtx *ctx) |
| 171 | { |
| 172 | int status; |
| 173 | int i; |
| 174 | |
| 175 | /* Initialize DynASM structures. */ |
| 176 | ctx->nglob = GLOB__MAX; |
| 177 | ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *)); |
| 178 | memset(ctx->glob, 0, ctx->nglob*sizeof(void *)); |
| 179 | ctx->nreloc = 0; |
| 180 | |
| 181 | ctx->globnames = globnames; |
| 182 | ctx->extnames = extnames; |
| 183 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); |
| 184 | ctx->nrelocsym = 0; |
| 185 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; |
| 186 | |
| 187 | ctx->dasm_ident = DASM_IDENT; |
| 188 | ctx->dasm_arch = DASM_ARCH; |
| 189 | |
| 190 | dasm_init(Dst, DASM_MAXSECTION); |
| 191 | dasm_setupglobal(Dst, ctx->glob, ctx->nglob); |
| 192 | dasm_setup(Dst, build_actionlist); |
| 193 | |
| 194 | /* Call arch-specific backend to emit the code. */ |
| 195 | ctx->npc = build_backend(ctx); |
| 196 | |
| 197 | /* Finalize the code. */ |
| 198 | (void)dasm_checkstep(Dst, -1); |
| 199 | if ((status = dasm_link(Dst, &ctx->codesz))) return status; |
| 200 | ctx->code = (uint8_t *)malloc(ctx->codesz); |
| 201 | if ((status = dasm_encode(Dst, (void *)ctx->code))) return status; |
| 202 | |
| 203 | /* Allocate symbol table and bytecode offsets. */ |
| 204 | ctx->beginsym = sym_decorate(ctx, "" , LABEL_PREFIX "vm_asm_begin" ); |
| 205 | ctx->sym = (BuildSym *)malloc((ctx->npc+ctx->nglob+1)*sizeof(BuildSym)); |
| 206 | ctx->nsym = 0; |
| 207 | ctx->bc_ofs = (int32_t *)malloc(ctx->npc*sizeof(int32_t)); |
| 208 | |
| 209 | /* Collect the opcodes (PC labels). */ |
| 210 | for (i = 0; i < ctx->npc; i++) { |
| 211 | int32_t ofs = dasm_getpclabel(Dst, i); |
| 212 | if (ofs < 0) return 0x22000000|i; |
| 213 | ctx->bc_ofs[i] = ofs; |
| 214 | if ((LJ_HASJIT || |
| 215 | !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP || |
| 216 | i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) && |
| 217 | (LJ_HASFFI || i != BC_KCDATA)) |
| 218 | sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]); |
| 219 | } |
| 220 | |
| 221 | /* Collect the globals (named labels). */ |
| 222 | for (i = 0; i < ctx->nglob; i++) { |
| 223 | const char *gl = globnames[i]; |
| 224 | int len = (int)strlen(gl); |
| 225 | if (!ctx->glob[i]) { |
| 226 | fprintf(stderr, "Error: undefined global %s\n" , gl); |
| 227 | exit(2); |
| 228 | } |
| 229 | /* Skip the _Z symbols. */ |
| 230 | if (!(len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z')) |
| 231 | sym_insert(ctx, (int32_t)((uint8_t *)(ctx->glob[i]) - ctx->code), |
| 232 | LABEL_PREFIX, globnames[i]); |
| 233 | } |
| 234 | |
| 235 | /* Close the address range. */ |
| 236 | sym_insert(ctx, (int32_t)ctx->codesz, "" , "" ); |
| 237 | ctx->nsym--; |
| 238 | |
| 239 | dasm_free(Dst); |
| 240 | |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | /* -- Generate VM enums --------------------------------------------------- */ |
| 245 | |
/* Stringified name of every BCDEF entry, in BCDEF order, NULL-terminated. */
const char *const bc_names[] = {
#define BCNAME(name, ma, mb, mc, mt) #name,
BCDEF(BCNAME)
#undef BCNAME
  NULL
};
| 252 | |
/* Stringified name of every IRDEF entry, in IRDEF order, NULL-terminated. */
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2) #name,
IRDEF(IRNAME)
#undef IRNAME
  NULL
};
| 259 | |
/* Stringified name of every IRTDEF entry, in IRTDEF order, NULL-terminated. */
const char *const irt_names[] = {
#define IRTNAME(name, size) #name,
IRTDEF(IRTNAME)
#undef IRTNAME
  NULL
};
| 266 | |
/* Stringified name of every IRFPMDEF entry, NULL-terminated. */
/* emit_vmdef() writes these out in lower case. */
const char *const irfpm_names[] = {
#define FPMNAME(name) #name,
IRFPMDEF(FPMNAME)
#undef FPMNAME
  NULL
};
| 273 | |
/* Stringified name of every IRFLDEF entry, NULL-terminated. */
/* emit_vmdef() lower-cases them and turns the first '_' into a '.'. */
const char *const irfield_names[] = {
#define FLNAME(name, ofs) #name,
IRFLDEF(FLNAME)
#undef FLNAME
  NULL
};
| 280 | |
/* Stringified name of every IRCALLDEF entry, NULL-terminated. */
/* Only the name argument is used, cond is ignored by this expansion. */
const char *const ircall_names[] = {
#define IRCALLNAME(cond, name, nargs, kind, type, flags) #name,
IRCALLDEF(IRCALLNAME)
#undef IRCALLNAME
  NULL
};
| 287 | |
/*
** Message text (the msg argument) of every TREDEF entry pulled in from
** lj_traceerr.h, NULL-terminated.
** NOTE(review): TREDEF is not #undef'd here -- presumably lj_traceerr.h
** does that itself; confirm.
*/
static const char *const trace_errors[] = {
#define TREDEF(name, msg) msg,
#include "lj_traceerr.h"
  NULL
};
| 293 | |
/*
** Copy s to buf with the ASCII letters 'A'..'Z' converted to lower case.
** All other characters are copied unchanged. No bounds check is done:
** buf must have room for strlen(s)+1 bytes. Returns buf.
*/
static const char *lower(char *buf, const char *s)
{
  size_t i;
  for (i = 0; s[i] != '\0'; i++) {
    char c = s[i];
    if (c >= 'A' && c <= 'Z')
      c = (char)(c + 0x20);
    buf[i] = c;
  }
  buf[i] = '\0';
  return buf;
}
| 304 | |
| 305 | /* Emit C source code for bytecode-related definitions. */ |
| 306 | static void emit_bcdef(BuildCtx *ctx) |
| 307 | { |
| 308 | int i; |
| 309 | fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n" ); |
| 310 | fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n" ); |
| 311 | for (i = 0; i < ctx->npc; i++) { |
| 312 | if (i != 0) |
| 313 | fprintf(ctx->fp, ",\n" ); |
| 314 | fprintf(ctx->fp, "%d" , ctx->bc_ofs[i]); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | /* Emit VM definitions as Lua code for debug modules. */ |
| 319 | static void emit_vmdef(BuildCtx *ctx) |
| 320 | { |
| 321 | char buf[80]; |
| 322 | int i; |
| 323 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n" ); |
| 324 | fprintf(ctx->fp, "return {\n\n" ); |
| 325 | |
| 326 | fprintf(ctx->fp, "bcnames = \"" ); |
| 327 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s" , bc_names[i]); |
| 328 | fprintf(ctx->fp, "\",\n\n" ); |
| 329 | |
| 330 | fprintf(ctx->fp, "irnames = \"" ); |
| 331 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s" , ir_names[i]); |
| 332 | fprintf(ctx->fp, "\",\n\n" ); |
| 333 | |
| 334 | fprintf(ctx->fp, "irfpm = { [0]=" ); |
| 335 | for (i = 0; irfpm_names[i]; i++) |
| 336 | fprintf(ctx->fp, "\"%s\", " , lower(buf, irfpm_names[i])); |
| 337 | fprintf(ctx->fp, "},\n\n" ); |
| 338 | |
| 339 | fprintf(ctx->fp, "irfield = { [0]=" ); |
| 340 | for (i = 0; irfield_names[i]; i++) { |
| 341 | char *p; |
| 342 | lower(buf, irfield_names[i]); |
| 343 | p = strchr(buf, '_'); |
| 344 | if (p) *p = '.'; |
| 345 | fprintf(ctx->fp, "\"%s\", " , buf); |
| 346 | } |
| 347 | fprintf(ctx->fp, "},\n\n" ); |
| 348 | |
| 349 | fprintf(ctx->fp, "ircall = {\n[0]=" ); |
| 350 | for (i = 0; ircall_names[i]; i++) |
| 351 | fprintf(ctx->fp, "\"%s\",\n" , ircall_names[i]); |
| 352 | fprintf(ctx->fp, "},\n\n" ); |
| 353 | |
| 354 | fprintf(ctx->fp, "traceerr = {\n[0]=" ); |
| 355 | for (i = 0; trace_errors[i]; i++) |
| 356 | fprintf(ctx->fp, "\"%s\",\n" , trace_errors[i]); |
| 357 | fprintf(ctx->fp, "},\n\n" ); |
| 358 | } |
| 359 | |
| 360 | /* -- Argument parsing ---------------------------------------------------- */ |
| 361 | |
/*
** Build mode names: the stringified name of every BUILDDEF entry,
** NULL-terminated. parsemode() returns the index of the matching entry
** cast to BuildMode, so the order has to match the BuildMode values.
*/
static const char *const modenames[] = {
#define BUILDNAME(name) #name,
BUILDDEF(BUILDNAME)
#undef BUILDNAME
  NULL
};
| 369 | |
| 370 | /* Print usage information and exit. */ |
| 371 | static void usage(void) |
| 372 | { |
| 373 | int i; |
| 374 | fprintf(stderr, LUAJIT_VERSION " VM builder.\n" ); |
| 375 | fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n" ); |
| 376 | fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n" ); |
| 377 | fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n" ); |
| 378 | fprintf(stderr, "Available modes:\n" ); |
| 379 | for (i = 0; i < BUILD__MAX; i++) |
| 380 | fprintf(stderr, " %s\n" , modenames[i]); |
| 381 | exit(1); |
| 382 | } |
| 383 | |
| 384 | /* Parse the output mode name. */ |
| 385 | static BuildMode parsemode(const char *mode) |
| 386 | { |
| 387 | int i; |
| 388 | for (i = 0; modenames[i]; i++) |
| 389 | if (!strcmp(mode, modenames[i])) |
| 390 | return (BuildMode)i; |
| 391 | usage(); |
| 392 | return (BuildMode)-1; |
| 393 | } |
| 394 | |
| 395 | /* Parse arguments. */ |
| 396 | static void parseargs(BuildCtx *ctx, char **argv) |
| 397 | { |
| 398 | const char *a; |
| 399 | int i; |
| 400 | ctx->mode = (BuildMode)-1; |
| 401 | ctx->outname = "-" ; |
| 402 | for (i = 1; (a = argv[i]) != NULL; i++) { |
| 403 | if (a[0] != '-') |
| 404 | break; |
| 405 | switch (a[1]) { |
| 406 | case '-': |
| 407 | if (a[2]) goto err; |
| 408 | i++; |
| 409 | goto ok; |
| 410 | case '\0': |
| 411 | goto ok; |
| 412 | case 'm': |
| 413 | i++; |
| 414 | if (a[2] || argv[i] == NULL) goto err; |
| 415 | ctx->mode = parsemode(argv[i]); |
| 416 | break; |
| 417 | case 'o': |
| 418 | i++; |
| 419 | if (a[2] || argv[i] == NULL) goto err; |
| 420 | ctx->outname = argv[i]; |
| 421 | break; |
| 422 | default: err: |
| 423 | usage(); |
| 424 | break; |
| 425 | } |
| 426 | } |
| 427 | ok: |
| 428 | ctx->args = argv+i; |
| 429 | if (ctx->mode == (BuildMode)-1) goto err; |
| 430 | } |
| 431 | |
| 432 | int main(int argc, char **argv) |
| 433 | { |
| 434 | BuildCtx ctx_; |
| 435 | BuildCtx *ctx = &ctx_; |
| 436 | int status, binmode; |
| 437 | |
| 438 | if (sizeof(void *) != 4*LJ_32+8*LJ_64) { |
| 439 | fprintf(stderr,"Error: pointer size mismatch in cross-build.\n" ); |
| 440 | fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n" ); |
| 441 | return 1; |
| 442 | } |
| 443 | |
| 444 | UNUSED(argc); |
| 445 | parseargs(ctx, argv); |
| 446 | |
| 447 | if ((status = build_code(ctx))) { |
| 448 | fprintf(stderr,"Error: DASM error %08x\n" , status); |
| 449 | return 1; |
| 450 | } |
| 451 | |
| 452 | switch (ctx->mode) { |
| 453 | case BUILD_peobj: |
| 454 | case BUILD_raw: |
| 455 | binmode = 1; |
| 456 | break; |
| 457 | default: |
| 458 | binmode = 0; |
| 459 | break; |
| 460 | } |
| 461 | |
| 462 | if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') { |
| 463 | ctx->fp = stdout; |
| 464 | #if defined(_WIN32) |
| 465 | if (binmode) |
| 466 | _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */ |
| 467 | #endif |
| 468 | } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w" ))) { |
| 469 | fprintf(stderr, "Error: cannot open output file '%s': %s\n" , |
| 470 | ctx->outname, strerror(errno)); |
| 471 | exit(1); |
| 472 | } |
| 473 | |
| 474 | switch (ctx->mode) { |
| 475 | case BUILD_elfasm: |
| 476 | case BUILD_coffasm: |
| 477 | case BUILD_machasm: |
| 478 | emit_asm(ctx); |
| 479 | emit_asm_debug(ctx); |
| 480 | break; |
| 481 | case BUILD_peobj: |
| 482 | emit_peobj(ctx); |
| 483 | break; |
| 484 | case BUILD_raw: |
| 485 | emit_raw(ctx); |
| 486 | break; |
| 487 | case BUILD_bcdef: |
| 488 | emit_bcdef(ctx); |
| 489 | emit_lib(ctx); |
| 490 | break; |
| 491 | case BUILD_vmdef: |
| 492 | emit_vmdef(ctx); |
| 493 | emit_lib(ctx); |
| 494 | fprintf(ctx->fp, "}\n\n" ); |
| 495 | break; |
| 496 | case BUILD_ffdef: |
| 497 | case BUILD_libdef: |
| 498 | case BUILD_recdef: |
| 499 | emit_lib(ctx); |
| 500 | break; |
| 501 | case BUILD_folddef: |
| 502 | emit_fold(ctx); |
| 503 | break; |
| 504 | default: |
| 505 | break; |
| 506 | } |
| 507 | |
| 508 | fflush(ctx->fp); |
| 509 | if (ferror(ctx->fp)) { |
| 510 | fprintf(stderr, "Error: cannot write to output file: %s\n" , |
| 511 | strerror(errno)); |
| 512 | exit(1); |
| 513 | } |
| 514 | fclose(ctx->fp); |
| 515 | |
| 516 | return 0; |
| 517 | } |
| 518 | |
| 519 | |