1 | /* |
2 | ** LuaJIT VM builder. |
3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
4 | ** |
5 | ** This is a tool to build the hand-tuned assembler code required for |
6 | ** LuaJIT's bytecode interpreter. It supports a variety of output formats |
7 | ** to feed different toolchains (see usage() below). |
8 | ** |
9 | ** This tool is not particularly optimized because it's only used while |
10 | ** _building_ LuaJIT. There's no point in distributing or installing it. |
11 | ** Only the object code generated by this tool is linked into LuaJIT. |
12 | ** |
13 | ** Caveat: some memory is not free'd, error handling is lazy. |
14 | ** It's a one-shot tool -- any effort fixing this would be wasted. |
15 | */ |
16 | |
17 | #include "buildvm.h" |
18 | #include "lj_obj.h" |
19 | #include "lj_gc.h" |
20 | #include "lj_bc.h" |
21 | #include "lj_ir.h" |
22 | #include "lj_ircall.h" |
23 | #include "lj_frame.h" |
24 | #include "lj_dispatch.h" |
25 | #if LJ_HASFFI |
26 | #include "lj_ctype.h" |
27 | #include "lj_ccall.h" |
28 | #endif |
29 | #include "luajit.h" |
30 | |
31 | #if defined(_WIN32) |
32 | #include <fcntl.h> |
33 | #include <io.h> |
34 | #endif |
35 | |
36 | /* ------------------------------------------------------------------------ */ |
37 | |
/* DynASM glue definitions. */
/* Name of the DynASM state argument; this file passes Dst to the dasm_*()
** calls in build_code(). Here it is simply the build context pointer.
*/
#define Dst ctx
/* Parameter declaration matching Dst: a BuildCtx pointer named ctx. */
#define Dst_DECL BuildCtx *ctx
/* Expands to the D member of the build context.
** NOTE(review): presumably the slot where DynASM keeps its own state --
** confirm against dasm_proto.h.
*/
#define Dst_REF (ctx->D)
/* NOTE(review): presumably enables DynASM's internal consistency checks
** (build_code() calls dasm_checkstep()) -- confirm against the encoder.
*/
#define DASM_CHECKS 1

#include "../dynasm/dasm_proto.h"

/* Glue macros for DynASM. */
/* Forward declaration: DASM_EXTERN below refers to collect_reloc(), which
** is defined further down in this file.
*/
static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);

/* Forwards all four arguments to collect_reloc() and yields its result. */
#define DASM_EXTERN(ctx, addr, idx, type) \
collect_reloc(ctx, addr, idx, type)
51 | |
52 | /* ------------------------------------------------------------------------ */ |
53 | |
54 | /* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */ |
55 | #define DASM_ALIGNED_WRITES 1 |
56 | |
57 | /* Embed architecture-specific DynASM encoder. */ |
58 | #if LJ_TARGET_X86ORX64 |
59 | #include "../dynasm/dasm_x86.h" |
60 | #elif LJ_TARGET_ARM |
61 | #include "../dynasm/dasm_arm.h" |
62 | #elif LJ_TARGET_ARM64 |
63 | #include "../dynasm/dasm_arm64.h" |
64 | #elif LJ_TARGET_PPC |
65 | #include "../dynasm/dasm_ppc.h" |
66 | #elif LJ_TARGET_MIPS |
67 | #include "../dynasm/dasm_mips.h" |
68 | #else |
69 | #error "No support for this architecture (yet)" |
70 | #endif |
71 | |
72 | /* Embed generated architecture-specific backend. */ |
73 | #include "buildvm_arch.h" |
74 | |
75 | /* ------------------------------------------------------------------------ */ |
76 | |
77 | void owrite(BuildCtx *ctx, const void *ptr, size_t sz) |
78 | { |
79 | if (fwrite(ptr, 1, sz, ctx->fp) != sz) { |
80 | fprintf(stderr, "Error: cannot write to output file: %s\n" , |
81 | strerror(errno)); |
82 | exit(1); |
83 | } |
84 | } |
85 | |
86 | /* ------------------------------------------------------------------------ */ |
87 | |
88 | /* Emit code as raw bytes. Only used for DynASM debugging. */ |
89 | static void emit_raw(BuildCtx *ctx) |
90 | { |
91 | owrite(ctx, ctx->code, ctx->codesz); |
92 | } |
93 | |
94 | /* -- Build machine code -------------------------------------------------- */ |
95 | |
96 | static const char *sym_decorate(BuildCtx *ctx, |
97 | const char *prefix, const char *suffix) |
98 | { |
99 | char name[256]; |
100 | char *p; |
101 | #if LJ_64 |
102 | const char *symprefix = ctx->mode == BUILD_machasm ? "_" : "" ; |
103 | #elif LJ_TARGET_XBOX360 |
104 | const char *symprefix = "" ; |
105 | #else |
106 | const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : "" ; |
107 | #endif |
108 | sprintf(name, "%s%s%s" , symprefix, prefix, suffix); |
109 | p = strchr(name, '@'); |
110 | if (p) { |
111 | #if LJ_TARGET_X86ORX64 |
112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) |
113 | name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ |
114 | else |
115 | *p = '\0'; |
116 | #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE |
117 | /* Keep @plt etc. */ |
118 | #else |
119 | *p = '\0'; |
120 | #endif |
121 | } |
122 | p = (char *)malloc(strlen(name)+1); /* MSVC doesn't like strdup. */ |
123 | strcpy(p, name); |
124 | return p; |
125 | } |
126 | |
127 | #define NRELOCSYM (sizeof(extnames)/sizeof(extnames[0])-1) |
128 | |
129 | static int relocmap[NRELOCSYM]; |
130 | |
131 | /* Collect external relocations. */ |
132 | static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type) |
133 | { |
134 | if (ctx->nreloc >= BUILD_MAX_RELOC) { |
135 | fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n" ); |
136 | exit(1); |
137 | } |
138 | if (relocmap[idx] < 0) { |
139 | relocmap[idx] = ctx->nrelocsym; |
140 | ctx->relocsym[ctx->nrelocsym] = sym_decorate(ctx, "" , extnames[idx]); |
141 | ctx->nrelocsym++; |
142 | } |
143 | ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code); |
144 | ctx->reloc[ctx->nreloc].sym = relocmap[idx]; |
145 | ctx->reloc[ctx->nreloc].type = type; |
146 | ctx->nreloc++; |
147 | #if LJ_TARGET_XBOX360 |
148 | return (int)(ctx->code - addr) + 4; /* Encode symbol offset of .text. */ |
149 | #else |
150 | return 0; /* Encode symbol offset of 0. */ |
151 | #endif |
152 | } |
153 | |
154 | /* Naive insertion sort. Performance doesn't matter here. */ |
155 | static void sym_insert(BuildCtx *ctx, int32_t ofs, |
156 | const char *prefix, const char *suffix) |
157 | { |
158 | ptrdiff_t i = ctx->nsym++; |
159 | while (i > 0) { |
160 | if (ctx->sym[i-1].ofs <= ofs) |
161 | break; |
162 | ctx->sym[i] = ctx->sym[i-1]; |
163 | i--; |
164 | } |
165 | ctx->sym[i].ofs = ofs; |
166 | ctx->sym[i].name = sym_decorate(ctx, prefix, suffix); |
167 | } |
168 | |
169 | /* Build the machine code. */ |
170 | static int build_code(BuildCtx *ctx) |
171 | { |
172 | int status; |
173 | int i; |
174 | |
175 | /* Initialize DynASM structures. */ |
176 | ctx->nglob = GLOB__MAX; |
177 | ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *)); |
178 | memset(ctx->glob, 0, ctx->nglob*sizeof(void *)); |
179 | ctx->nreloc = 0; |
180 | |
181 | ctx->globnames = globnames; |
182 | ctx->extnames = extnames; |
183 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); |
184 | ctx->nrelocsym = 0; |
185 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; |
186 | |
187 | ctx->dasm_ident = DASM_IDENT; |
188 | ctx->dasm_arch = DASM_ARCH; |
189 | |
190 | dasm_init(Dst, DASM_MAXSECTION); |
191 | dasm_setupglobal(Dst, ctx->glob, ctx->nglob); |
192 | dasm_setup(Dst, build_actionlist); |
193 | |
194 | /* Call arch-specific backend to emit the code. */ |
195 | ctx->npc = build_backend(ctx); |
196 | |
197 | /* Finalize the code. */ |
198 | (void)dasm_checkstep(Dst, -1); |
199 | if ((status = dasm_link(Dst, &ctx->codesz))) return status; |
200 | ctx->code = (uint8_t *)malloc(ctx->codesz); |
201 | if ((status = dasm_encode(Dst, (void *)ctx->code))) return status; |
202 | |
203 | /* Allocate symbol table and bytecode offsets. */ |
204 | ctx->beginsym = sym_decorate(ctx, "" , LABEL_PREFIX "vm_asm_begin" ); |
205 | ctx->sym = (BuildSym *)malloc((ctx->npc+ctx->nglob+1)*sizeof(BuildSym)); |
206 | ctx->nsym = 0; |
207 | ctx->bc_ofs = (int32_t *)malloc(ctx->npc*sizeof(int32_t)); |
208 | |
209 | /* Collect the opcodes (PC labels). */ |
210 | for (i = 0; i < ctx->npc; i++) { |
211 | int32_t ofs = dasm_getpclabel(Dst, i); |
212 | if (ofs < 0) return 0x22000000|i; |
213 | ctx->bc_ofs[i] = ofs; |
214 | if ((LJ_HASJIT || |
215 | !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP || |
216 | i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) && |
217 | (LJ_HASFFI || i != BC_KCDATA)) |
218 | sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]); |
219 | } |
220 | |
221 | /* Collect the globals (named labels). */ |
222 | for (i = 0; i < ctx->nglob; i++) { |
223 | const char *gl = globnames[i]; |
224 | int len = (int)strlen(gl); |
225 | if (!ctx->glob[i]) { |
226 | fprintf(stderr, "Error: undefined global %s\n" , gl); |
227 | exit(2); |
228 | } |
229 | /* Skip the _Z symbols. */ |
230 | if (!(len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z')) |
231 | sym_insert(ctx, (int32_t)((uint8_t *)(ctx->glob[i]) - ctx->code), |
232 | LABEL_PREFIX, globnames[i]); |
233 | } |
234 | |
235 | /* Close the address range. */ |
236 | sym_insert(ctx, (int32_t)ctx->codesz, "" , "" ); |
237 | ctx->nsym--; |
238 | |
239 | dasm_free(Dst); |
240 | |
241 | return 0; |
242 | } |
243 | |
244 | /* -- Generate VM enums --------------------------------------------------- */ |
245 | |
/* Bytecode names: one stringified entry per BCDEF() item, in definition
** order, followed by a NULL terminator. Only the first macro argument
** (the name) is used.
*/
const char *const bc_names[] = {
#define BCNAME(name, ma, mb, mc, mt) #name,
BCDEF(BCNAME)
#undef BCNAME
NULL
};

/* IR instruction names from IRDEF(), NULL-terminated. */
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2) #name,
IRDEF(IRNAME)
#undef IRNAME
NULL
};

/* IR type names from IRTDEF(), NULL-terminated. The size argument is
** ignored.
*/
const char *const irt_names[] = {
#define IRTNAME(name, size) #name,
IRTDEF(IRTNAME)
#undef IRTNAME
NULL
};

/* Names from IRFPMDEF(), NULL-terminated. */
const char *const irfpm_names[] = {
#define FPMNAME(name) #name,
IRFPMDEF(FPMNAME)
#undef FPMNAME
NULL
};

/* Names from IRFLDEF(), NULL-terminated. The ofs argument is ignored. */
const char *const irfield_names[] = {
#define FLNAME(name, ofs) #name,
IRFLDEF(FLNAME)
#undef FLNAME
NULL
};

/* Names from IRCALLDEF(), NULL-terminated. The cond argument is ignored,
** so every listed call gets an entry.
*/
const char *const ircall_names[] = {
#define IRCALLNAME(cond, name, nargs, kind, type, flags) #name,
IRCALLDEF(IRCALLNAME)
#undef IRCALLNAME
NULL
};

/* Trace error messages: the msg argument of every TREDEF() item in
** lj_traceerr.h, NULL-terminated.
** NOTE(review): TREDEF is not #undef'd here -- presumably the included
** header does that itself; confirm.
*/
static const char *const trace_errors[] = {
#define TREDEF(name, msg) msg,
#include "lj_traceerr.h"
NULL
};
293 | |
/*
** Copy s to buf, converting the ASCII letters 'A'..'Z' to lower case.
** All other characters are copied unchanged. Returns buf.
** buf must have room for strlen(s)+1 bytes; this is not checked.
*/
static const char *lower(char *buf, const char *s)
{
  size_t i;
  for (i = 0; s[i] != '\0'; i++) {
    char c = s[i];
    if (c >= 'A' && c <= 'Z')
      c = (char)(c + 0x20);
    buf[i] = c;
  }
  buf[i] = '\0';
  return buf;
}
304 | |
305 | /* Emit C source code for bytecode-related definitions. */ |
306 | static void emit_bcdef(BuildCtx *ctx) |
307 | { |
308 | int i; |
309 | fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n" ); |
310 | fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n" ); |
311 | for (i = 0; i < ctx->npc; i++) { |
312 | if (i != 0) |
313 | fprintf(ctx->fp, ",\n" ); |
314 | fprintf(ctx->fp, "%d" , ctx->bc_ofs[i]); |
315 | } |
316 | } |
317 | |
318 | /* Emit VM definitions as Lua code for debug modules. */ |
319 | static void emit_vmdef(BuildCtx *ctx) |
320 | { |
321 | char buf[80]; |
322 | int i; |
323 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n" ); |
324 | fprintf(ctx->fp, "return {\n\n" ); |
325 | |
326 | fprintf(ctx->fp, "bcnames = \"" ); |
327 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s" , bc_names[i]); |
328 | fprintf(ctx->fp, "\",\n\n" ); |
329 | |
330 | fprintf(ctx->fp, "irnames = \"" ); |
331 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s" , ir_names[i]); |
332 | fprintf(ctx->fp, "\",\n\n" ); |
333 | |
334 | fprintf(ctx->fp, "irfpm = { [0]=" ); |
335 | for (i = 0; irfpm_names[i]; i++) |
336 | fprintf(ctx->fp, "\"%s\", " , lower(buf, irfpm_names[i])); |
337 | fprintf(ctx->fp, "},\n\n" ); |
338 | |
339 | fprintf(ctx->fp, "irfield = { [0]=" ); |
340 | for (i = 0; irfield_names[i]; i++) { |
341 | char *p; |
342 | lower(buf, irfield_names[i]); |
343 | p = strchr(buf, '_'); |
344 | if (p) *p = '.'; |
345 | fprintf(ctx->fp, "\"%s\", " , buf); |
346 | } |
347 | fprintf(ctx->fp, "},\n\n" ); |
348 | |
349 | fprintf(ctx->fp, "ircall = {\n[0]=" ); |
350 | for (i = 0; ircall_names[i]; i++) |
351 | fprintf(ctx->fp, "\"%s\",\n" , ircall_names[i]); |
352 | fprintf(ctx->fp, "},\n\n" ); |
353 | |
354 | fprintf(ctx->fp, "traceerr = {\n[0]=" ); |
355 | for (i = 0; trace_errors[i]; i++) |
356 | fprintf(ctx->fp, "\"%s\",\n" , trace_errors[i]); |
357 | fprintf(ctx->fp, "},\n\n" ); |
358 | } |
359 | |
360 | /* -- Argument parsing ---------------------------------------------------- */ |
361 | |
/* Build mode names. */
/* One stringified entry per BUILDDEF() item, in definition order, plus a
** NULL terminator. parsemode() returns the index of a matching name cast
** to BuildMode, so the order here has to match the BuildMode values.
*/
static const char *const modenames[] = {
#define BUILDNAME(name) #name,
BUILDDEF(BUILDNAME)
#undef BUILDNAME
NULL
};
369 | |
370 | /* Print usage information and exit. */ |
371 | static void usage(void) |
372 | { |
373 | int i; |
374 | fprintf(stderr, LUAJIT_VERSION " VM builder.\n" ); |
375 | fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n" ); |
376 | fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n" ); |
377 | fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n" ); |
378 | fprintf(stderr, "Available modes:\n" ); |
379 | for (i = 0; i < BUILD__MAX; i++) |
380 | fprintf(stderr, " %s\n" , modenames[i]); |
381 | exit(1); |
382 | } |
383 | |
384 | /* Parse the output mode name. */ |
385 | static BuildMode parsemode(const char *mode) |
386 | { |
387 | int i; |
388 | for (i = 0; modenames[i]; i++) |
389 | if (!strcmp(mode, modenames[i])) |
390 | return (BuildMode)i; |
391 | usage(); |
392 | return (BuildMode)-1; |
393 | } |
394 | |
395 | /* Parse arguments. */ |
396 | static void parseargs(BuildCtx *ctx, char **argv) |
397 | { |
398 | const char *a; |
399 | int i; |
400 | ctx->mode = (BuildMode)-1; |
401 | ctx->outname = "-" ; |
402 | for (i = 1; (a = argv[i]) != NULL; i++) { |
403 | if (a[0] != '-') |
404 | break; |
405 | switch (a[1]) { |
406 | case '-': |
407 | if (a[2]) goto err; |
408 | i++; |
409 | goto ok; |
410 | case '\0': |
411 | goto ok; |
412 | case 'm': |
413 | i++; |
414 | if (a[2] || argv[i] == NULL) goto err; |
415 | ctx->mode = parsemode(argv[i]); |
416 | break; |
417 | case 'o': |
418 | i++; |
419 | if (a[2] || argv[i] == NULL) goto err; |
420 | ctx->outname = argv[i]; |
421 | break; |
422 | default: err: |
423 | usage(); |
424 | break; |
425 | } |
426 | } |
427 | ok: |
428 | ctx->args = argv+i; |
429 | if (ctx->mode == (BuildMode)-1) goto err; |
430 | } |
431 | |
432 | int main(int argc, char **argv) |
433 | { |
434 | BuildCtx ctx_; |
435 | BuildCtx *ctx = &ctx_; |
436 | int status, binmode; |
437 | |
438 | if (sizeof(void *) != 4*LJ_32+8*LJ_64) { |
439 | fprintf(stderr,"Error: pointer size mismatch in cross-build.\n" ); |
440 | fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n" ); |
441 | return 1; |
442 | } |
443 | |
444 | UNUSED(argc); |
445 | parseargs(ctx, argv); |
446 | |
447 | if ((status = build_code(ctx))) { |
448 | fprintf(stderr,"Error: DASM error %08x\n" , status); |
449 | return 1; |
450 | } |
451 | |
452 | switch (ctx->mode) { |
453 | case BUILD_peobj: |
454 | case BUILD_raw: |
455 | binmode = 1; |
456 | break; |
457 | default: |
458 | binmode = 0; |
459 | break; |
460 | } |
461 | |
462 | if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') { |
463 | ctx->fp = stdout; |
464 | #if defined(_WIN32) |
465 | if (binmode) |
466 | _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */ |
467 | #endif |
468 | } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w" ))) { |
469 | fprintf(stderr, "Error: cannot open output file '%s': %s\n" , |
470 | ctx->outname, strerror(errno)); |
471 | exit(1); |
472 | } |
473 | |
474 | switch (ctx->mode) { |
475 | case BUILD_elfasm: |
476 | case BUILD_coffasm: |
477 | case BUILD_machasm: |
478 | emit_asm(ctx); |
479 | emit_asm_debug(ctx); |
480 | break; |
481 | case BUILD_peobj: |
482 | emit_peobj(ctx); |
483 | break; |
484 | case BUILD_raw: |
485 | emit_raw(ctx); |
486 | break; |
487 | case BUILD_bcdef: |
488 | emit_bcdef(ctx); |
489 | emit_lib(ctx); |
490 | break; |
491 | case BUILD_vmdef: |
492 | emit_vmdef(ctx); |
493 | emit_lib(ctx); |
494 | fprintf(ctx->fp, "}\n\n" ); |
495 | break; |
496 | case BUILD_ffdef: |
497 | case BUILD_libdef: |
498 | case BUILD_recdef: |
499 | emit_lib(ctx); |
500 | break; |
501 | case BUILD_folddef: |
502 | emit_fold(ctx); |
503 | break; |
504 | default: |
505 | break; |
506 | } |
507 | |
508 | fflush(ctx->fp); |
509 | if (ferror(ctx->fp)) { |
510 | fprintf(stderr, "Error: cannot write to output file: %s\n" , |
511 | strerror(errno)); |
512 | exit(1); |
513 | } |
514 | fclose(ctx->fp); |
515 | |
516 | return 0; |
517 | } |
518 | |
519 | |