1 | /* |
2 | ** LuaJIT VM builder. |
3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
4 | ** |
5 | ** This is a tool to build the hand-tuned assembler code required for |
6 | ** LuaJIT's bytecode interpreter. It supports a variety of output formats |
7 | ** to feed different toolchains (see usage() below). |
8 | ** |
9 | ** This tool is not particularly optimized because it's only used while |
10 | ** _building_ LuaJIT. There's no point in distributing or installing it. |
11 | ** Only the object code generated by this tool is linked into LuaJIT. |
12 | ** |
13 | ** Caveat: some memory is not free'd, error handling is lazy. |
14 | ** It's a one-shot tool -- any effort fixing this would be wasted. |
15 | */ |
16 | |
17 | #include "buildvm.h" |
18 | #include "lj_obj.h" |
19 | #include "lj_gc.h" |
20 | #include "lj_bc.h" |
21 | #include "lj_ir.h" |
22 | #include "lj_ircall.h" |
23 | #include "lj_frame.h" |
24 | #include "lj_dispatch.h" |
25 | #if LJ_HASFFI |
26 | #include "lj_ctype.h" |
27 | #include "lj_ccall.h" |
28 | #endif |
29 | #include "luajit.h" |
30 | |
31 | #if defined(_WIN32) |
32 | #include <fcntl.h> |
33 | #include <io.h> |
34 | #endif |
35 | |
36 | /* ------------------------------------------------------------------------ */ |
37 | |
/* DynASM glue definitions.
** The Dst* names are defined so that they all resolve to the BuildCtx
** pointer named ctx (Dst_REF to its D member).
*/
#define Dst ctx
#define Dst_DECL BuildCtx *ctx
#define Dst_REF (ctx->D)
/* NOTE(review): presumably enables DynASM's consistency checks -- confirm in the encoder header. */
#define DASM_CHECKS 1

#include "../dynasm/dasm_proto.h"

/* Glue macros for DynASM. */
/* Forward declaration: DASM_EXTERN below expands to a call of this function. */
static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);

/* Route DASM_EXTERN to collect_reloc(), passing the four arguments through. */
#define DASM_EXTERN(ctx, addr, idx, type) \
  collect_reloc(ctx, addr, idx, type)
51 | |
52 | /* ------------------------------------------------------------------------ */ |
53 | |
54 | /* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */ |
55 | #define DASM_ALIGNED_WRITES 1 |
56 | |
57 | /* Embed architecture-specific DynASM encoder. */ |
58 | #if LJ_TARGET_X86ORX64 |
59 | #include "../dynasm/dasm_x86.h" |
60 | #elif LJ_TARGET_ARM |
61 | #include "../dynasm/dasm_arm.h" |
62 | #elif LJ_TARGET_PPC |
63 | #include "../dynasm/dasm_ppc.h" |
64 | #elif LJ_TARGET_PPCSPE |
65 | #include "../dynasm/dasm_ppc.h" |
66 | #elif LJ_TARGET_MIPS |
67 | #include "../dynasm/dasm_mips.h" |
68 | #else |
69 | #error "No support for this architecture (yet)" |
70 | #endif |
71 | |
72 | /* Embed generated architecture-specific backend. */ |
73 | #include "buildvm_arch.h" |
74 | |
75 | /* ------------------------------------------------------------------------ */ |
76 | |
77 | void owrite(BuildCtx *ctx, const void *ptr, size_t sz) |
78 | { |
79 | if (fwrite(ptr, 1, sz, ctx->fp) != sz) { |
80 | fprintf(stderr, "Error: cannot write to output file: %s\n" , |
81 | strerror(errno)); |
82 | exit(1); |
83 | } |
84 | } |
85 | |
86 | /* ------------------------------------------------------------------------ */ |
87 | |
88 | /* Emit code as raw bytes. Only used for DynASM debugging. */ |
89 | static void emit_raw(BuildCtx *ctx) |
90 | { |
91 | owrite(ctx, ctx->code, ctx->codesz); |
92 | } |
93 | |
94 | /* -- Build machine code -------------------------------------------------- */ |
95 | |
96 | static const char *sym_decorate(BuildCtx *ctx, |
97 | const char *prefix, const char *suffix) |
98 | { |
99 | char name[256]; |
100 | char *p; |
101 | #if LJ_64 |
102 | const char *symprefix = ctx->mode == BUILD_machasm ? "_" : "" ; |
103 | #elif LJ_TARGET_XBOX360 |
104 | const char *symprefix = "" ; |
105 | #else |
106 | const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : "" ; |
107 | #endif |
108 | sprintf(name, "%s%s%s" , symprefix, prefix, suffix); |
109 | p = strchr(name, '@'); |
110 | if (p) { |
111 | #if LJ_TARGET_X86ORX64 |
112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) |
113 | name[0] = '@'; |
114 | else |
115 | *p = '\0'; |
116 | #elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE |
117 | /* Keep @plt. */ |
118 | #else |
119 | *p = '\0'; |
120 | #endif |
121 | } |
122 | p = (char *)malloc(strlen(name)+1); /* MSVC doesn't like strdup. */ |
123 | strcpy(p, name); |
124 | return p; |
125 | } |
126 | |
/* Element count of extnames[] minus one.
** NOTE(review): the -1 presumably drops a terminating sentinel entry --
** confirm against the generated backend that defines extnames[].
*/
#define NRELOCSYM (sizeof(extnames)/sizeof(extnames[0])-1)

/* Maps an extnames[] index to its slot in ctx->relocsym.
** Entries are reset to -1 by build_code() and filled in by collect_reloc()
** the first time an external symbol is referenced.
*/
static int relocmap[NRELOCSYM];
130 | |
131 | /* Collect external relocations. */ |
132 | static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type) |
133 | { |
134 | if (ctx->nreloc >= BUILD_MAX_RELOC) { |
135 | fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n" ); |
136 | exit(1); |
137 | } |
138 | if (relocmap[idx] < 0) { |
139 | relocmap[idx] = ctx->nrelocsym; |
140 | ctx->relocsym[ctx->nrelocsym] = sym_decorate(ctx, "" , extnames[idx]); |
141 | ctx->nrelocsym++; |
142 | } |
143 | ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code); |
144 | ctx->reloc[ctx->nreloc].sym = relocmap[idx]; |
145 | ctx->reloc[ctx->nreloc].type = type; |
146 | ctx->nreloc++; |
147 | #if LJ_TARGET_XBOX360 |
148 | return (int)(ctx->code - addr) + 4; /* Encode symbol offset of .text. */ |
149 | #else |
150 | return 0; /* Encode symbol offset of 0. */ |
151 | #endif |
152 | } |
153 | |
154 | /* Naive insertion sort. Performance doesn't matter here. */ |
155 | static void sym_insert(BuildCtx *ctx, int32_t ofs, |
156 | const char *prefix, const char *suffix) |
157 | { |
158 | ptrdiff_t i = ctx->nsym++; |
159 | while (i > 0) { |
160 | if (ctx->sym[i-1].ofs <= ofs) |
161 | break; |
162 | ctx->sym[i] = ctx->sym[i-1]; |
163 | i--; |
164 | } |
165 | ctx->sym[i].ofs = ofs; |
166 | ctx->sym[i].name = sym_decorate(ctx, prefix, suffix); |
167 | } |
168 | |
169 | /* Build the machine code. */ |
170 | static int build_code(BuildCtx *ctx) |
171 | { |
172 | int status; |
173 | int i; |
174 | |
175 | /* Initialize DynASM structures. */ |
176 | ctx->nglob = GLOB__MAX; |
177 | ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *)); |
178 | memset(ctx->glob, 0, ctx->nglob*sizeof(void *)); |
179 | ctx->nreloc = 0; |
180 | |
181 | ctx->globnames = globnames; |
182 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); |
183 | ctx->nrelocsym = 0; |
184 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; |
185 | |
186 | ctx->dasm_ident = DASM_IDENT; |
187 | ctx->dasm_arch = DASM_ARCH; |
188 | |
189 | dasm_init(Dst, DASM_MAXSECTION); |
190 | dasm_setupglobal(Dst, ctx->glob, ctx->nglob); |
191 | dasm_setup(Dst, build_actionlist); |
192 | |
193 | /* Call arch-specific backend to emit the code. */ |
194 | ctx->npc = build_backend(ctx); |
195 | |
196 | /* Finalize the code. */ |
197 | (void)dasm_checkstep(Dst, -1); |
198 | if ((status = dasm_link(Dst, &ctx->codesz))) return status; |
199 | ctx->code = (uint8_t *)malloc(ctx->codesz); |
200 | if ((status = dasm_encode(Dst, (void *)ctx->code))) return status; |
201 | |
202 | /* Allocate symbol table and bytecode offsets. */ |
203 | ctx->beginsym = sym_decorate(ctx, "" , LABEL_PREFIX "vm_asm_begin" ); |
204 | ctx->sym = (BuildSym *)malloc((ctx->npc+ctx->nglob+1)*sizeof(BuildSym)); |
205 | ctx->nsym = 0; |
206 | ctx->bc_ofs = (int32_t *)malloc(ctx->npc*sizeof(int32_t)); |
207 | |
208 | /* Collect the opcodes (PC labels). */ |
209 | for (i = 0; i < ctx->npc; i++) { |
210 | int32_t ofs = dasm_getpclabel(Dst, i); |
211 | if (ofs < 0) return 0x22000000|i; |
212 | ctx->bc_ofs[i] = ofs; |
213 | if ((LJ_HASJIT || |
214 | !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP || |
215 | i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) && |
216 | (LJ_HASFFI || i != BC_KCDATA)) |
217 | sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]); |
218 | } |
219 | |
220 | /* Collect the globals (named labels). */ |
221 | for (i = 0; i < ctx->nglob; i++) { |
222 | const char *gl = globnames[i]; |
223 | int len = (int)strlen(gl); |
224 | if (!ctx->glob[i]) { |
225 | fprintf(stderr, "Error: undefined global %s\n" , gl); |
226 | exit(2); |
227 | } |
228 | /* Skip the _Z symbols. */ |
229 | if (!(len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z')) |
230 | sym_insert(ctx, (int32_t)((uint8_t *)(ctx->glob[i]) - ctx->code), |
231 | LABEL_PREFIX, globnames[i]); |
232 | } |
233 | |
234 | /* Close the address range. */ |
235 | sym_insert(ctx, (int32_t)ctx->codesz, "" , "" ); |
236 | ctx->nsym--; |
237 | |
238 | dasm_free(Dst); |
239 | |
240 | return 0; |
241 | } |
242 | |
243 | /* -- Generate VM enums --------------------------------------------------- */ |
244 | |
/* NULL-terminated name tables, each generated from an X-macro list.
** Every entry is one macro argument of the list item: the stringized name
** for all tables except trace_errors, which uses the message argument.
*/

/* Bytecode names, from BCDEF. */
const char *const bc_names[] = {
#define BCNAME(name, ma, mb, mc, mt)       #name,
BCDEF(BCNAME)
#undef BCNAME
  NULL
};

/* IR instruction names, from IRDEF. */
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2)	#name,
IRDEF(IRNAME)
#undef IRNAME
  NULL
};

/* IR type names, from IRTDEF. */
const char *const irt_names[] = {
#define IRTNAME(name, size)	#name,
IRTDEF(IRTNAME)
#undef IRTNAME
  NULL
};

/* Names from IRFPMDEF. */
const char *const irfpm_names[] = {
#define FPMNAME(name)		#name,
IRFPMDEF(FPMNAME)
#undef FPMNAME
  NULL
};

/* IR field names, from IRFLDEF. */
const char *const irfield_names[] = {
#define FLNAME(name, ofs)	#name,
IRFLDEF(FLNAME)
#undef FLNAME
  NULL
};

/* IR call names, from IRCALLDEF. */
const char *const ircall_names[] = {
#define IRCALLNAME(cond, name, nargs, kind, type, flags)	#name,
IRCALLDEF(IRCALLNAME)
#undef IRCALLNAME
  NULL
};

/* Trace error messages: TREDEF keeps only the message argument.
** NOTE(review): TREDEF is not #undef'ed here; presumably lj_traceerr.h does
** that itself -- confirm.
*/
static const char *const trace_errors[] = {
#define TREDEF(name, msg)	msg,
#include "lj_traceerr.h"
  NULL
};
292 | |
/* Copy s into buf, converting the letters 'A'..'Z' to lower case.
** All other characters are copied unchanged. buf must have room for
** strlen(s)+1 bytes; no bounds are checked. Returns buf.
*/
static const char *lower(char *buf, const char *s)
{
  size_t k;
  for (k = 0; s[k] != '\0'; k++) {
    char c = s[k];
    if (c >= 'A' && c <= 'Z')
      c = (char)(c + 0x20);
    buf[k] = c;
  }
  buf[k] = '\0';
  return buf;
}
303 | |
304 | /* Emit C source code for bytecode-related definitions. */ |
305 | static void emit_bcdef(BuildCtx *ctx) |
306 | { |
307 | int i; |
308 | fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n" ); |
309 | fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n" ); |
310 | for (i = 0; i < ctx->npc; i++) { |
311 | if (i != 0) |
312 | fprintf(ctx->fp, ",\n" ); |
313 | fprintf(ctx->fp, "%d" , ctx->bc_ofs[i]); |
314 | } |
315 | } |
316 | |
317 | /* Emit VM definitions as Lua code for debug modules. */ |
318 | static void emit_vmdef(BuildCtx *ctx) |
319 | { |
320 | char buf[80]; |
321 | int i; |
322 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n" ); |
323 | fprintf(ctx->fp, "module(...)\n\n" ); |
324 | |
325 | fprintf(ctx->fp, "bcnames = \"" ); |
326 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s" , bc_names[i]); |
327 | fprintf(ctx->fp, "\"\n\n" ); |
328 | |
329 | fprintf(ctx->fp, "irnames = \"" ); |
330 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s" , ir_names[i]); |
331 | fprintf(ctx->fp, "\"\n\n" ); |
332 | |
333 | fprintf(ctx->fp, "irfpm = { [0]=" ); |
334 | for (i = 0; irfpm_names[i]; i++) |
335 | fprintf(ctx->fp, "\"%s\", " , lower(buf, irfpm_names[i])); |
336 | fprintf(ctx->fp, "}\n\n" ); |
337 | |
338 | fprintf(ctx->fp, "irfield = { [0]=" ); |
339 | for (i = 0; irfield_names[i]; i++) { |
340 | char *p; |
341 | lower(buf, irfield_names[i]); |
342 | p = strchr(buf, '_'); |
343 | if (p) *p = '.'; |
344 | fprintf(ctx->fp, "\"%s\", " , buf); |
345 | } |
346 | fprintf(ctx->fp, "}\n\n" ); |
347 | |
348 | fprintf(ctx->fp, "ircall = {\n[0]=" ); |
349 | for (i = 0; ircall_names[i]; i++) |
350 | fprintf(ctx->fp, "\"%s\",\n" , ircall_names[i]); |
351 | fprintf(ctx->fp, "}\n\n" ); |
352 | |
353 | fprintf(ctx->fp, "traceerr = {\n[0]=" ); |
354 | for (i = 0; trace_errors[i]; i++) |
355 | fprintf(ctx->fp, "\"%s\",\n" , trace_errors[i]); |
356 | fprintf(ctx->fp, "}\n\n" ); |
357 | } |
358 | |
359 | /* -- Argument parsing ---------------------------------------------------- */ |
360 | |
/* Build mode names.
** One stringized name per BUILDDEF entry, followed by a NULL terminator.
** parsemode() returns the index of the matching name as the BuildMode.
*/
static const char *const modenames[] = {
#define BUILDNAME(name)		#name,
BUILDDEF(BUILDNAME)
#undef BUILDNAME
  NULL
};
368 | |
369 | /* Print usage information and exit. */ |
370 | static void usage(void) |
371 | { |
372 | int i; |
373 | fprintf(stderr, LUAJIT_VERSION " VM builder.\n" ); |
374 | fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n" ); |
375 | fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n" ); |
376 | fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n" ); |
377 | fprintf(stderr, "Available modes:\n" ); |
378 | for (i = 0; i < BUILD__MAX; i++) |
379 | fprintf(stderr, " %s\n" , modenames[i]); |
380 | exit(1); |
381 | } |
382 | |
383 | /* Parse the output mode name. */ |
384 | static BuildMode parsemode(const char *mode) |
385 | { |
386 | int i; |
387 | for (i = 0; modenames[i]; i++) |
388 | if (!strcmp(mode, modenames[i])) |
389 | return (BuildMode)i; |
390 | usage(); |
391 | return (BuildMode)-1; |
392 | } |
393 | |
394 | /* Parse arguments. */ |
395 | static void parseargs(BuildCtx *ctx, char **argv) |
396 | { |
397 | const char *a; |
398 | int i; |
399 | ctx->mode = (BuildMode)-1; |
400 | ctx->outname = "-" ; |
401 | for (i = 1; (a = argv[i]) != NULL; i++) { |
402 | if (a[0] != '-') |
403 | break; |
404 | switch (a[1]) { |
405 | case '-': |
406 | if (a[2]) goto err; |
407 | i++; |
408 | goto ok; |
409 | case '\0': |
410 | goto ok; |
411 | case 'm': |
412 | i++; |
413 | if (a[2] || argv[i] == NULL) goto err; |
414 | ctx->mode = parsemode(argv[i]); |
415 | break; |
416 | case 'o': |
417 | i++; |
418 | if (a[2] || argv[i] == NULL) goto err; |
419 | ctx->outname = argv[i]; |
420 | break; |
421 | default: err: |
422 | usage(); |
423 | break; |
424 | } |
425 | } |
426 | ok: |
427 | ctx->args = argv+i; |
428 | if (ctx->mode == (BuildMode)-1) goto err; |
429 | } |
430 | |
431 | int main(int argc, char **argv) |
432 | { |
433 | BuildCtx ctx_; |
434 | BuildCtx *ctx = &ctx_; |
435 | int status, binmode; |
436 | |
437 | if (sizeof(void *) != 4*LJ_32+8*LJ_64) { |
438 | fprintf(stderr,"Error: pointer size mismatch in cross-build.\n" ); |
439 | fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n" ); |
440 | return 1; |
441 | } |
442 | |
443 | UNUSED(argc); |
444 | parseargs(ctx, argv); |
445 | |
446 | if ((status = build_code(ctx))) { |
447 | fprintf(stderr,"Error: DASM error %08x\n" , status); |
448 | return 1; |
449 | } |
450 | |
451 | switch (ctx->mode) { |
452 | case BUILD_peobj: |
453 | case BUILD_raw: |
454 | binmode = 1; |
455 | break; |
456 | default: |
457 | binmode = 0; |
458 | break; |
459 | } |
460 | |
461 | if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') { |
462 | ctx->fp = stdout; |
463 | #if defined(_WIN32) |
464 | if (binmode) |
465 | _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */ |
466 | #endif |
467 | } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w" ))) { |
468 | fprintf(stderr, "Error: cannot open output file '%s': %s\n" , |
469 | ctx->outname, strerror(errno)); |
470 | exit(1); |
471 | } |
472 | |
473 | switch (ctx->mode) { |
474 | case BUILD_elfasm: |
475 | case BUILD_coffasm: |
476 | case BUILD_machasm: |
477 | emit_asm(ctx); |
478 | emit_asm_debug(ctx); |
479 | break; |
480 | case BUILD_peobj: |
481 | emit_peobj(ctx); |
482 | break; |
483 | case BUILD_raw: |
484 | emit_raw(ctx); |
485 | break; |
486 | case BUILD_bcdef: |
487 | emit_bcdef(ctx); |
488 | emit_lib(ctx); |
489 | break; |
490 | case BUILD_vmdef: |
491 | emit_vmdef(ctx); |
492 | emit_lib(ctx); |
493 | break; |
494 | case BUILD_ffdef: |
495 | case BUILD_libdef: |
496 | case BUILD_recdef: |
497 | emit_lib(ctx); |
498 | break; |
499 | case BUILD_folddef: |
500 | emit_fold(ctx); |
501 | break; |
502 | default: |
503 | break; |
504 | } |
505 | |
506 | fflush(ctx->fp); |
507 | if (ferror(ctx->fp)) { |
508 | fprintf(stderr, "Error: cannot write to output file: %s\n" , |
509 | strerror(errno)); |
510 | exit(1); |
511 | } |
512 | fclose(ctx->fp); |
513 | |
514 | return 0; |
515 | } |
516 | |
517 | |