1/*
2** LuaJIT VM builder.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4**
5** This is a tool to build the hand-tuned assembler code required for
6** LuaJIT's bytecode interpreter. It supports a variety of output formats
7** to feed different toolchains (see usage() below).
8**
9** This tool is not particularly optimized because it's only used while
10** _building_ LuaJIT. There's no point in distributing or installing it.
11** Only the object code generated by this tool is linked into LuaJIT.
12**
13** Caveat: some memory is not free'd, error handling is lazy.
14** It's a one-shot tool -- any effort fixing this would be wasted.
15*/
16
17#include "buildvm.h"
18#include "lj_obj.h"
19#include "lj_gc.h"
20#include "lj_bc.h"
21#include "lj_ir.h"
22#include "lj_ircall.h"
23#include "lj_frame.h"
24#include "lj_dispatch.h"
25#if LJ_HASFFI
26#include "lj_ctype.h"
27#include "lj_ccall.h"
28#endif
29#include "luajit.h"
30
31#if defined(_WIN32)
32#include <fcntl.h>
33#include <io.h>
34#endif
35
36/* ------------------------------------------------------------------------ */
37
/*
** DynASM glue definitions.
** Dst is the handle passed as first argument to the dasm_*() calls in
** build_code(); here it is simply the build context pointer.
*/
#define Dst ctx
/* Parameter declaration matching Dst. */
#define Dst_DECL BuildCtx *ctx
/* NOTE(review): presumably the slot where DynASM keeps its state -- confirm in dasm_proto.h. */
#define Dst_REF (ctx->D)
/* NOTE(review): presumably enables DynASM's consistency checks (build_code() calls dasm_checkstep()) -- confirm. */
#define DASM_CHECKS 1

#include "../dynasm/dasm_proto.h"

/*
** Glue macros for DynASM.
** DASM_EXTERN forwards to collect_reloc(), which records a relocation
** against the external symbol extnames[idx]. The prototype is needed here
** because the function itself is defined further down in this file.
*/
static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);

#define DASM_EXTERN(ctx, addr, idx, type) \
  collect_reloc(ctx, addr, idx, type)

/* ------------------------------------------------------------------------ */

/* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */
#define DASM_ALIGNED_WRITES 1
56
57/* Embed architecture-specific DynASM encoder. */
58#if LJ_TARGET_X86ORX64
59#include "../dynasm/dasm_x86.h"
60#elif LJ_TARGET_ARM
61#include "../dynasm/dasm_arm.h"
62#elif LJ_TARGET_ARM64
63#include "../dynasm/dasm_arm64.h"
64#elif LJ_TARGET_PPC
65#include "../dynasm/dasm_ppc.h"
66#elif LJ_TARGET_MIPS
67#include "../dynasm/dasm_mips.h"
68#else
69#error "No support for this architecture (yet)"
70#endif
71
72/* Embed generated architecture-specific backend. */
73#include "buildvm_arch.h"
74
75/* ------------------------------------------------------------------------ */
76
77void owrite(BuildCtx *ctx, const void *ptr, size_t sz)
78{
79 if (fwrite(ptr, 1, sz, ctx->fp) != sz) {
80 fprintf(stderr, "Error: cannot write to output file: %s\n",
81 strerror(errno));
82 exit(1);
83 }
84}
85
86/* ------------------------------------------------------------------------ */
87
88/* Emit code as raw bytes. Only used for DynASM debugging. */
89static void emit_raw(BuildCtx *ctx)
90{
91 owrite(ctx, ctx->code, ctx->codesz);
92}
93
94/* -- Build machine code -------------------------------------------------- */
95
96static const char *sym_decorate(BuildCtx *ctx,
97 const char *prefix, const char *suffix)
98{
99 char name[256];
100 char *p;
101#if LJ_64
102 const char *symprefix = ctx->mode == BUILD_machasm ? "_" : "";
103#elif LJ_TARGET_XBOX360
104 const char *symprefix = "";
105#else
106 const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : "";
107#endif
108 sprintf(name, "%s%s%s", symprefix, prefix, suffix);
109 p = strchr(name, '@');
110 if (p) {
111#if LJ_TARGET_X86ORX64
112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
113 name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
114 else
115 *p = '\0';
116#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
117 /* Keep @plt etc. */
118#else
119 *p = '\0';
120#endif
121 }
122 p = (char *)malloc(strlen(name)+1); /* MSVC doesn't like strdup. */
123 strcpy(p, name);
124 return p;
125}
126
/*
** Number of external symbol names: the element count of extnames[] minus
** one (presumably its terminating sentinel entry -- confirm in the
** generated buildvm_arch.h).
*/
#define NRELOCSYM (sizeof(extnames)/sizeof(extnames[0])-1)

/*
** Maps an extnames[] index to its slot in ctx->relocsym[]. build_code()
** resets every entry to -1; collect_reloc() assigns a slot on first use.
*/
static int relocmap[NRELOCSYM];
130
131/* Collect external relocations. */
132static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type)
133{
134 if (ctx->nreloc >= BUILD_MAX_RELOC) {
135 fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n");
136 exit(1);
137 }
138 if (relocmap[idx] < 0) {
139 relocmap[idx] = ctx->nrelocsym;
140 ctx->relocsym[ctx->nrelocsym] = sym_decorate(ctx, "", extnames[idx]);
141 ctx->nrelocsym++;
142 }
143 ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code);
144 ctx->reloc[ctx->nreloc].sym = relocmap[idx];
145 ctx->reloc[ctx->nreloc].type = type;
146 ctx->nreloc++;
147#if LJ_TARGET_XBOX360
148 return (int)(ctx->code - addr) + 4; /* Encode symbol offset of .text. */
149#else
150 return 0; /* Encode symbol offset of 0. */
151#endif
152}
153
154/* Naive insertion sort. Performance doesn't matter here. */
155static void sym_insert(BuildCtx *ctx, int32_t ofs,
156 const char *prefix, const char *suffix)
157{
158 ptrdiff_t i = ctx->nsym++;
159 while (i > 0) {
160 if (ctx->sym[i-1].ofs <= ofs)
161 break;
162 ctx->sym[i] = ctx->sym[i-1];
163 i--;
164 }
165 ctx->sym[i].ofs = ofs;
166 ctx->sym[i].name = sym_decorate(ctx, prefix, suffix);
167}
168
169/* Build the machine code. */
170static int build_code(BuildCtx *ctx)
171{
172 int status;
173 int i;
174
175 /* Initialize DynASM structures. */
176 ctx->nglob = GLOB__MAX;
177 ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *));
178 memset(ctx->glob, 0, ctx->nglob*sizeof(void *));
179 ctx->nreloc = 0;
180
181 ctx->globnames = globnames;
182 ctx->extnames = extnames;
183 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
184 ctx->nrelocsym = 0;
185 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
186
187 ctx->dasm_ident = DASM_IDENT;
188 ctx->dasm_arch = DASM_ARCH;
189
190 dasm_init(Dst, DASM_MAXSECTION);
191 dasm_setupglobal(Dst, ctx->glob, ctx->nglob);
192 dasm_setup(Dst, build_actionlist);
193
194 /* Call arch-specific backend to emit the code. */
195 ctx->npc = build_backend(ctx);
196
197 /* Finalize the code. */
198 (void)dasm_checkstep(Dst, -1);
199 if ((status = dasm_link(Dst, &ctx->codesz))) return status;
200 ctx->code = (uint8_t *)malloc(ctx->codesz);
201 if ((status = dasm_encode(Dst, (void *)ctx->code))) return status;
202
203 /* Allocate symbol table and bytecode offsets. */
204 ctx->beginsym = sym_decorate(ctx, "", LABEL_PREFIX "vm_asm_begin");
205 ctx->sym = (BuildSym *)malloc((ctx->npc+ctx->nglob+1)*sizeof(BuildSym));
206 ctx->nsym = 0;
207 ctx->bc_ofs = (int32_t *)malloc(ctx->npc*sizeof(int32_t));
208
209 /* Collect the opcodes (PC labels). */
210 for (i = 0; i < ctx->npc; i++) {
211 int32_t ofs = dasm_getpclabel(Dst, i);
212 if (ofs < 0) return 0x22000000|i;
213 ctx->bc_ofs[i] = ofs;
214 if ((LJ_HASJIT ||
215 !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP ||
216 i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) &&
217 (LJ_HASFFI || i != BC_KCDATA))
218 sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]);
219 }
220
221 /* Collect the globals (named labels). */
222 for (i = 0; i < ctx->nglob; i++) {
223 const char *gl = globnames[i];
224 int len = (int)strlen(gl);
225 if (!ctx->glob[i]) {
226 fprintf(stderr, "Error: undefined global %s\n", gl);
227 exit(2);
228 }
229 /* Skip the _Z symbols. */
230 if (!(len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z'))
231 sym_insert(ctx, (int32_t)((uint8_t *)(ctx->glob[i]) - ctx->code),
232 LABEL_PREFIX, globnames[i]);
233 }
234
235 /* Close the address range. */
236 sym_insert(ctx, (int32_t)ctx->codesz, "", "");
237 ctx->nsym--;
238
239 dasm_free(Dst);
240
241 return 0;
242}
243
244/* -- Generate VM enums --------------------------------------------------- */
245
/*
** Name tables. Each array is produced by expanding a definition list with
** a macro that picks one field per entry, and ends with a NULL sentinel.
*/

/* Bytecode names from BCDEF. build_code() indexes this by PC label number. */
const char *const bc_names[] = {
#define BCNAME(name, ma, mb, mc, mt) #name,
BCDEF(BCNAME)
#undef BCNAME
  NULL
};

/* IR instruction names from IRDEF. */
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2) #name,
IRDEF(IRNAME)
#undef IRNAME
  NULL
};

/* IR type names from IRTDEF. */
const char *const irt_names[] = {
#define IRTNAME(name, size) #name,
IRTDEF(IRTNAME)
#undef IRTNAME
  NULL
};

/* Names from IRFPMDEF. emit_vmdef() writes them in lower case. */
const char *const irfpm_names[] = {
#define FPMNAME(name) #name,
IRFPMDEF(FPMNAME)
#undef FPMNAME
  NULL
};

/* Field names from IRFLDEF. emit_vmdef() lower-cases them and turns the first '_' into '.'. */
const char *const irfield_names[] = {
#define FLNAME(name, ofs) #name,
IRFLDEF(FLNAME)
#undef FLNAME
  NULL
};

/* Call names from IRCALLDEF. */
const char *const ircall_names[] = {
#define IRCALLNAME(cond, name, nargs, kind, type, flags) #name,
IRCALLDEF(IRCALLNAME)
#undef IRCALLNAME
  NULL
};

/* Messages (not names) of the TREDEF entries pulled in from lj_traceerr.h. */
static const char *const trace_errors[] = {
#define TREDEF(name, msg) msg,
#include "lj_traceerr.h"
  NULL
};
293
/*
** Copy s to buf, converting the ASCII letters 'A'..'Z' to lower case.
** All other characters are copied unchanged. buf must have room for
** strlen(s)+1 bytes. Returns buf.
*/
static const char *lower(char *buf, const char *s)
{
  size_t i;
  for (i = 0; s[i] != '\0'; i++) {
    char c = s[i];
    if (c >= 'A' && c <= 'Z')
      c = (char)(c + 0x20);
    buf[i] = c;
  }
  buf[i] = '\0';
  return buf;
}
304
305/* Emit C source code for bytecode-related definitions. */
306static void emit_bcdef(BuildCtx *ctx)
307{
308 int i;
309 fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
310 fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n");
311 for (i = 0; i < ctx->npc; i++) {
312 if (i != 0)
313 fprintf(ctx->fp, ",\n");
314 fprintf(ctx->fp, "%d", ctx->bc_ofs[i]);
315 }
316}
317
318/* Emit VM definitions as Lua code for debug modules. */
319static void emit_vmdef(BuildCtx *ctx)
320{
321 char buf[80];
322 int i;
323 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
324 fprintf(ctx->fp, "return {\n\n");
325
326 fprintf(ctx->fp, "bcnames = \"");
327 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
328 fprintf(ctx->fp, "\",\n\n");
329
330 fprintf(ctx->fp, "irnames = \"");
331 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
332 fprintf(ctx->fp, "\",\n\n");
333
334 fprintf(ctx->fp, "irfpm = { [0]=");
335 for (i = 0; irfpm_names[i]; i++)
336 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
337 fprintf(ctx->fp, "},\n\n");
338
339 fprintf(ctx->fp, "irfield = { [0]=");
340 for (i = 0; irfield_names[i]; i++) {
341 char *p;
342 lower(buf, irfield_names[i]);
343 p = strchr(buf, '_');
344 if (p) *p = '.';
345 fprintf(ctx->fp, "\"%s\", ", buf);
346 }
347 fprintf(ctx->fp, "},\n\n");
348
349 fprintf(ctx->fp, "ircall = {\n[0]=");
350 for (i = 0; ircall_names[i]; i++)
351 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
352 fprintf(ctx->fp, "},\n\n");
353
354 fprintf(ctx->fp, "traceerr = {\n[0]=");
355 for (i = 0; trace_errors[i]; i++)
356 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
357 fprintf(ctx->fp, "},\n\n");
358}
359
360/* -- Argument parsing ---------------------------------------------------- */
361
/*
** Build mode names, generated from BUILDDEF and terminated by NULL.
** parsemode() returns the index of the matching entry as the BuildMode.
*/
static const char *const modenames[] = {
#define BUILDNAME(name) #name,
BUILDDEF(BUILDNAME)
#undef BUILDNAME
  NULL
};
369
370/* Print usage information and exit. */
371static void usage(void)
372{
373 int i;
374 fprintf(stderr, LUAJIT_VERSION " VM builder.\n");
375 fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n");
376 fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n");
377 fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n");
378 fprintf(stderr, "Available modes:\n");
379 for (i = 0; i < BUILD__MAX; i++)
380 fprintf(stderr, " %s\n", modenames[i]);
381 exit(1);
382}
383
384/* Parse the output mode name. */
385static BuildMode parsemode(const char *mode)
386{
387 int i;
388 for (i = 0; modenames[i]; i++)
389 if (!strcmp(mode, modenames[i]))
390 return (BuildMode)i;
391 usage();
392 return (BuildMode)-1;
393}
394
395/* Parse arguments. */
396static void parseargs(BuildCtx *ctx, char **argv)
397{
398 const char *a;
399 int i;
400 ctx->mode = (BuildMode)-1;
401 ctx->outname = "-";
402 for (i = 1; (a = argv[i]) != NULL; i++) {
403 if (a[0] != '-')
404 break;
405 switch (a[1]) {
406 case '-':
407 if (a[2]) goto err;
408 i++;
409 goto ok;
410 case '\0':
411 goto ok;
412 case 'm':
413 i++;
414 if (a[2] || argv[i] == NULL) goto err;
415 ctx->mode = parsemode(argv[i]);
416 break;
417 case 'o':
418 i++;
419 if (a[2] || argv[i] == NULL) goto err;
420 ctx->outname = argv[i];
421 break;
422 default: err:
423 usage();
424 break;
425 }
426 }
427ok:
428 ctx->args = argv+i;
429 if (ctx->mode == (BuildMode)-1) goto err;
430}
431
432int main(int argc, char **argv)
433{
434 BuildCtx ctx_;
435 BuildCtx *ctx = &ctx_;
436 int status, binmode;
437
438 if (sizeof(void *) != 4*LJ_32+8*LJ_64) {
439 fprintf(stderr,"Error: pointer size mismatch in cross-build.\n");
440 fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n");
441 return 1;
442 }
443
444 UNUSED(argc);
445 parseargs(ctx, argv);
446
447 if ((status = build_code(ctx))) {
448 fprintf(stderr,"Error: DASM error %08x\n", status);
449 return 1;
450 }
451
452 switch (ctx->mode) {
453 case BUILD_peobj:
454 case BUILD_raw:
455 binmode = 1;
456 break;
457 default:
458 binmode = 0;
459 break;
460 }
461
462 if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') {
463 ctx->fp = stdout;
464#if defined(_WIN32)
465 if (binmode)
466 _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */
467#endif
468 } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) {
469 fprintf(stderr, "Error: cannot open output file '%s': %s\n",
470 ctx->outname, strerror(errno));
471 exit(1);
472 }
473
474 switch (ctx->mode) {
475 case BUILD_elfasm:
476 case BUILD_coffasm:
477 case BUILD_machasm:
478 emit_asm(ctx);
479 emit_asm_debug(ctx);
480 break;
481 case BUILD_peobj:
482 emit_peobj(ctx);
483 break;
484 case BUILD_raw:
485 emit_raw(ctx);
486 break;
487 case BUILD_bcdef:
488 emit_bcdef(ctx);
489 emit_lib(ctx);
490 break;
491 case BUILD_vmdef:
492 emit_vmdef(ctx);
493 emit_lib(ctx);
494 fprintf(ctx->fp, "}\n\n");
495 break;
496 case BUILD_ffdef:
497 case BUILD_libdef:
498 case BUILD_recdef:
499 emit_lib(ctx);
500 break;
501 case BUILD_folddef:
502 emit_fold(ctx);
503 break;
504 default:
505 break;
506 }
507
508 fflush(ctx->fp);
509 if (ferror(ctx->fp)) {
510 fprintf(stderr, "Error: cannot write to output file: %s\n",
511 strerror(errno));
512 exit(1);
513 }
514 fclose(ctx->fp);
515
516 return 0;
517}
518
519