1/*
2 * i386 specific functions for TCC assembler
3 *
4 * Copyright (c) 2001, 2002 Fabrice Bellard
5 * Copyright (c) 2009 Frédéric Feret (x86_64 support)
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#define USING_GLOBALS
23#include "tcc.h"
24
/* Maximum number of operands an instruction template or a parsed
   instruction can carry. */
#define MAX_OPERANDS 3

/* Token ranges used by asm_opcode() when no template matches: tokens in
   [TOK_ASM_first, TOK_ASM_last] index op0_codes[]; tokens up to
   TOK_ASM_alllast are reported as "bad operand" rather than "unknown
   opcode". */
#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

/* Flag bits stored in ASMInstr.instr_type. */
#define OPC_B          0x01  /* only used with OPC_WL */
#define OPC_WL         0x02  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04 /* register is added to opcode */
#define OPC_MODRM      0x08 /* modrm encoding */

/* Instruction class, a multi-bit field of instr_type selected by
   OPCT_MASK and tested with OPCT_IS(). */
#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10 /* add fwait opcode */
#define OPC_SHIFT      0x20 /* shift opcodes */
#define OPC_ARITH      0x30 /* arithmetic opcodes */
#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
#define OPC_TEST       0x50 /* test opcodes */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
#define OPC_48        0x200 /* Always has REX prefix */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
/* Without 64-bit support the "X" variants are the plain w/l forms. */
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

/* Bit position of the 3-bit group number inside instr_type (see the T()
   macro, and the "& 7" extraction in asm_opcode()). */
#define OPC_GROUP_SHIFT 13
58
/* To keep the operand-type field of an instruction template small
   (one byte, see ASMInstr.op_type), templates store one of these
   operand-kind codes; the only flag that may be ORed onto a code is
   OPT_EA.  asm_opcode() expands each code into an OP_xxx bit mask. */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
		     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types: expanded to several OP_xxx bits by asm_opcode() */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
104
/* Operand type bit masks: one bit per basic OPT_xxx operand kind.
   Operand.type and the decoded per-operand masks in asm_opcode() are
   built from these. */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
/* Memory operand addressed through 32-bit registers (needs the 0x67
   prefix).  This is bit 31, so the shift is done in unsigned arithmetic:
   shifting the signed int 0x40000000 left by one would overflow, which
   is undefined behaviour. */
# define OP_EA32  ((unsigned)OP_EA << 1)
#else
/* These operand kinds do not exist on i386; a zero mask never matches. */
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

/* Effective-address (memory) operand. */
#define OP_EA     0x40000000
/* Any general purpose integer register, whatever its width. */
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* Width-neutral names for the accumulator, count and data registers. */
#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif
149
/* One instruction template, i.e. one row of asm_instrs[]. */
typedef struct ASMInstr {
    uint16_t sym;        /* (first) mnemonic token; 0 terminates the table */
    uint16_t opcode;     /* opcode with any 0x0f escape byte removed by O() */
    uint16_t instr_type; /* OPC_xxx flags plus group number, built by T() */
    uint8_t nb_ops;      /* number of operands this template takes */
    uint8_t op_type[MAX_OPERANDS]; /* OPT_xxx codes, optionally ORed with
                                      OPT_EA; asm_opcode() expands them
                                      into OP_xxx masks */
} ASMInstr;
157
/* One operand as parsed from the assembler source by parse_operand(). */
typedef struct Operand {
    uint32_t type; /* OR of the OP_xxx masks this operand satisfies */
    int8_t  reg; /* register, -1 if none */
    int8_t  reg2; /* second register, -1 if none */
    uint8_t shift; /* log2 of the index scale, as returned by get_reg_shift() */
    ExprValue e; /* immediate value or displacement expression */
} Operand;
165
/* Maps a register-width mask (operand type & OP_REG) to the operand
   size index used by asm_opcode(): 0 = byte, 1 = word, 2 = long,
   3 = quad.  Only the power-of-two slots are meaningful; the other
   entries are filler. */
static const uint8_t reg_to_size[9] = {
    0, /* [0]             (unused) */
    0, /* [1] OP_REG8  -> 0 */
    1, /* [2] OP_REG16 -> 1 */
    0, /* [3]             (unused) */
    2, /* [4] OP_REG32 -> 2 */
    0, /* [5]             (unused) */
    0, /* [6]             (unused) */
    0, /* [7]             (unused) */
    3  /* [8] OP_REG64 -> 3 (x86_64 only) */
};
177
/* Number of condition-code mnemonic suffixes (aliases included). */
#define NB_TEST_OPCODES 30

/* Condition code (low opcode nibble) for each condition suffix, indexed
   by the suffix's offset from the first test mnemonic.  Aliases of the
   same condition share a value. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
    0x00,             /* o */
    0x01,             /* no */
    0x02, 0x02, 0x02, /* b, c, nae */
    0x03, 0x03, 0x03, /* nb, nc, ae */
    0x04, 0x04,       /* e, z */
    0x05, 0x05,       /* ne, nz */
    0x06, 0x06,       /* be, na */
    0x07, 0x07,       /* nbe, a */
    0x08,             /* s */
    0x09,             /* ns */
    0x0a, 0x0a,       /* p, pe */
    0x0b, 0x0b,       /* np, po */
    0x0c, 0x0c,       /* l, nge */
    0x0d, 0x0d,       /* nl, ge */
    0x0e, 0x0e,       /* le, ng */
    0x0f, 0x0f,       /* nle, g */
};
212
/* Segment-override prefix byte for each segment register, indexed by
   the register number stored in an OP_SEG operand (es first). */
static const uint8_t segment_prefixes[] = {
    0x26, 0x2e, 0x36, 0x3e, /* es, cs, ss, ds */
    0x64, 0x65              /* fs, gs */
};
221
/* Table of instruction templates.  The rows come from the target's
   *-asm.h header, which is expanded here through the DEF_ASM_OPn macros:
   operand-less DEF_ASM_OP0 entries expand to nothing (they are collected
   in op0_codes[] instead), every other form yields one ASMInstr
   initializer.  The table ends with an all-zero row (sym == 0).
   NOTE(review): these macros are defined again for op0_codes[] below, so
   the included header presumably #undefs them at its end -- confirm. */
static const ASMInstr asm_instrs[] = {
#define ALT(x) x
/* This removes a 0x0f in the second byte */
#define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
/* This constructs instr_type from opcode, type and group.  */
#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
#define DEF_ASM_OP0(name, opcode)
#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
    /* last operation */
    { 0, },
};
241
/* Opcodes of the operand-less instructions.  The same header is
   expanded a second time, now keeping only the DEF_ASM_OP0 entries;
   asm_opcode() indexes this array with (opcode - TOK_ASM_first). */
static const uint16_t op0_codes[] = {
#define ALT(x)
#define DEF_ASM_OP0(x, opcode) opcode,
#define DEF_ASM_OP0L(name, opcode, group, instr_type)
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
};
255
256static inline int get_reg_shift(TCCState *s1)
257{
258 int shift, v;
259 v = asm_int_expr(s1);
260 switch(v) {
261 case 1:
262 shift = 0;
263 break;
264 case 2:
265 shift = 1;
266 break;
267 case 4:
268 shift = 2;
269 break;
270 case 8:
271 shift = 3;
272 break;
273 default:
274 expect("1, 2, 4 or 8 constant");
275 shift = 0;
276 break;
277 }
278 return shift;
279}
280
#ifdef TCC_TARGET_X86_64
/* Try to read identifier token T as a numbered register name:
   "r<N>" with an optional 'b', 'w' or 'd' width suffix, or "cr<N>".
   N is 1..15 without a leading zero.  On success returns N and stores
   the operand class in *TYPE (OP_REG64, OP_REG32, OP_REG16, OP_REG8 or
   OP_CR); otherwise returns -1.  *TYPE may be overwritten on failure. */
static int asm_parse_numeric_reg(int t, unsigned int *type)
{
    const char *name;
    int num;
    char ch;

    if (t < TOK_IDENT || t >= tok_ident)
        return -1;

    name = table_ident[t - TOK_IDENT]->str;
    if (*name == 'c') {
        *type = OP_CR;
        name++;
    } else {
        *type = OP_REG64;
    }
    if (*name != 'r')
        return -1;
    name++;

    /* first digit: a leading '0' is not allowed */
    ch = *name++;
    if (ch < '1' || ch > '9')
        return -1;
    num = ch - '0';

    /* optional second digit */
    ch = *name;
    if (ch >= '0' && ch <= '5') {
        num = num * 10 + (ch - '0');
        name++;
    }
    if (num > 15)
        return -1;

    ch = *name;
    if (ch == '\0')
        return num;

    /* a width suffix is only valid on plain "r<N>" and must be the
       final character */
    if (*type != OP_REG64)
        return -1;
    if (name[1] != '\0')
        return -1;
    switch (ch) {
    case 'b':
        *type = OP_REG8;
        break;
    case 'w':
        *type = OP_REG16;
        break;
    case 'd':
        *type = OP_REG32;
        break;
    default:
        return -1;
    }
    return num;
}
#endif
320
321static int asm_parse_reg(unsigned int *type)
322{
323 int reg = 0;
324 *type = 0;
325 if (tok != '%')
326 goto error_32;
327 next();
328 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
329 reg = tok - TOK_ASM_eax;
330 *type = OP_REG32;
331#ifdef TCC_TARGET_X86_64
332 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
333 reg = tok - TOK_ASM_rax;
334 *type = OP_REG64;
335 } else if (tok == TOK_ASM_rip) {
336 reg = -2; /* Probably should use different escape code. */
337 *type = OP_REG64;
338 } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
339 && (*type == OP_REG32 || *type == OP_REG64)) {
340 ;
341#endif
342 } else {
343 error_32:
344 expect("register");
345 }
346 next();
347 return reg;
348}
349
/* Parse one instruction operand from the token stream into *OP.
   Three forms are recognised:
     %reg                           register; op->type gets the register
                                    class, plus OP_EAX/OP_CL/OP_DX/OP_ST0
                                    for those specific registers
     $expr                          immediate; op->type gets every OP_IMxx
                                    size the constant fits in
     disp, disp(base,index,scale)   memory reference (OP_EA)
   A leading '*' adds OP_INDIR to whatever was parsed. */
static void parse_operand(TCCState *s1, Operand *op)
{
    ExprValue e;
    int reg, indir;
    const char *p;

    indir = 0;
    if (tok == '*') {
        next();
        indir = OP_INDIR;
    }

    if (tok == '%') {
        next();
        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
            /* the register tokens come in groups of 8 per class, in the
               same order as the OPT_xxx enum: group number selects the
               type bit, the low 3 bits give the register number */
            reg = tok - TOK_ASM_al;
            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
            op->reg = reg & 7;
            if ((op->type & OP_REG) && op->reg == TREG_XAX)
                op->type |= OP_EAX;
            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
                op->type |= OP_CL;
            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
                op->type |= OP_DX;
        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
            op->type = OP_DB;
            op->reg = tok - TOK_ASM_dr0;
        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
            op->type = OP_SEG;
            op->reg = tok - TOK_ASM_es;
        } else if (tok == TOK_ASM_st) {
            /* %st or %st(N) with a single digit N in 0..7 */
            op->type = OP_ST;
            op->reg = 0;
            next();
            if (tok == '(') {
                next();
                if (tok != TOK_PPNUM)
                    goto reg_error;
                p = tokc.str.data;
                reg = p[0] - '0';
                if ((unsigned)reg >= 8 || p[1] != '\0')
                    goto reg_error;
                op->reg = reg;
                next();
                skip(')');
            }
            if (op->reg == 0)
                op->type |= OP_ST0;
            /* the token after the operand has already been fetched */
            goto no_skip;
#ifdef TCC_TARGET_X86_64
        } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
            op->type = OP_REG8 | OP_REG8_LOW;
            op->reg = 4 + tok - TOK_ASM_spl;
        } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
            ;
#endif
        } else {
        reg_error:
            tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
        }
        next();
    no_skip: ;
    } else if (tok == '$') {
        /* constant value */
        next();
        asm_expr(s1, &e);
        op->type = OP_IM32;
        op->e = e;
        if (!op->e.sym) {
            /* a pure constant: also flag every narrower size it fits in */
            if (op->e.v == (uint8_t)op->e.v)
                op->type |= OP_IM8;
            if (op->e.v == (int8_t)op->e.v)
                op->type |= OP_IM8S;
            if (op->e.v == (uint16_t)op->e.v)
                op->type |= OP_IM16;
#ifdef TCC_TARGET_X86_64
            /* fits neither a signed nor an unsigned 32-bit value:
               only the 64-bit immediate form can hold it */
            if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
                op->type = OP_IM64;
#endif
        }
    } else {
        /* address(reg,reg2,shift) with all variants */
        op->type = OP_EA;
        op->reg = -1;
        op->reg2 = -1;
        op->shift = 0;
        if (tok != '(') {
            asm_expr(s1, &e);
            op->e = e;
        } else {
            next();
            if (tok == '%') {
                /* "(%reg...": no displacement; push the '(' back so the
                   register-list parser below sees it */
                unget_tok('(');
                op->e.v = 0;
                op->e.sym = NULL;
            } else {
                /* bracketed offset expression */
                asm_expr(s1, &e);
                if (tok != ')')
                    expect(")");
                next();
                op->e.v = e.v;
                op->e.sym = e.sym;
            }
            op->e.pcrel = 0;
        }
        if (tok == '(') {
            unsigned int type = 0;
            next();
            if (tok != ',') {
                op->reg = asm_parse_reg(&type);
            }
            if (tok == ',') {
                next();
                if (tok != ',') {
                    op->reg2 = asm_parse_reg(&type);
                }
                if (tok == ',') {
                    next();
                    op->shift = get_reg_shift(s1);
                }
            }
            /* 'type' holds the class of the last register parsed */
            if (type & OP_REG32)
                op->type |= OP_EA32;
            skip(')');
        }
        if (op->reg == -1 && op->reg2 == -1)
            op->type |= OP_ADDR;
    }
    op->type |= indir;
}
481
482/* XXX: unify with C code output ? */
483ST_FUNC void gen_expr32(ExprValue *pe)
484{
485 if (pe->pcrel)
486 /* If PC-relative, always set VT_SYM, even without symbol,
487 so as to force a relocation to be emitted. */
488 gen_addrpc32(VT_SYM, pe->sym, pe->v);
489 else
490 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
491}
492
#ifdef TCC_TARGET_X86_64
/* Emit the 64-bit value of expression PE, passing VT_SYM when it refers
   to a symbol. */
ST_FUNC void gen_expr64(ExprValue *pe)
{
    int flags = 0;

    if (pe->sym)
        flags = VT_SYM;
    gen_addr64(flags, pe->sym, pe->v);
}
#endif
499
500/* XXX: unify with C code output ? */
501static void gen_disp32(ExprValue *pe)
502{
503 Sym *sym = pe->sym;
504 ElfSym *esym = elfsym(sym);
505 if (esym && esym->st_shndx == cur_text_section->sh_num) {
506 /* same section: we can output an absolute value. Note
507 that the TCC compiler behaves differently here because
508 it always outputs a relocation to ease (future) code
509 elimination in the linker */
510 gen_le32(pe->v + esym->st_value - ind - 4);
511 } else {
512 if (sym && sym->type.t == VT_VOID) {
513 sym->type.t = VT_FUNC;
514 sym->type.ref = NULL;
515 }
516 gen_addrpc32(VT_SYM, sym, pe->v);
517 }
518}
519
520/* generate the modrm operand */
521static inline int asm_modrm(int reg, Operand *op)
522{
523 int mod, reg1, reg2, sib_reg1;
524
525 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
526 g(0xc0 + (reg << 3) + op->reg);
527 } else if (op->reg == -1 && op->reg2 == -1) {
528 /* displacement only */
529#ifdef TCC_TARGET_X86_64
530 g(0x04 + (reg << 3));
531 g(0x25);
532#else
533 g(0x05 + (reg << 3));
534#endif
535 gen_expr32(&op->e);
536#ifdef TCC_TARGET_X86_64
537 } else if (op->reg == -2) {
538 ExprValue *pe = &op->e;
539 g(0x05 + (reg << 3));
540 gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
541 return ind;
542#endif
543 } else {
544 sib_reg1 = op->reg;
545 /* fist compute displacement encoding */
546 if (sib_reg1 == -1) {
547 sib_reg1 = 5;
548 mod = 0x00;
549 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
550 mod = 0x00;
551 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
552 mod = 0x40;
553 } else {
554 mod = 0x80;
555 }
556 /* compute if sib byte needed */
557 reg1 = op->reg;
558 if (op->reg2 != -1)
559 reg1 = 4;
560 g(mod + (reg << 3) + reg1);
561 if (reg1 == 4) {
562 /* add sib byte */
563 reg2 = op->reg2;
564 if (reg2 == -1)
565 reg2 = 4; /* indicate no index */
566 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
567 }
568 /* add offset */
569 if (mod == 0x40) {
570 g(op->e.v);
571 } else if (mod == 0x80 || op->reg == -1) {
572 gen_expr32(&op->e);
573 }
574 }
575 return 0;
576}
577
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

/* Work out what a single register operand contributes to the REX prefix.
   A register number >= 8 is reduced by 8 and yields EXT_BIT; one of the
   OP_REG8_LOW registers yields a bare 0x40; an 8-bit register >= 4 that
   is not OP_REG8_LOW is ah/ch/dh/bh and is recorded in *HIGH8 because
   it cannot be combined with a REX prefix. */
static unsigned char asm_rex_operand(Operand *op, unsigned char ext_bit,
                                     int *high8)
{
    if (op->reg >= 8) {
        op->reg -= 8;
        return ext_bit;
    }
    if (op->type & OP_REG8_LOW)
        return 0x40;
    if ((op->type & OP_REG8) && op->reg >= 4)
        *high8 = op->reg;
    return 0;
}

/* Emit a REX prefix if the instruction needs one.
   WIDTH64 requests REX.W; REGI and RMI are the indexes of the operands
   that go into the ModRM reg and r/m fields (-1 if none).  Register
   numbers >= 8 in OPS are reduced by 8 as a side effect. */
static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? REX_W : 0;
    int high8 = 0;
    int i;

    if (rmi == -1) {
        /* No mod/rm byte, but there may still be a register operand
           (it is added to the opcode later); only the first one counts. */
        for (i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                rex |= asm_rex_operand(&ops[i], REX_B, &high8);
                break;
            }
        }
    } else {
        if (regi != -1)
            rex |= asm_rex_operand(&ops[regi], REX_R, &high8);
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA))
            rex |= asm_rex_operand(&ops[rmi], REX_B, &high8);
        if ((ops[rmi].type & OP_EA) && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }

    if (!rex)
        return;
    if (high8)
        tcc_error("can't encode register %%%ch when REX prefix is required",
                  "acdb"[high8-4]);
    g(rex);
}
#endif
640
641static void maybe_print_stats (void)
642{
643 static int already = 1;
644 if (!already)
645 /* print stats about opcodes */
646 {
647 const struct ASMInstr *pa;
648 int freq[4];
649 int op_vals[500];
650 int nb_op_vals, i, j;
651
652 already = 1;
653 nb_op_vals = 0;
654 memset(freq, 0, sizeof(freq));
655 for(pa = asm_instrs; pa->sym != 0; pa++) {
656 freq[pa->nb_ops]++;
657 //for(i=0;i<pa->nb_ops;i++) {
658 for(j=0;j<nb_op_vals;j++) {
659 //if (pa->op_type[i] == op_vals[j])
660 if (pa->instr_type == op_vals[j])
661 goto found;
662 }
663 //op_vals[nb_op_vals++] = pa->op_type[i];
664 op_vals[nb_op_vals++] = pa->instr_type;
665 found: ;
666 //}
667 }
668 for(i=0;i<nb_op_vals;i++) {
669 int v = op_vals[i];
670 //if ((v & (v - 1)) != 0)
671 printf("%3d: %08x\n", i, v);
672 }
673 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
674 (int)sizeof(asm_instrs),
675 (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
676 freq[0], freq[1], freq[2], freq[3]);
677 }
678}
679
/* Assemble one instruction.
   OPCODE is the mnemonic token.  The operands are parsed from the token
   stream, the first asm_instrs[] template whose mnemonic range, operand
   count and operand types match is selected, and the instruction bytes
   (prefixes, opcode, ModRM/SIB, immediates) are emitted.  Mnemonics with
   no matching template fall back to op0_codes[] or raise an error. */
ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{
    const ASMInstr *pa;
    int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
    int nb_ops, s;
    Operand ops[MAX_OPERANDS], *pop;
    int op_type[3]; /* decoded op type */
    int alltypes;   /* OR of all operand types */
    int autosize;
    int p66;
#ifdef TCC_TARGET_X86_64
    int rex64;
#endif

    maybe_print_stats();
    /* force synthetic ';' after prefix instruction, so we can handle */
    /* one-line things like "rep stosb" instead of only "rep\nstosb" */
    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
        unget_tok(';');

    /* get operands */
    pop = ops;
    nb_ops = 0;
    seg_prefix = 0;
    alltypes = 0;
    for(;;) {
        if (tok == ';' || tok == TOK_LINEFEED)
            break;
        if (nb_ops >= MAX_OPERANDS) {
            tcc_error("incorrect number of operands");
        }
        parse_operand(s1, pop);
        if (tok == ':') {
            /* "%seg:mem": what was just parsed is a segment override;
               the real operand follows and must be a memory reference */
            if (pop->type != OP_SEG || seg_prefix)
                tcc_error("incorrect prefix");
            seg_prefix = segment_prefixes[pop->reg];
            next();
            parse_operand(s1, pop);
            if (!(pop->type & OP_EA)) {
                tcc_error("segment prefix must be followed by memory reference");
            }
        }
        pop++;
        nb_ops++;
        if (tok != ',')
            break;
        next();
    }

    s = 0; /* avoid warning */

again:
    /* optimize matching by using a lookup table (no hashing is needed
       !) */
    /* 's' becomes the operand size index implied by the mnemonic suffix
       (0 = b, 1 = w, 2 = l, 3 = q); the value 'autosize' computed below
       stands for "no suffix given" */
    for(pa = asm_instrs; pa->sym != 0; pa++) {
        int it = pa->instr_type & OPCT_MASK;
        s = 0;
        if (it == OPC_FARITH) {
            v = opcode - pa->sym;
            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
                continue;
        } else if (it == OPC_ARITH) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
            if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
              {
                /* We need to reject the xxxb opcodes that we accepted above.
                   Note that pa->sym for WLX opcodes is the 'w' token,
                   to get the 'b' token subtract one.  */
                if (((opcode - pa->sym + 1) % NBWLX) == 0)
                    continue;
                s++;
              }
        } else if (it == OPC_SHIFT) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
        } else if (it == OPC_TEST) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
                continue;
            /* cmovxx is a test opcode but accepts multiple sizes.
               The suffixes aren't encoded in the table, instead we
               simply force size autodetection always and deal with suffixed
               variants below when we don't find e.g. "cmovzl".  */
            if (pa->instr_type & OPC_WLX)
                s = NBWLX - 1;
        } else if (pa->instr_type & OPC_B) {
#ifdef TCC_TARGET_X86_64
            /* Some instructions don't have the full size but only
               bwl form.  insb e.g. */
            if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
                && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
#endif
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
                continue;
            s = opcode - pa->sym;
        } else if (pa->instr_type & OPC_WLX) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
            s = opcode - pa->sym + 1;
        } else {
            if (pa->sym != opcode)
                continue;
        }
        if (pa->nb_ops != nb_ops)
            continue;
#ifdef TCC_TARGET_X86_64
        /* Special case for moves.  Selecting the IM64->REG64 form
           should only be done if we really have an >32bit imm64, and that
           is hardcoded.  Ignore it here.  */
        if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
            && (ops[1].type & OP_REG) == OP_REG64
            && !(pa->instr_type & OPC_0F))
            continue;
#endif
        /* now decode and check each operand */
        alltypes = 0;
        for(i = 0; i < nb_ops; i++) {
            int op1, op2;
            op1 = pa->op_type[i];
            /* low 5 bits: OPT_xxx code, expanded here to an OP_xxx mask */
            op2 = op1 & 0x1f;
            switch(op2) {
            case OPT_IM:
                v = OP_IM8 | OP_IM16 | OP_IM32;
                break;
            case OPT_REG:
                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_REGW:
                v = OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_IMW:
                v = OP_IM16 | OP_IM32;
                break;
            case OPT_MMXSSE:
                v = OP_MMX | OP_SSE;
                break;
            case OPT_DISP:
            case OPT_DISP8:
                v = OP_ADDR;
                break;
            default:
                v = 1 << op2;
                break;
            }
            if (op1 & OPT_EA)
                v |= OP_EA;
            op_type[i] = v;
            if ((ops[i].type & v) == 0)
                goto next;
            alltypes |= ops[i].type;
        }
        /* all is matching ! */
        break;
    next: ;
    }
    if (pa->sym == 0) {
        /* no template matched */
        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
            /* operand-less instruction: emit its one or two opcode bytes */
            int b;
            b = op0_codes[opcode - TOK_ASM_first];
            if (b & 0xff00)
                g(b >> 8);
            g(b);
            return;
        } else if (opcode <= TOK_ASM_alllast) {
            tcc_error("bad operand with opcode '%s'",
                  get_tok_str(opcode, NULL));
        } else {
            /* Special case for cmovcc, we accept size suffixes but ignore
               them, but we don't want them to blow up our tables.  */
            TokenSym *ts = table_ident[opcode - TOK_IDENT];
            if (ts->len >= 6
                && strchr("wlq", ts->str[ts->len-1])
                && !memcmp(ts->str, "cmov", 4)) {
                /* retry with the suffix character stripped */
                opcode = tok_alloc(ts->str, ts->len-1)->tok;
                goto again;
            }
            tcc_error("unknown opcode '%s'", ts->str);
        }
    }
    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
    autosize = NBWLX-1;
#ifdef TCC_TARGET_X86_64
    /* XXX the autosize should rather be zero, to not have to adjust this
       all the time.  */
    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
        autosize = NBWLX-2;
#endif
    if (s == autosize) {
        /* Check for register operands providing hints about the size.
           Start from the end, i.e. destination operands.  This matters
           only for opcodes accepting different sized registers, lar and lsl
           are such opcodes.  */
        for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
                s = reg_to_size[ops[i].type & OP_REG];
        }
        if (s == autosize) {
            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                s = 2;
            else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                     (ops[0].type & OP_EA))
                s = NBWLX - 2;
            else
                tcc_error("cannot infer opcode suffix");
        }
    }

#ifdef TCC_TARGET_X86_64
    /* Generate addr32 prefix if needed */
    for(i = 0; i < nb_ops; i++) {
        if (ops[i].type & OP_EA32) {
            g(0x67);
            break;
        }
    }
#endif
    /* generate data16 prefix if needed */
    p66 = 0;
    if (s == 1)
        p66 = 1;
    else {
        /* accepting mmx+sse in all operands --> needs 0x66 to
           switch to sse mode.  Accepting only sse in an operand --> is
           already SSE insn and needs 0x66/f2/f3 handling.  */
        for (i = 0; i < nb_ops; i++)
            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
                && ops[i].type & OP_SSE)
                p66 = 1;
    }
    if (p66)
        g(0x66);
#ifdef TCC_TARGET_X86_64
    /* decide whether a 64-bit operand size (REX.W) is required */
    rex64 = 0;
    if (pa->instr_type & OPC_48)
        rex64 = 1;
    else if (s == 3 || (alltypes & OP_REG64)) {
        /* generate REX prefix */
        int default64 = 0;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
                /* If only 64bit regs are accepted in one operand
                   this is a default64 instruction without need for
                   REX prefixes, except for movabs(0xb8).  */
                default64 = 1;
                break;
            }
        }
        /* XXX find better encoding for the default64 instructions.  */
        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
              && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
              && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
              && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
              && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
            && !default64)
            rex64 = 1;
    }
#endif

    /* now generates the operation */
    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
        g(0x9b);
    if (seg_prefix)
        g(seg_prefix);

    v = pa->opcode;
    /* put back the 0x0f escape byte that O() removed from the table */
    if (pa->instr_type & OPC_0F)
        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
        /* kludge for imul $im, %reg */
        nb_ops = 3;
        ops[2] = ops[1];
        op_type[2] = op_type[1];
    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
        v--; /* int $3 case */
        nb_ops = 0;
    } else if ((v == 0x06 || v == 0x07)) {
        /* push/pop of a segment register */
        if (ops[0].reg >= 4) {
            /* push/pop %fs or %gs */
            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
        } else {
            v += ops[0].reg << 3;
        }
        nb_ops = 0;
    } else if (v <= 0x05) {
        /* arith case */
        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
        /* fpu arith case */
        v += ((opcode - pa->sym) / 6) << 3;
    }

    /* search which operand will be used for modrm */
    modrm_index = -1;
    modreg_index = -1;
    if (pa->instr_type & OPC_MODRM) {
        if (!nb_ops) {
            /* A modrm opcode without operands is a special case (e.g. mfence).
               It has a group and acts as if there's an register operand 0
               (ax).  */
            i = 0;
            ops[i].type = OP_REG;
            ops[i].reg = 0;
            goto modrm_found;
        }
        /* first look for an ea operand */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & OP_EA)
                goto modrm_found;
        }
        /* then if not found, a register or indirection (shift instructions) */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
                goto modrm_found;
        }
#ifdef ASM_DEBUG
        tcc_error("bad op table");
#endif
    modrm_found:
        modrm_index = i;
        /* if a register is used in another operand then it is
           used instead of group */
        for(i = 0;i < nb_ops; i++) {
            int t = op_type[i];
            if (i != modrm_index &&
                (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
                modreg_index = i;
                break;
            }
        }
    }
#ifdef TCC_TARGET_X86_64
    asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
#endif

    if (pa->instr_type & OPC_REG) {
        /* mov $im, %reg case */
        if (v == 0xb0 && s >= 1)
            v += 7;
        /* the register number is added to the opcode byte itself */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                v += ops[i].reg;
                break;
            }
        }
    }
    /* byte-capable opcodes: add one to the opcode for any non-byte size */
    if (pa->instr_type & OPC_B)
        v += s >= 1;
    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
        ElfSym *esym;
        int jmp_disp;

        /* see if we can really generate the jump with a byte offset */
        esym = elfsym(ops[0].e.sym);
        if (!esym || esym->st_shndx != cur_text_section->sh_num)
            goto no_short_jump;
        jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff);
        if (jmp_disp == (int8_t)jmp_disp) {
            /* OK to generate jump */
            ops[0].e.sym = 0;
            ops[0].e.v = jmp_disp;
            op_type[0] = OP_IM8S;
        } else {
        no_short_jump:
            /* long jump will be allowed. need to modify the
               opcode slightly */
            if (v == 0xeb) /* jmp */
                v = 0xe9;
            else if (v == 0x70) /* jcc */
                v += 0x0f10;
            else
                tcc_error("invalid displacement");
        }
    }
    if (OPCT_IS(pa->instr_type, OPC_TEST))
        v += test_bits[opcode - pa->sym];
    /* emit the opcode, most significant non-zero byte first */
    op1 = v >> 16;
    if (op1)
        g(op1);
    op1 = (v >> 8) & 0xff;
    if (op1)
        g(op1);
    g(v);

    /* default value of the ModRM reg field: the operation number within
       a shift/arith/fpu family, or the template's group number */
    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
        reg = (opcode - pa->sym) / NBWLX;
        if (reg == 6)
            reg = 7;
    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
        reg = (opcode - pa->sym) / NBWLX;
    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
        reg = (opcode - pa->sym) / 6;
    } else {
        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
    }

    pc = 0;
    if (pa->instr_type & OPC_MODRM) {
        /* if a register is used in another operand then it is
           used instead of group */
        if (modreg_index >= 0)
            reg = ops[modreg_index].reg;
        pc = asm_modrm(reg, &ops[modrm_index]);
    }

    /* emit constants */
#ifndef TCC_TARGET_X86_64
    if (!(pa->instr_type & OPC_0F)
        && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
        /* ljmp or lcall kludge */
        gen_expr32(&ops[1].e);
        if (ops[0].e.sym)
            tcc_error("cannot relocate");
        gen_le16(ops[0].e.v);
        return;
    }
#endif
    for(i = 0;i < nb_ops; i++) {
        v = op_type[i];
        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
            /* if multiple sizes are given it means we must look
               at the op size */
            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
                if (s == 0)
                    v = OP_IM8;
                else if (s == 1)
                    v = OP_IM16;
                else if (s == 2 || (v & OP_IM64) == 0)
                    v = OP_IM32;
                else
                    v = OP_IM64;
            }

            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
                tcc_error("cannot relocate");

            if (v & (OP_IM8 | OP_IM8S)) {
                g(ops[i].e.v);
            } else if (v & OP_IM16) {
                gen_le16(ops[i].e.v);
#ifdef TCC_TARGET_X86_64
            } else if (v & OP_IM64) {
                gen_expr64(&ops[i].e);
#endif
            } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
                gen_disp32(&ops[i].e);
            } else {
                gen_expr32(&ops[i].e);
            }
        }
    }

    /* after immediate operands, adjust pc-relative address */
    /* 'pc' is the output offset asm_modrm() returned just after the
       4-byte displacement; add (pc - ind) to that displacement */
    if (pc)
        add32le(cur_text_section->data + pc - 4, pc - ind);
}
1139
/* Return the priority of constraint string STR: the largest priority
   value among its constraint letters (0 for an empty string).  Operands
   with the lowest value are allocated first.  An unrecognised letter is
   reported through tcc_error(). */
static inline int constraint_priority(const char *str)
{
    const char *p;
    int best = 0;

    for (p = str; *p != '\0'; p++) {
        int rank;

        switch (*p) {
        case 'A':
            rank = 0;
            break;
        case 'a': case 'b': case 'c': case 'd':
        case 'S': case 'D':
            rank = 1;
            break;
        case 'q':
            rank = 2;
            break;
        case 'r': case 'R': case 'p':
            rank = 3;
            break;
        case 'N': case 'M': case 'I':
        case 'e': case 'i': case 'm': case 'g':
            rank = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", *p);
            rank = 0;
            break;
        }
        if (best < rank)
            best = rank;
    }
    return best;
}
1191
/* Step over any leading constraint modifier characters ('=', '&',
   '+', '%') in P and return a pointer to the first character that is
   not one of them (possibly the terminating NUL). */
static const char *skip_constraint_modifiers(const char *p)
{
    for (;;) {
        switch (*p) {
        case '=':
        case '&':
        case '+':
        case '%':
            p++;
            continue;
        default:
            return p;
        }
    }
}
1198
1199/* If T (a token) is of the form "%reg" returns the register
1200 number and type, otherwise return -1. */
1201ST_FUNC int asm_parse_regvar (int t)
1202{
1203 const char *s;
1204 Operand op;
1205 if (t < TOK_IDENT || (t & SYM_FIELD))
1206 return -1;
1207 s = table_ident[t - TOK_IDENT]->str;
1208 if (s[0] != '%')
1209 return -1;
1210 t = tok_alloc(s+1, strlen(s)-1)->tok;
1211 unget_tok(t);
1212 unget_tok('%');
1213 parse_operand(tcc_state, &op);
1214 /* Accept only integer regs for now. */
1215 if (op.type & OP_REG)
1216 return op.reg;
1217 else
1218 return -1;
1219}
1220
1221#define REG_OUT_MASK 0x01
1222#define REG_IN_MASK 0x02
1223
1224#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
1225
1226ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1227 int nb_operands, int nb_outputs,
1228 const uint8_t *clobber_regs,
1229 int *pout_reg)
1230{
1231 ASMOperand *op;
1232 int sorted_op[MAX_ASM_OPERANDS];
1233 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1234 const char *str;
1235 uint8_t regs_allocated[NB_ASM_REGS];
1236
1237 /* init fields */
1238 for(i=0;i<nb_operands;i++) {
1239 op = &operands[i];
1240 op->input_index = -1;
1241 op->ref_index = -1;
1242 op->reg = -1;
1243 op->is_memory = 0;
1244 op->is_rw = 0;
1245 }
1246 /* compute constraint priority and evaluate references to output
1247 constraints if input constraints */
1248 for(i=0;i<nb_operands;i++) {
1249 op = &operands[i];
1250 str = op->constraint;
1251 str = skip_constraint_modifiers(str);
1252 if (isnum(*str) || *str == '[') {
1253 /* this is a reference to another constraint */
1254 k = find_constraint(operands, nb_operands, str, NULL);
1255 if ((unsigned)k >= i || i < nb_outputs)
1256 tcc_error("invalid reference in constraint %d ('%s')",
1257 i, str);
1258 op->ref_index = k;
1259 if (operands[k].input_index >= 0)
1260 tcc_error("cannot reference twice the same operand");
1261 operands[k].input_index = i;
1262 op->priority = 5;
1263 } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
1264 && op->vt->sym
1265 && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
1266 op->priority = 1;
1267 op->reg = reg;
1268 } else {
1269 op->priority = constraint_priority(str);
1270 }
1271 }
1272
1273 /* sort operands according to their priority */
1274 for(i=0;i<nb_operands;i++)
1275 sorted_op[i] = i;
1276 for(i=0;i<nb_operands - 1;i++) {
1277 for(j=i+1;j<nb_operands;j++) {
1278 p1 = operands[sorted_op[i]].priority;
1279 p2 = operands[sorted_op[j]].priority;
1280 if (p2 < p1) {
1281 tmp = sorted_op[i];
1282 sorted_op[i] = sorted_op[j];
1283 sorted_op[j] = tmp;
1284 }
1285 }
1286 }
1287
1288 for(i = 0;i < NB_ASM_REGS; i++) {
1289 if (clobber_regs[i])
1290 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1291 else
1292 regs_allocated[i] = 0;
1293 }
1294 /* esp cannot be used */
1295 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1296 /* ebp cannot be used yet */
1297 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1298
1299 /* allocate registers and generate corresponding asm moves */
1300 for(i=0;i<nb_operands;i++) {
1301 j = sorted_op[i];
1302 op = &operands[j];
1303 str = op->constraint;
1304 /* no need to allocate references */
1305 if (op->ref_index >= 0)
1306 continue;
1307 /* select if register is used for output, input or both */
1308 if (op->input_index >= 0) {
1309 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1310 } else if (j < nb_outputs) {
1311 reg_mask = REG_OUT_MASK;
1312 } else {
1313 reg_mask = REG_IN_MASK;
1314 }
1315 if (op->reg >= 0) {
1316 if (is_reg_allocated(op->reg))
1317 tcc_error("asm regvar requests register that's taken already");
1318 reg = op->reg;
1319 goto reg_found;
1320 }
1321 try_next:
1322 c = *str++;
1323 switch(c) {
1324 case '=':
1325 goto try_next;
1326 case '+':
1327 op->is_rw = 1;
1328 /* FALL THRU */
1329 case '&':
1330 if (j >= nb_outputs)
1331 tcc_error("'%c' modifier can only be applied to outputs", c);
1332 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1333 goto try_next;
1334 case 'A':
1335 /* allocate both eax and edx */
1336 if (is_reg_allocated(TREG_XAX) ||
1337 is_reg_allocated(TREG_XDX))
1338 goto try_next;
1339 op->is_llong = 1;
1340 op->reg = TREG_XAX;
1341 regs_allocated[TREG_XAX] |= reg_mask;
1342 regs_allocated[TREG_XDX] |= reg_mask;
1343 break;
1344 case 'a':
1345 reg = TREG_XAX;
1346 goto alloc_reg;
1347 case 'b':
1348 reg = 3;
1349 goto alloc_reg;
1350 case 'c':
1351 reg = TREG_XCX;
1352 goto alloc_reg;
1353 case 'd':
1354 reg = TREG_XDX;
1355 goto alloc_reg;
1356 case 'S':
1357 reg = 6;
1358 goto alloc_reg;
1359 case 'D':
1360 reg = 7;
1361 alloc_reg:
1362 if (is_reg_allocated(reg))
1363 goto try_next;
1364 goto reg_found;
1365 case 'q':
1366 /* eax, ebx, ecx or edx */
1367 for(reg = 0; reg < 4; reg++) {
1368 if (!is_reg_allocated(reg))
1369 goto reg_found;
1370 }
1371 goto try_next;
1372 case 'r':
1373 case 'R':
1374 case 'p': /* A general address, for x86(64) any register is acceptable*/
1375 /* any general register */
1376 for(reg = 0; reg < 8; reg++) {
1377 if (!is_reg_allocated(reg))
1378 goto reg_found;
1379 }
1380 goto try_next;
1381 reg_found:
1382 /* now we can reload in the register */
1383 op->is_llong = 0;
1384 op->reg = reg;
1385 regs_allocated[reg] |= reg_mask;
1386 break;
1387 case 'e':
1388 case 'i':
1389 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1390 goto try_next;
1391 break;
1392 case 'I':
1393 case 'N':
1394 case 'M':
1395 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1396 goto try_next;
1397 break;
1398 case 'm':
1399 case 'g':
1400 /* nothing special to do because the operand is already in
1401 memory, except if the pointer itself is stored in a
1402 memory variable (VT_LLOCAL case) */
1403 /* XXX: fix constant case */
1404 /* if it is a reference to a memory zone, it must lie
1405 in a register, so we reserve the register in the
1406 input registers and a load will be generated
1407 later */
1408 if (j < nb_outputs || c == 'm') {
1409 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1410 /* any general register */
1411 for(reg = 0; reg < 8; reg++) {
1412 if (!(regs_allocated[reg] & REG_IN_MASK))
1413 goto reg_found1;
1414 }
1415 goto try_next;
1416 reg_found1:
1417 /* now we can reload in the register */
1418 regs_allocated[reg] |= REG_IN_MASK;
1419 op->reg = reg;
1420 op->is_memory = 1;
1421 }
1422 }
1423 break;
1424 default:
1425 tcc_error("asm constraint %d ('%s') could not be satisfied",
1426 j, op->constraint);
1427 break;
1428 }
1429 /* if a reference is present for that operand, we assign it too */
1430 if (op->input_index >= 0) {
1431 operands[op->input_index].reg = op->reg;
1432 operands[op->input_index].is_llong = op->is_llong;
1433 }
1434 }
1435
1436 /* compute out_reg. It is used to store outputs registers to memory
1437 locations references by pointers (VT_LLOCAL case) */
1438 *pout_reg = -1;
1439 for(i=0;i<nb_operands;i++) {
1440 op = &operands[i];
1441 if (op->reg >= 0 &&
1442 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1443 !op->is_memory) {
1444 for(reg = 0; reg < 8; reg++) {
1445 if (!(regs_allocated[reg] & REG_OUT_MASK))
1446 goto reg_found2;
1447 }
1448 tcc_error("could not find free output register for reloading");
1449 reg_found2:
1450 *pout_reg = reg;
1451 break;
1452 }
1453 }
1454
1455 /* print sorted constraints */
1456#ifdef ASM_DEBUG
1457 for(i=0;i<nb_operands;i++) {
1458 j = sorted_op[i];
1459 op = &operands[j];
1460 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1461 j,
1462 op->id ? get_tok_str(op->id, NULL) : "",
1463 op->constraint,
1464 op->vt->r,
1465 op->reg);
1466 }
1467 if (*pout_reg >= 0)
1468 printf("out_reg=%d\n", *pout_reg);
1469#endif
1470}
1471
1472ST_FUNC void subst_asm_operand(CString *add_str,
1473 SValue *sv, int modifier)
1474{
1475 int r, reg, size, val;
1476 char buf[64];
1477
1478 r = sv->r;
1479 if ((r & VT_VALMASK) == VT_CONST) {
1480 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
1481 modifier != 'P')
1482 cstr_ccat(add_str, '$');
1483 if (r & VT_SYM) {
1484 const char *name = get_tok_str(sv->sym->v, NULL);
1485 if (sv->sym->v >= SYM_FIRST_ANOM) {
1486 /* In case of anonymous symbols ("L.42", used
1487 for static data labels) we can't find them
1488 in the C symbol table when later looking up
1489 this name. So enter them now into the asm label
1490 list when we still know the symbol. */
1491 get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
1492 }
1493 if (tcc_state->leading_underscore)
1494 cstr_ccat(add_str, '_');
1495 cstr_cat(add_str, name, -1);
1496 if ((uint32_t)sv->c.i == 0)
1497 goto no_offset;
1498 cstr_ccat(add_str, '+');
1499 }
1500 val = sv->c.i;
1501 if (modifier == 'n')
1502 val = -val;
1503 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1504 cstr_cat(add_str, buf, -1);
1505 no_offset:;
1506#ifdef TCC_TARGET_X86_64
1507 if (r & VT_LVAL)
1508 cstr_cat(add_str, "(%rip)", -1);
1509#endif
1510 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1511#ifdef TCC_TARGET_X86_64
1512 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1513#else
1514 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1515#endif
1516 cstr_cat(add_str, buf, -1);
1517 } else if (r & VT_LVAL) {
1518 reg = r & VT_VALMASK;
1519 if (reg >= VT_CONST)
1520 tcc_internal_error("");
1521 snprintf(buf, sizeof(buf), "(%%%s)",
1522#ifdef TCC_TARGET_X86_64
1523 get_tok_str(TOK_ASM_rax + reg, NULL)
1524#else
1525 get_tok_str(TOK_ASM_eax + reg, NULL)
1526#endif
1527 );
1528 cstr_cat(add_str, buf, -1);
1529 } else {
1530 /* register case */
1531 reg = r & VT_VALMASK;
1532 if (reg >= VT_CONST)
1533 tcc_internal_error("");
1534
1535 /* choose register operand size */
1536 if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
1537 (sv->type.t & VT_BTYPE) == VT_BOOL)
1538 size = 1;
1539 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1540 size = 2;
1541#ifdef TCC_TARGET_X86_64
1542 else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
1543 (sv->type.t & VT_BTYPE) == VT_PTR)
1544 size = 8;
1545#endif
1546 else
1547 size = 4;
1548 if (size == 1 && reg >= 4)
1549 size = 4;
1550
1551 if (modifier == 'b') {
1552 if (reg >= 4)
1553 tcc_error("cannot use byte register");
1554 size = 1;
1555 } else if (modifier == 'h') {
1556 if (reg >= 4)
1557 tcc_error("cannot use byte register");
1558 size = -1;
1559 } else if (modifier == 'w') {
1560 size = 2;
1561 } else if (modifier == 'k') {
1562 size = 4;
1563#ifdef TCC_TARGET_X86_64
1564 } else if (modifier == 'q') {
1565 size = 8;
1566#endif
1567 }
1568
1569 switch(size) {
1570 case -1:
1571 reg = TOK_ASM_ah + reg;
1572 break;
1573 case 1:
1574 reg = TOK_ASM_al + reg;
1575 break;
1576 case 2:
1577 reg = TOK_ASM_ax + reg;
1578 break;
1579 default:
1580 reg = TOK_ASM_eax + reg;
1581 break;
1582#ifdef TCC_TARGET_X86_64
1583 case 8:
1584 reg = TOK_ASM_rax + reg;
1585 break;
1586#endif
1587 }
1588 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1589 cstr_cat(add_str, buf, -1);
1590 }
1591}
1592
1593/* generate prolog and epilog code for asm statement */
1594ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1595 int nb_outputs, int is_output,
1596 uint8_t *clobber_regs,
1597 int out_reg)
1598{
1599 uint8_t regs_allocated[NB_ASM_REGS];
1600 ASMOperand *op;
1601 int i, reg;
1602
1603 /* Strictly speaking %Xbp and %Xsp should be included in the
1604 call-preserved registers, but currently it doesn't matter. */
1605#ifdef TCC_TARGET_X86_64
1606#ifdef TCC_TARGET_PE
1607 static uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
1608#else
1609 static uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
1610#endif
1611#else
1612 static uint8_t reg_saved[] = { 3, 6, 7 };
1613#endif
1614
1615 /* mark all used registers */
1616 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1617 for(i = 0; i < nb_operands;i++) {
1618 op = &operands[i];
1619 if (op->reg >= 0)
1620 regs_allocated[op->reg] = 1;
1621 }
1622 if (!is_output) {
1623 /* generate reg save code */
1624 for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
1625 reg = reg_saved[i];
1626 if (regs_allocated[reg]) {
1627 if (reg >= 8)
1628 g(0x41), reg-=8;
1629 g(0x50 + reg);
1630 }
1631 }
1632
1633 /* generate load code */
1634 for(i = 0; i < nb_operands; i++) {
1635 op = &operands[i];
1636 if (op->reg >= 0) {
1637 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1638 op->is_memory) {
1639 /* memory reference case (for both input and
1640 output cases) */
1641 SValue sv;
1642 sv = *op->vt;
1643 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
1644 sv.type.t = VT_PTR;
1645 load(op->reg, &sv);
1646 } else if (i >= nb_outputs || op->is_rw) {
1647 /* load value in register */
1648 load(op->reg, op->vt);
1649 if (op->is_llong) {
1650 SValue sv;
1651 sv = *op->vt;
1652 sv.c.i += 4;
1653 load(TREG_XDX, &sv);
1654 }
1655 }
1656 }
1657 }
1658 } else {
1659 /* generate save code */
1660 for(i = 0 ; i < nb_outputs; i++) {
1661 op = &operands[i];
1662 if (op->reg >= 0) {
1663 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1664 if (!op->is_memory) {
1665 SValue sv;
1666 sv = *op->vt;
1667 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1668 sv.type.t = VT_PTR;
1669 load(out_reg, &sv);
1670
1671 sv = *op->vt;
1672 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1673 store(op->reg, &sv);
1674 }
1675 } else {
1676 store(op->reg, op->vt);
1677 if (op->is_llong) {
1678 SValue sv;
1679 sv = *op->vt;
1680 sv.c.i += 4;
1681 store(TREG_XDX, &sv);
1682 }
1683 }
1684 }
1685 }
1686 /* generate reg restore code */
1687 for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
1688 reg = reg_saved[i];
1689 if (regs_allocated[reg]) {
1690 if (reg >= 8)
1691 g(0x41), reg-=8;
1692 g(0x58 + reg);
1693 }
1694 }
1695 }
1696}
1697
1698ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1699{
1700 int reg;
1701 TokenSym *ts;
1702#ifdef TCC_TARGET_X86_64
1703 unsigned int type;
1704#endif
1705
1706 if (!strcmp(str, "memory") ||
1707 !strcmp(str, "cc") ||
1708 !strcmp(str, "flags"))
1709 return;
1710 ts = tok_alloc(str, strlen(str));
1711 reg = ts->tok;
1712 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1713 reg -= TOK_ASM_eax;
1714 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1715 reg -= TOK_ASM_ax;
1716#ifdef TCC_TARGET_X86_64
1717 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1718 reg -= TOK_ASM_rax;
1719 } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
1720 ;
1721#endif
1722 } else {
1723 tcc_error("invalid clobber register '%s'", str);
1724 }
1725 clobber_regs[reg] = 1;
1726}
1727