1 | /* |
2 | * x86-64 code generator for TCC |
3 | * |
4 | * Copyright (c) 2008 Shinichiro Hamaji |
5 | * |
6 | * Based on i386-gen.c by Fabrice Bellard |
7 | * |
8 | * This library is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2 of the License, or (at your option) any later version. |
12 | * |
13 | * This library is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with this library; if not, write to the Free Software |
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 | */ |
22 | |
23 | #ifdef TARGET_DEFS_ONLY |
24 | |
25 | /* number of available registers */ |
26 | #define NB_REGS 25 |
27 | #define NB_ASM_REGS 16 |
28 | #define CONFIG_TCC_ASM |
29 | |
30 | /* a register can belong to several classes. The classes must be |
31 | sorted from more general to more precise (see gv2() code which does |
32 | assumptions on it). */ |
33 | #define RC_INT 0x0001 /* generic integer register */ |
34 | #define RC_FLOAT 0x0002 /* generic float register */ |
35 | #define RC_RAX 0x0004 |
36 | #define RC_RCX 0x0008 |
37 | #define RC_RDX 0x0010 |
38 | #define RC_ST0 0x0080 /* only for long double */ |
39 | #define RC_R8 0x0100 |
40 | #define RC_R9 0x0200 |
41 | #define RC_R10 0x0400 |
42 | #define RC_R11 0x0800 |
43 | #define RC_XMM0 0x1000 |
44 | #define RC_XMM1 0x2000 |
45 | #define RC_XMM2 0x4000 |
46 | #define RC_XMM3 0x8000 |
47 | #define RC_XMM4 0x10000 |
48 | #define RC_XMM5 0x20000 |
49 | #define RC_XMM6 0x40000 |
50 | #define RC_XMM7 0x80000 |
51 | #define RC_IRET RC_RAX /* function return: integer register */ |
52 | #define RC_IRE2 RC_RDX /* function return: second integer register */ |
53 | #define RC_FRET RC_XMM0 /* function return: float register */ |
54 | #define RC_FRE2 RC_XMM1 /* function return: second float register */ |
55 | |
56 | /* pretty names for the registers */ |
57 | enum { |
58 | TREG_RAX = 0, |
59 | TREG_RCX = 1, |
60 | TREG_RDX = 2, |
61 | TREG_RSP = 4, |
62 | TREG_RSI = 6, |
63 | TREG_RDI = 7, |
64 | |
65 | TREG_R8 = 8, |
66 | TREG_R9 = 9, |
67 | TREG_R10 = 10, |
68 | TREG_R11 = 11, |
69 | |
70 | TREG_XMM0 = 16, |
71 | TREG_XMM1 = 17, |
72 | TREG_XMM2 = 18, |
73 | TREG_XMM3 = 19, |
74 | TREG_XMM4 = 20, |
75 | TREG_XMM5 = 21, |
76 | TREG_XMM6 = 22, |
77 | TREG_XMM7 = 23, |
78 | |
79 | TREG_ST0 = 24, |
80 | |
81 | TREG_MEM = 0x20 |
82 | }; |
83 | |
84 | #define REX_BASE(reg) (((reg) >> 3) & 1) |
85 | #define REG_VALUE(reg) ((reg) & 7) |
86 | |
87 | /* return registers for function */ |
88 | #define REG_IRET TREG_RAX /* single word int return register */ |
89 | #define REG_IRE2 TREG_RDX /* second word return register (for long long) */ |
90 | #define REG_FRET TREG_XMM0 /* float return register */ |
91 | #define REG_FRE2 TREG_XMM1 /* second float return register */ |
92 | |
93 | /* defined if function parameters must be evaluated in reverse order */ |
94 | #define INVERT_FUNC_PARAMS |
95 | |
96 | /* pointer size, in bytes */ |
97 | #define PTR_SIZE 8 |
98 | |
99 | /* long double size and alignment, in bytes */ |
100 | #define LDOUBLE_SIZE 16 |
101 | #define LDOUBLE_ALIGN 16 |
102 | /* maximum alignment (for aligned attribute support) */ |
103 | #define MAX_ALIGN 16 |
104 | |
105 | /* define if return values need to be extended explicitely |
106 | at caller side (for interfacing with non-TCC compilers) */ |
107 | #define PROMOTE_RET |
108 | /******************************************************/ |
109 | #else /* ! TARGET_DEFS_ONLY */ |
110 | /******************************************************/ |
111 | #define USING_GLOBALS |
112 | #include "tcc.h" |
113 | #include <assert.h> |
114 | |
115 | ST_DATA const int reg_classes[NB_REGS] = { |
116 | /* eax */ RC_INT | RC_RAX, |
117 | /* ecx */ RC_INT | RC_RCX, |
118 | /* edx */ RC_INT | RC_RDX, |
119 | 0, |
120 | 0, |
121 | 0, |
122 | 0, |
123 | 0, |
124 | RC_R8, |
125 | RC_R9, |
126 | RC_R10, |
127 | RC_R11, |
128 | 0, |
129 | 0, |
130 | 0, |
131 | 0, |
132 | /* xmm0 */ RC_FLOAT | RC_XMM0, |
133 | /* xmm1 */ RC_FLOAT | RC_XMM1, |
134 | /* xmm2 */ RC_FLOAT | RC_XMM2, |
135 | /* xmm3 */ RC_FLOAT | RC_XMM3, |
136 | /* xmm4 */ RC_FLOAT | RC_XMM4, |
137 | /* xmm5 */ RC_FLOAT | RC_XMM5, |
138 | /* xmm6 an xmm7 are included so gv() can be used on them, |
139 | but they are not tagged with RC_FLOAT because they are |
140 | callee saved on Windows */ |
141 | RC_XMM6, |
142 | RC_XMM7, |
143 | /* st0 */ RC_ST0 |
144 | }; |
145 | |
146 | static unsigned long func_sub_sp_offset; |
147 | static int func_ret_sub; |
148 | |
149 | #if defined(CONFIG_TCC_BCHECK) |
150 | static addr_t func_bound_offset; |
151 | static unsigned long func_bound_ind; |
152 | ST_DATA int func_bound_add_epilog; |
153 | #endif |
154 | |
155 | #ifdef TCC_TARGET_PE |
156 | static int func_scratch, func_alloca; |
157 | #endif |
158 | |
159 | /* XXX: make it faster ? */ |
160 | ST_FUNC void g(int c) |
161 | { |
162 | int ind1; |
163 | if (nocode_wanted) |
164 | return; |
165 | ind1 = ind + 1; |
166 | if (ind1 > cur_text_section->data_allocated) |
167 | section_realloc(cur_text_section, ind1); |
168 | cur_text_section->data[ind] = c; |
169 | ind = ind1; |
170 | } |
171 | |
172 | ST_FUNC void o(unsigned int c) |
173 | { |
174 | while (c) { |
175 | g(c); |
176 | c = c >> 8; |
177 | } |
178 | } |
179 | |
180 | ST_FUNC void gen_le16(int v) |
181 | { |
182 | g(v); |
183 | g(v >> 8); |
184 | } |
185 | |
186 | ST_FUNC void gen_le32(int c) |
187 | { |
188 | g(c); |
189 | g(c >> 8); |
190 | g(c >> 16); |
191 | g(c >> 24); |
192 | } |
193 | |
194 | ST_FUNC void gen_le64(int64_t c) |
195 | { |
196 | g(c); |
197 | g(c >> 8); |
198 | g(c >> 16); |
199 | g(c >> 24); |
200 | g(c >> 32); |
201 | g(c >> 40); |
202 | g(c >> 48); |
203 | g(c >> 56); |
204 | } |
205 | |
206 | static void orex(int ll, int r, int r2, int b) |
207 | { |
208 | if ((r & VT_VALMASK) >= VT_CONST) |
209 | r = 0; |
210 | if ((r2 & VT_VALMASK) >= VT_CONST) |
211 | r2 = 0; |
212 | if (ll || REX_BASE(r) || REX_BASE(r2)) |
213 | o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3)); |
214 | o(b); |
215 | } |
216 | |
217 | /* output a symbol and patch all calls to it */ |
218 | ST_FUNC void gsym_addr(int t, int a) |
219 | { |
220 | while (t) { |
221 | unsigned char *ptr = cur_text_section->data + t; |
222 | uint32_t n = read32le(ptr); /* next value */ |
223 | write32le(ptr, a < 0 ? -a : a - t - 4); |
224 | t = n; |
225 | } |
226 | } |
227 | |
228 | static int is64_type(int t) |
229 | { |
230 | return ((t & VT_BTYPE) == VT_PTR || |
231 | (t & VT_BTYPE) == VT_FUNC || |
232 | (t & VT_BTYPE) == VT_LLONG); |
233 | } |
234 | |
235 | /* instruction + 4 bytes data. Return the address of the data */ |
236 | static int oad(int c, int s) |
237 | { |
238 | int t; |
239 | if (nocode_wanted) |
240 | return s; |
241 | o(c); |
242 | t = ind; |
243 | gen_le32(s); |
244 | return t; |
245 | } |
246 | |
247 | /* generate jmp to a label */ |
248 | #define gjmp2(instr,lbl) oad(instr,lbl) |
249 | |
250 | ST_FUNC void gen_addr32(int r, Sym *sym, int c) |
251 | { |
252 | if (r & VT_SYM) |
253 | greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0; |
254 | gen_le32(c); |
255 | } |
256 | |
257 | /* output constant with relocation if 'r & VT_SYM' is true */ |
258 | ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c) |
259 | { |
260 | if (r & VT_SYM) |
261 | greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0; |
262 | gen_le64(c); |
263 | } |
264 | |
265 | /* output constant with relocation if 'r & VT_SYM' is true */ |
266 | ST_FUNC void gen_addrpc32(int r, Sym *sym, int c) |
267 | { |
268 | if (r & VT_SYM) |
269 | greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4; |
270 | gen_le32(c-4); |
271 | } |
272 | |
273 | /* output got address with relocation */ |
274 | static void gen_gotpcrel(int r, Sym *sym, int c) |
275 | { |
276 | #ifdef TCC_TARGET_PE |
277 | tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n" , |
278 | get_tok_str(sym->v, NULL), c, r, |
279 | cur_text_section->data[ind-3], |
280 | cur_text_section->data[ind-2], |
281 | cur_text_section->data[ind-1] |
282 | ); |
283 | #endif |
284 | greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4); |
285 | gen_le32(0); |
286 | if (c) { |
287 | /* we use add c, %xxx for displacement */ |
288 | orex(1, r, 0, 0x81); |
289 | o(0xc0 + REG_VALUE(r)); |
290 | gen_le32(c); |
291 | } |
292 | } |
293 | |
294 | static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got) |
295 | { |
296 | op_reg = REG_VALUE(op_reg) << 3; |
297 | if ((r & VT_VALMASK) == VT_CONST) { |
298 | /* constant memory reference */ |
299 | if (!(r & VT_SYM)) { |
300 | /* Absolute memory reference */ |
301 | o(0x04 | op_reg); /* [sib] | destreg */ |
302 | oad(0x25, c); /* disp32 */ |
303 | } else { |
304 | o(0x05 | op_reg); /* (%rip)+disp32 | destreg */ |
305 | if (is_got) { |
306 | gen_gotpcrel(r, sym, c); |
307 | } else { |
308 | gen_addrpc32(r, sym, c); |
309 | } |
310 | } |
311 | } else if ((r & VT_VALMASK) == VT_LOCAL) { |
312 | /* currently, we use only ebp as base */ |
313 | if (c == (char)c) { |
314 | /* short reference */ |
315 | o(0x45 | op_reg); |
316 | g(c); |
317 | } else { |
318 | oad(0x85 | op_reg, c); |
319 | } |
320 | } else if ((r & VT_VALMASK) >= TREG_MEM) { |
321 | if (c) { |
322 | g(0x80 | op_reg | REG_VALUE(r)); |
323 | gen_le32(c); |
324 | } else { |
325 | g(0x00 | op_reg | REG_VALUE(r)); |
326 | } |
327 | } else { |
328 | g(0x00 | op_reg | REG_VALUE(r)); |
329 | } |
330 | } |
331 | |
332 | /* generate a modrm reference. 'op_reg' contains the additional 3 |
333 | opcode bits */ |
334 | static void gen_modrm(int op_reg, int r, Sym *sym, int c) |
335 | { |
336 | gen_modrm_impl(op_reg, r, sym, c, 0); |
337 | } |
338 | |
339 | /* generate a modrm reference. 'op_reg' contains the additional 3 |
340 | opcode bits */ |
341 | static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) |
342 | { |
343 | int is_got; |
344 | is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC); |
345 | orex(1, r, op_reg, opcode); |
346 | gen_modrm_impl(op_reg, r, sym, c, is_got); |
347 | } |
348 | |
349 | |
350 | /* load 'r' from value 'sv' */ |
351 | void load(int r, SValue *sv) |
352 | { |
353 | int v, t, ft, fc, fr; |
354 | SValue v1; |
355 | |
356 | #ifdef TCC_TARGET_PE |
357 | SValue v2; |
358 | sv = pe_getimport(sv, &v2); |
359 | #endif |
360 | |
361 | fr = sv->r; |
362 | ft = sv->type.t & ~VT_DEFSIGN; |
363 | fc = sv->c.i; |
364 | if (fc != sv->c.i && (fr & VT_SYM)) |
365 | tcc_error("64 bit addend in load" ); |
366 | |
367 | ft &= ~(VT_VOLATILE | VT_CONSTANT); |
368 | |
369 | #ifndef TCC_TARGET_PE |
370 | /* we use indirect access via got */ |
371 | if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) && |
372 | (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) { |
373 | /* use the result register as a temporal register */ |
374 | int tr = r | TREG_MEM; |
375 | if (is_float(ft)) { |
376 | /* we cannot use float registers as a temporal register */ |
377 | tr = get_reg(RC_INT) | TREG_MEM; |
378 | } |
379 | gen_modrm64(0x8b, tr, fr, sv->sym, 0); |
380 | |
381 | /* load from the temporal register */ |
382 | fr = tr | VT_LVAL; |
383 | } |
384 | #endif |
385 | |
386 | v = fr & VT_VALMASK; |
387 | if (fr & VT_LVAL) { |
388 | int b, ll; |
389 | if (v == VT_LLOCAL) { |
390 | v1.type.t = VT_PTR; |
391 | v1.r = VT_LOCAL | VT_LVAL; |
392 | v1.c.i = fc; |
393 | fr = r; |
394 | if (!(reg_classes[fr] & (RC_INT|RC_R11))) |
395 | fr = get_reg(RC_INT); |
396 | load(fr, &v1); |
397 | } |
398 | if (fc != sv->c.i) { |
399 | /* If the addends doesn't fit into a 32bit signed |
400 | we must use a 64bit move. We've checked above |
401 | that this doesn't have a sym associated. */ |
402 | v1.type.t = VT_LLONG; |
403 | v1.r = VT_CONST; |
404 | v1.c.i = sv->c.i; |
405 | fr = r; |
406 | if (!(reg_classes[fr] & (RC_INT|RC_R11))) |
407 | fr = get_reg(RC_INT); |
408 | load(fr, &v1); |
409 | fc = 0; |
410 | } |
411 | ll = 0; |
412 | /* Like GCC we can load from small enough properly sized |
413 | structs and unions as well. |
414 | XXX maybe move to generic operand handling, but should |
415 | occur only with asm, so tccasm.c might also be a better place */ |
416 | if ((ft & VT_BTYPE) == VT_STRUCT) { |
417 | int align; |
418 | switch (type_size(&sv->type, &align)) { |
419 | case 1: ft = VT_BYTE; break; |
420 | case 2: ft = VT_SHORT; break; |
421 | case 4: ft = VT_INT; break; |
422 | case 8: ft = VT_LLONG; break; |
423 | default: |
424 | tcc_error("invalid aggregate type for register load" ); |
425 | break; |
426 | } |
427 | } |
428 | if ((ft & VT_BTYPE) == VT_FLOAT) { |
429 | b = 0x6e0f66; |
430 | r = REG_VALUE(r); /* movd */ |
431 | } else if ((ft & VT_BTYPE) == VT_DOUBLE) { |
432 | b = 0x7e0ff3; /* movq */ |
433 | r = REG_VALUE(r); |
434 | } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { |
435 | b = 0xdb, r = 5; /* fldt */ |
436 | } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) { |
437 | b = 0xbe0f; /* movsbl */ |
438 | } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) { |
439 | b = 0xb60f; /* movzbl */ |
440 | } else if ((ft & VT_TYPE) == VT_SHORT) { |
441 | b = 0xbf0f; /* movswl */ |
442 | } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) { |
443 | b = 0xb70f; /* movzwl */ |
444 | } else if ((ft & VT_TYPE) == (VT_VOID)) { |
445 | /* Can happen with zero size structs */ |
446 | return; |
447 | } else { |
448 | assert(((ft & VT_BTYPE) == VT_INT) |
449 | || ((ft & VT_BTYPE) == VT_LLONG) |
450 | || ((ft & VT_BTYPE) == VT_PTR) |
451 | || ((ft & VT_BTYPE) == VT_FUNC) |
452 | ); |
453 | ll = is64_type(ft); |
454 | b = 0x8b; |
455 | } |
456 | if (ll) { |
457 | gen_modrm64(b, r, fr, sv->sym, fc); |
458 | } else { |
459 | orex(ll, fr, r, b); |
460 | gen_modrm(r, fr, sv->sym, fc); |
461 | } |
462 | } else { |
463 | if (v == VT_CONST) { |
464 | if (fr & VT_SYM) { |
465 | #ifdef TCC_TARGET_PE |
466 | orex(1,0,r,0x8d); |
467 | o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ |
468 | gen_addrpc32(fr, sv->sym, fc); |
469 | #else |
470 | if (sv->sym->type.t & VT_STATIC) { |
471 | orex(1,0,r,0x8d); |
472 | o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ |
473 | gen_addrpc32(fr, sv->sym, fc); |
474 | } else { |
475 | orex(1,0,r,0x8b); |
476 | o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */ |
477 | gen_gotpcrel(r, sv->sym, fc); |
478 | } |
479 | #endif |
480 | } else if (is64_type(ft)) { |
481 | orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ |
482 | gen_le64(sv->c.i); |
483 | } else { |
484 | orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ |
485 | gen_le32(fc); |
486 | } |
487 | } else if (v == VT_LOCAL) { |
488 | orex(1,0,r,0x8d); /* lea xxx(%ebp), r */ |
489 | gen_modrm(r, VT_LOCAL, sv->sym, fc); |
490 | } else if (v == VT_CMP) { |
491 | if (fc & 0x100) |
492 | { |
493 | v = vtop->cmp_r; |
494 | fc &= ~0x100; |
495 | /* This was a float compare. If the parity bit is |
496 | set the result was unordered, meaning false for everything |
497 | except TOK_NE, and true for TOK_NE. */ |
498 | orex(0, r, 0, 0xb0 + REG_VALUE(r)); /* mov $0/1,%al */ |
499 | g(v ^ fc ^ (v == TOK_NE)); |
500 | o(0x037a + (REX_BASE(r) << 8)); |
501 | } |
502 | orex(0,r,0, 0x0f); /* setxx %br */ |
503 | o(fc); |
504 | o(0xc0 + REG_VALUE(r)); |
505 | orex(0,r,0, 0x0f); |
506 | o(0xc0b6 + REG_VALUE(r) * 0x900); /* movzbl %al, %eax */ |
507 | } else if (v == VT_JMP || v == VT_JMPI) { |
508 | t = v & 1; |
509 | orex(0,r,0,0); |
510 | oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */ |
511 | o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */ |
512 | gsym(fc); |
513 | orex(0,r,0,0); |
514 | oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */ |
515 | } else if (v != r) { |
516 | if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) { |
517 | if (v == TREG_ST0) { |
518 | /* gen_cvt_ftof(VT_DOUBLE); */ |
519 | o(0xf0245cdd); /* fstpl -0x10(%rsp) */ |
520 | /* movsd -0x10(%rsp),%xmmN */ |
521 | o(0x100ff2); |
522 | o(0x44 + REG_VALUE(r)*8); /* %xmmN */ |
523 | o(0xf024); |
524 | } else { |
525 | assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); |
526 | if ((ft & VT_BTYPE) == VT_FLOAT) { |
527 | o(0x100ff3); |
528 | } else { |
529 | assert((ft & VT_BTYPE) == VT_DOUBLE); |
530 | o(0x100ff2); |
531 | } |
532 | o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); |
533 | } |
534 | } else if (r == TREG_ST0) { |
535 | assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); |
536 | /* gen_cvt_ftof(VT_LDOUBLE); */ |
537 | /* movsd %xmmN,-0x10(%rsp) */ |
538 | o(0x110ff2); |
539 | o(0x44 + REG_VALUE(r)*8); /* %xmmN */ |
540 | o(0xf024); |
541 | o(0xf02444dd); /* fldl -0x10(%rsp) */ |
542 | } else { |
543 | orex(is64_type(ft), r, v, 0x89); |
544 | o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */ |
545 | } |
546 | } |
547 | } |
548 | } |
549 | |
550 | /* store register 'r' in lvalue 'v' */ |
551 | void store(int r, SValue *v) |
552 | { |
553 | int fr, bt, ft, fc; |
554 | int op64 = 0; |
555 | /* store the REX prefix in this variable when PIC is enabled */ |
556 | int pic = 0; |
557 | |
558 | #ifdef TCC_TARGET_PE |
559 | SValue v2; |
560 | v = pe_getimport(v, &v2); |
561 | #endif |
562 | |
563 | fr = v->r & VT_VALMASK; |
564 | ft = v->type.t; |
565 | fc = v->c.i; |
566 | if (fc != v->c.i && (fr & VT_SYM)) |
567 | tcc_error("64 bit addend in store" ); |
568 | ft &= ~(VT_VOLATILE | VT_CONSTANT); |
569 | bt = ft & VT_BTYPE; |
570 | |
571 | #ifndef TCC_TARGET_PE |
572 | /* we need to access the variable via got */ |
573 | if (fr == VT_CONST && (v->r & VT_SYM)) { |
574 | /* mov xx(%rip), %r11 */ |
575 | o(0x1d8b4c); |
576 | gen_gotpcrel(TREG_R11, v->sym, v->c.i); |
577 | pic = is64_type(bt) ? 0x49 : 0x41; |
578 | } |
579 | #endif |
580 | |
581 | /* XXX: incorrect if float reg to reg */ |
582 | if (bt == VT_FLOAT) { |
583 | o(0x66); |
584 | o(pic); |
585 | o(0x7e0f); /* movd */ |
586 | r = REG_VALUE(r); |
587 | } else if (bt == VT_DOUBLE) { |
588 | o(0x66); |
589 | o(pic); |
590 | o(0xd60f); /* movq */ |
591 | r = REG_VALUE(r); |
592 | } else if (bt == VT_LDOUBLE) { |
593 | o(0xc0d9); /* fld %st(0) */ |
594 | o(pic); |
595 | o(0xdb); /* fstpt */ |
596 | r = 7; |
597 | } else { |
598 | if (bt == VT_SHORT) |
599 | o(0x66); |
600 | o(pic); |
601 | if (bt == VT_BYTE || bt == VT_BOOL) |
602 | orex(0, 0, r, 0x88); |
603 | else if (is64_type(bt)) |
604 | op64 = 0x89; |
605 | else |
606 | orex(0, 0, r, 0x89); |
607 | } |
608 | if (pic) { |
609 | /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */ |
610 | if (op64) |
611 | o(op64); |
612 | o(3 + (r << 3)); |
613 | } else if (op64) { |
614 | if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { |
615 | gen_modrm64(op64, r, v->r, v->sym, fc); |
616 | } else if (fr != r) { |
617 | orex(1, fr, r, op64); |
618 | o(0xc0 + fr + r * 8); /* mov r, fr */ |
619 | } |
620 | } else { |
621 | if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { |
622 | gen_modrm(r, v->r, v->sym, fc); |
623 | } else if (fr != r) { |
624 | o(0xc0 + fr + r * 8); /* mov r, fr */ |
625 | } |
626 | } |
627 | } |
628 | |
629 | /* 'is_jmp' is '1' if it is a jump */ |
630 | static void gcall_or_jmp(int is_jmp) |
631 | { |
632 | int r; |
633 | if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && |
634 | ((vtop->r & VT_SYM) && (vtop->c.i-4) == (int)(vtop->c.i-4))) { |
635 | /* constant symbolic case -> simple relocation */ |
636 | #ifdef TCC_TARGET_PE |
637 | greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4)); |
638 | #else |
639 | greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4)); |
640 | #endif |
641 | oad(0xe8 + is_jmp, 0); /* call/jmp im */ |
642 | } else { |
643 | /* otherwise, indirect call */ |
644 | r = TREG_R11; |
645 | load(r, vtop); |
646 | o(0x41); /* REX */ |
647 | o(0xff); /* call/jmp *r */ |
648 | o(0xd0 + REG_VALUE(r) + (is_jmp << 4)); |
649 | } |
650 | } |
651 | |
652 | #if defined(CONFIG_TCC_BCHECK) |
653 | |
654 | static void gen_bounds_call(int v) |
655 | { |
656 | Sym *sym = external_global_sym(v, &func_old_type); |
657 | oad(0xe8, 0); |
658 | #ifdef TCC_TARGET_PE |
659 | greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4); |
660 | #else |
661 | greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4); |
662 | #endif |
663 | } |
664 | |
665 | #ifdef TCC_TARGET_PE |
666 | # define TREG_FASTCALL_1 TREG_RCX |
667 | #else |
668 | # define TREG_FASTCALL_1 TREG_RDI |
669 | #endif |
670 | |
671 | static void gen_bounds_prolog(void) |
672 | { |
673 | /* leave some room for bound checking code */ |
674 | func_bound_offset = lbounds_section->data_offset; |
675 | func_bound_ind = ind; |
676 | func_bound_add_epilog = 0; |
677 | o(0xb848 + TREG_FASTCALL_1 * 0x100); /*lbound section pointer */ |
678 | gen_le64 (0); |
679 | oad(0xb8, 0); /* call to function */ |
680 | } |
681 | |
682 | static void gen_bounds_epilog(void) |
683 | { |
684 | addr_t saved_ind; |
685 | addr_t *bounds_ptr; |
686 | Sym *sym_data; |
687 | int offset_modified = func_bound_offset != lbounds_section->data_offset; |
688 | |
689 | if (!offset_modified && !func_bound_add_epilog) |
690 | return; |
691 | |
692 | /* add end of table info */ |
693 | bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t)); |
694 | *bounds_ptr = 0; |
695 | |
696 | sym_data = get_sym_ref(&char_pointer_type, lbounds_section, |
697 | func_bound_offset, lbounds_section->data_offset); |
698 | |
699 | /* generate bound local allocation */ |
700 | if (offset_modified) { |
701 | saved_ind = ind; |
702 | ind = func_bound_ind; |
703 | greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0); |
704 | ind = ind + 10; |
705 | gen_bounds_call(TOK___bound_local_new); |
706 | ind = saved_ind; |
707 | } |
708 | |
709 | /* generate bound check local freeing */ |
710 | o(0x5250); /* save returned value, if any */ |
711 | greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0); |
712 | o(0xb848 + TREG_FASTCALL_1 * 0x100); /* mov xxx, %rcx/di */ |
713 | gen_le64 (0); |
714 | gen_bounds_call(TOK___bound_local_delete); |
715 | o(0x585a); /* restore returned value, if any */ |
716 | } |
717 | #endif |
718 | |
719 | #ifdef TCC_TARGET_PE |
720 | |
721 | #define REGN 4 |
722 | static const uint8_t arg_regs[REGN] = { |
723 | TREG_RCX, TREG_RDX, TREG_R8, TREG_R9 |
724 | }; |
725 | |
726 | /* Prepare arguments in R10 and R11 rather than RCX and RDX |
727 | because gv() will not ever use these */ |
728 | static int arg_prepare_reg(int idx) { |
729 | if (idx == 0 || idx == 1) |
730 | /* idx=0: r10, idx=1: r11 */ |
731 | return idx + 10; |
732 | else |
733 | return arg_regs[idx]; |
734 | } |
735 | |
736 | /* Generate function call. The function address is pushed first, then |
737 | all the parameters in call order. This functions pops all the |
738 | parameters and the function address. */ |
739 | |
740 | static void gen_offs_sp(int b, int r, int d) |
741 | { |
742 | orex(1,0,r & 0x100 ? 0 : r, b); |
743 | if (d == (char)d) { |
744 | o(0x2444 | (REG_VALUE(r) << 3)); |
745 | g(d); |
746 | } else { |
747 | o(0x2484 | (REG_VALUE(r) << 3)); |
748 | gen_le32(d); |
749 | } |
750 | } |
751 | |
752 | static int using_regs(int size) |
753 | { |
754 | return !(size > 8 || (size & (size - 1))); |
755 | } |
756 | |
757 | /* Return the number of registers needed to return the struct, or 0 if |
758 | returning via struct pointer. */ |
759 | ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) |
760 | { |
761 | int size, align; |
762 | *ret_align = 1; // Never have to re-align return values for x86-64 |
763 | *regsize = 8; |
764 | size = type_size(vt, &align); |
765 | if (!using_regs(size)) |
766 | return 0; |
767 | if (size == 8) |
768 | ret->t = VT_LLONG; |
769 | else if (size == 4) |
770 | ret->t = VT_INT; |
771 | else if (size == 2) |
772 | ret->t = VT_SHORT; |
773 | else |
774 | ret->t = VT_BYTE; |
775 | ret->ref = NULL; |
776 | return 1; |
777 | } |
778 | |
779 | static int is_sse_float(int t) { |
780 | int bt; |
781 | bt = t & VT_BTYPE; |
782 | return bt == VT_DOUBLE || bt == VT_FLOAT; |
783 | } |
784 | |
785 | static int gfunc_arg_size(CType *type) { |
786 | int align; |
787 | if (type->t & (VT_ARRAY|VT_BITFIELD)) |
788 | return 8; |
789 | return type_size(type, &align); |
790 | } |
791 | |
792 | void gfunc_call(int nb_args) |
793 | { |
794 | int size, r, args_size, i, d, bt, struct_size; |
795 | int arg; |
796 | |
797 | #ifdef CONFIG_TCC_BCHECK |
798 | if (tcc_state->do_bounds_check) |
799 | gbound_args(nb_args); |
800 | #endif |
801 | |
802 | args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE; |
803 | arg = nb_args; |
804 | |
805 | /* for struct arguments, we need to call memcpy and the function |
806 | call breaks register passing arguments we are preparing. |
807 | So, we process arguments which will be passed by stack first. */ |
808 | struct_size = args_size; |
809 | for(i = 0; i < nb_args; i++) { |
810 | SValue *sv; |
811 | |
812 | --arg; |
813 | sv = &vtop[-i]; |
814 | bt = (sv->type.t & VT_BTYPE); |
815 | size = gfunc_arg_size(&sv->type); |
816 | |
817 | if (using_regs(size)) |
818 | continue; /* arguments smaller than 8 bytes passed in registers or on stack */ |
819 | |
820 | if (bt == VT_STRUCT) { |
821 | /* align to stack align size */ |
822 | size = (size + 15) & ~15; |
823 | /* generate structure store */ |
824 | r = get_reg(RC_INT); |
825 | gen_offs_sp(0x8d, r, struct_size); |
826 | struct_size += size; |
827 | |
828 | /* generate memcpy call */ |
829 | vset(&sv->type, r | VT_LVAL, 0); |
830 | vpushv(sv); |
831 | vstore(); |
832 | --vtop; |
833 | } else if (bt == VT_LDOUBLE) { |
834 | gv(RC_ST0); |
835 | gen_offs_sp(0xdb, 0x107, struct_size); |
836 | struct_size += 16; |
837 | } |
838 | } |
839 | |
840 | if (func_scratch < struct_size) |
841 | func_scratch = struct_size; |
842 | |
843 | arg = nb_args; |
844 | struct_size = args_size; |
845 | |
846 | for(i = 0; i < nb_args; i++) { |
847 | --arg; |
848 | bt = (vtop->type.t & VT_BTYPE); |
849 | |
850 | size = gfunc_arg_size(&vtop->type); |
851 | if (!using_regs(size)) { |
852 | /* align to stack align size */ |
853 | size = (size + 15) & ~15; |
854 | if (arg >= REGN) { |
855 | d = get_reg(RC_INT); |
856 | gen_offs_sp(0x8d, d, struct_size); |
857 | gen_offs_sp(0x89, d, arg*8); |
858 | } else { |
859 | d = arg_prepare_reg(arg); |
860 | gen_offs_sp(0x8d, d, struct_size); |
861 | } |
862 | struct_size += size; |
863 | } else { |
864 | if (is_sse_float(vtop->type.t)) { |
865 | if (tcc_state->nosse) |
866 | tcc_error("SSE disabled" ); |
867 | if (arg >= REGN) { |
868 | gv(RC_XMM0); |
869 | /* movq %xmm0, j*8(%rsp) */ |
870 | gen_offs_sp(0xd60f66, 0x100, arg*8); |
871 | } else { |
872 | /* Load directly to xmmN register */ |
873 | gv(RC_XMM0 << arg); |
874 | d = arg_prepare_reg(arg); |
875 | /* mov %xmmN, %rxx */ |
876 | o(0x66); |
877 | orex(1,d,0, 0x7e0f); |
878 | o(0xc0 + arg*8 + REG_VALUE(d)); |
879 | } |
880 | } else { |
881 | if (bt == VT_STRUCT) { |
882 | vtop->type.ref = NULL; |
883 | vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT |
884 | : size > 1 ? VT_SHORT : VT_BYTE; |
885 | } |
886 | |
887 | r = gv(RC_INT); |
888 | if (arg >= REGN) { |
889 | gen_offs_sp(0x89, r, arg*8); |
890 | } else { |
891 | d = arg_prepare_reg(arg); |
892 | orex(1,d,r,0x89); /* mov */ |
893 | o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); |
894 | } |
895 | } |
896 | } |
897 | vtop--; |
898 | } |
899 | save_regs(0); |
900 | /* Copy R10 and R11 into RCX and RDX, respectively */ |
901 | if (nb_args > 0) { |
902 | o(0xd1894c); /* mov %r10, %rcx */ |
903 | if (nb_args > 1) { |
904 | o(0xda894c); /* mov %r11, %rdx */ |
905 | } |
906 | } |
907 | |
908 | gcall_or_jmp(0); |
909 | |
910 | if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) { |
911 | /* need to add the "func_scratch" area after alloca */ |
912 | o(0x48); func_alloca = oad(0x05, func_alloca); /* add $NN, %rax */ |
913 | #ifdef CONFIG_TCC_BCHECK |
914 | if (tcc_state->do_bounds_check) |
915 | gen_bounds_call(TOK___bound_alloca_nr); /* new region */ |
916 | #endif |
917 | } |
918 | vtop--; |
919 | } |
920 | |
921 | |
922 | #define FUNC_PROLOG_SIZE 11 |
923 | |
924 | /* generate function prolog of type 't' */ |
925 | void gfunc_prolog(Sym *func_sym) |
926 | { |
927 | CType *func_type = &func_sym->type; |
928 | int addr, reg_param_index, bt, size; |
929 | Sym *sym; |
930 | CType *type; |
931 | |
932 | func_ret_sub = 0; |
933 | func_scratch = 32; |
934 | func_alloca = 0; |
935 | loc = 0; |
936 | |
937 | addr = PTR_SIZE * 2; |
938 | ind += FUNC_PROLOG_SIZE; |
939 | func_sub_sp_offset = ind; |
940 | reg_param_index = 0; |
941 | |
942 | sym = func_type->ref; |
943 | |
944 | /* if the function returns a structure, then add an |
945 | implicit pointer parameter */ |
946 | size = gfunc_arg_size(&func_vt); |
947 | if (!using_regs(size)) { |
948 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
949 | func_vc = addr; |
950 | reg_param_index++; |
951 | addr += 8; |
952 | } |
953 | |
954 | /* define parameters */ |
955 | while ((sym = sym->next) != NULL) { |
956 | type = &sym->type; |
957 | bt = type->t & VT_BTYPE; |
958 | size = gfunc_arg_size(type); |
959 | if (!using_regs(size)) { |
960 | if (reg_param_index < REGN) { |
961 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
962 | } |
963 | sym_push(sym->v & ~SYM_FIELD, type, |
964 | VT_LLOCAL | VT_LVAL, addr); |
965 | } else { |
966 | if (reg_param_index < REGN) { |
967 | /* save arguments passed by register */ |
968 | if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) { |
969 | if (tcc_state->nosse) |
970 | tcc_error("SSE disabled" ); |
971 | o(0xd60f66); /* movq */ |
972 | gen_modrm(reg_param_index, VT_LOCAL, NULL, addr); |
973 | } else { |
974 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
975 | } |
976 | } |
977 | sym_push(sym->v & ~SYM_FIELD, type, |
978 | VT_LOCAL | VT_LVAL, addr); |
979 | } |
980 | addr += 8; |
981 | reg_param_index++; |
982 | } |
983 | |
984 | while (reg_param_index < REGN) { |
985 | if (func_var) { |
986 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
987 | addr += 8; |
988 | } |
989 | reg_param_index++; |
990 | } |
991 | #ifdef CONFIG_TCC_BCHECK |
992 | if (tcc_state->do_bounds_check) |
993 | gen_bounds_prolog(); |
994 | #endif |
995 | } |
996 | |
997 | /* generate function epilog */ |
998 | void gfunc_epilog(void) |
999 | { |
1000 | int v, saved_ind; |
1001 | |
1002 | /* align local size to word & save local variables */ |
1003 | func_scratch = (func_scratch + 15) & -16; |
1004 | loc = (loc & -16) - func_scratch; |
1005 | |
1006 | #ifdef CONFIG_TCC_BCHECK |
1007 | if (tcc_state->do_bounds_check) |
1008 | gen_bounds_epilog(); |
1009 | #endif |
1010 | |
1011 | o(0xc9); /* leave */ |
1012 | if (func_ret_sub == 0) { |
1013 | o(0xc3); /* ret */ |
1014 | } else { |
1015 | o(0xc2); /* ret n */ |
1016 | g(func_ret_sub); |
1017 | g(func_ret_sub >> 8); |
1018 | } |
1019 | |
1020 | saved_ind = ind; |
1021 | ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; |
1022 | v = -loc; |
1023 | |
1024 | if (v >= 4096) { |
1025 | Sym *sym = external_global_sym(TOK___chkstk, &func_old_type); |
1026 | oad(0xb8, v); /* mov stacksize, %eax */ |
1027 | oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */ |
1028 | greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4); |
1029 | o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */ |
1030 | } else { |
1031 | o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ |
1032 | o(0xec8148); /* sub rsp, stacksize */ |
1033 | gen_le32(v); |
1034 | } |
1035 | |
1036 | /* add the "func_scratch" area after each alloca seen */ |
1037 | gsym_addr(func_alloca, -func_scratch); |
1038 | |
1039 | cur_text_section->data_offset = saved_ind; |
1040 | pe_add_unwind_data(ind, saved_ind, v); |
1041 | ind = cur_text_section->data_offset; |
1042 | } |
1043 | |
1044 | #else |
1045 | |
1046 | static void gadd_sp(int val) |
1047 | { |
1048 | if (val == (char)val) { |
1049 | o(0xc48348); |
1050 | g(val); |
1051 | } else { |
1052 | oad(0xc48148, val); /* add $xxx, %rsp */ |
1053 | } |
1054 | } |
1055 | |
1056 | typedef enum X86_64_Mode { |
1057 | x86_64_mode_none, |
1058 | x86_64_mode_memory, |
1059 | x86_64_mode_integer, |
1060 | x86_64_mode_sse, |
1061 | x86_64_mode_x87 |
1062 | } X86_64_Mode; |
1063 | |
1064 | static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) |
1065 | { |
1066 | if (a == b) |
1067 | return a; |
1068 | else if (a == x86_64_mode_none) |
1069 | return b; |
1070 | else if (b == x86_64_mode_none) |
1071 | return a; |
1072 | else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory)) |
1073 | return x86_64_mode_memory; |
1074 | else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer)) |
1075 | return x86_64_mode_integer; |
1076 | else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87)) |
1077 | return x86_64_mode_memory; |
1078 | else |
1079 | return x86_64_mode_sse; |
1080 | } |
1081 | |
1082 | static X86_64_Mode classify_x86_64_inner(CType *ty) |
1083 | { |
1084 | X86_64_Mode mode; |
1085 | Sym *f; |
1086 | |
1087 | switch (ty->t & VT_BTYPE) { |
1088 | case VT_VOID: return x86_64_mode_none; |
1089 | |
1090 | case VT_INT: |
1091 | case VT_BYTE: |
1092 | case VT_SHORT: |
1093 | case VT_LLONG: |
1094 | case VT_BOOL: |
1095 | case VT_PTR: |
1096 | case VT_FUNC: |
1097 | return x86_64_mode_integer; |
1098 | |
1099 | case VT_FLOAT: |
1100 | case VT_DOUBLE: return x86_64_mode_sse; |
1101 | |
1102 | case VT_LDOUBLE: return x86_64_mode_x87; |
1103 | |
1104 | case VT_STRUCT: |
1105 | f = ty->ref; |
1106 | |
1107 | mode = x86_64_mode_none; |
1108 | for (f = f->next; f; f = f->next) |
1109 | mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type)); |
1110 | |
1111 | return mode; |
1112 | } |
1113 | assert(0); |
1114 | return 0; |
1115 | } |
1116 | |
1117 | static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) |
1118 | { |
1119 | X86_64_Mode mode; |
1120 | int size, align, ret_t = 0; |
1121 | |
1122 | if (ty->t & (VT_BITFIELD|VT_ARRAY)) { |
1123 | *psize = 8; |
1124 | *palign = 8; |
1125 | *reg_count = 1; |
1126 | ret_t = ty->t; |
1127 | mode = x86_64_mode_integer; |
1128 | } else { |
1129 | size = type_size(ty, &align); |
1130 | *psize = (size + 7) & ~7; |
1131 | *palign = (align + 7) & ~7; |
1132 | |
1133 | if (size > 16) { |
1134 | mode = x86_64_mode_memory; |
1135 | } else { |
1136 | mode = classify_x86_64_inner(ty); |
1137 | switch (mode) { |
1138 | case x86_64_mode_integer: |
1139 | if (size > 8) { |
1140 | *reg_count = 2; |
1141 | ret_t = VT_QLONG; |
1142 | } else { |
1143 | *reg_count = 1; |
1144 | if (size > 4) |
1145 | ret_t = VT_LLONG; |
1146 | else if (size > 2) |
1147 | ret_t = VT_INT; |
1148 | else if (size > 1) |
1149 | ret_t = VT_SHORT; |
1150 | else |
1151 | ret_t = VT_BYTE; |
1152 | if ((ty->t & VT_BTYPE) == VT_STRUCT || (ty->t & VT_UNSIGNED)) |
1153 | ret_t |= VT_UNSIGNED; |
1154 | } |
1155 | break; |
1156 | |
1157 | case x86_64_mode_x87: |
1158 | *reg_count = 1; |
1159 | ret_t = VT_LDOUBLE; |
1160 | break; |
1161 | |
1162 | case x86_64_mode_sse: |
1163 | if (size > 8) { |
1164 | *reg_count = 2; |
1165 | ret_t = VT_QFLOAT; |
1166 | } else { |
1167 | *reg_count = 1; |
1168 | ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT; |
1169 | } |
1170 | break; |
1171 | default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/ |
1172 | } |
1173 | } |
1174 | } |
1175 | |
1176 | if (ret) { |
1177 | ret->ref = NULL; |
1178 | ret->t = ret_t; |
1179 | } |
1180 | |
1181 | return mode; |
1182 | } |
1183 | |
1184 | ST_FUNC int classify_x86_64_va_arg(CType *ty) |
1185 | { |
1186 | /* This definition must be synced with stdarg.h */ |
1187 | enum __va_arg_type { |
1188 | __va_gen_reg, __va_float_reg, __va_stack |
1189 | }; |
1190 | int size, align, reg_count; |
1191 | X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, ®_count); |
1192 | switch (mode) { |
1193 | default: return __va_stack; |
1194 | case x86_64_mode_integer: return __va_gen_reg; |
1195 | case x86_64_mode_sse: return __va_float_reg; |
1196 | } |
1197 | } |
1198 | |
1199 | /* Return the number of registers needed to return the struct, or 0 if |
1200 | returning via struct pointer. */ |
1201 | ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) |
1202 | { |
1203 | int size, align, reg_count; |
1204 | *ret_align = 1; // Never have to re-align return values for x86-64 |
1205 | *regsize = 8; |
1206 | return (classify_x86_64_arg(vt, ret, &size, &align, ®_count) != x86_64_mode_memory); |
1207 | } |
1208 | |
1209 | #define REGN 6 |
1210 | static const uint8_t arg_regs[REGN] = { |
1211 | TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9 |
1212 | }; |
1213 | |
1214 | static int arg_prepare_reg(int idx) { |
1215 | if (idx == 2 || idx == 3) |
1216 | /* idx=2: r10, idx=3: r11 */ |
1217 | return idx + 8; |
1218 | else |
1219 | return arg_regs[idx]; |
1220 | } |
1221 | |
1222 | /* Generate function call. The function address is pushed first, then |
1223 | all the parameters in call order. This functions pops all the |
1224 | parameters and the function address. */ |
1225 | void gfunc_call(int nb_args) |
1226 | { |
1227 | X86_64_Mode mode; |
1228 | CType type; |
1229 | int size, align, r, args_size, stack_adjust, i, reg_count, k; |
1230 | int nb_reg_args = 0; |
1231 | int nb_sse_args = 0; |
1232 | int sse_reg, gen_reg; |
1233 | char *onstack = tcc_malloc((nb_args + 1) * sizeof (char)); |
1234 | |
1235 | #ifdef CONFIG_TCC_BCHECK |
1236 | if (tcc_state->do_bounds_check) |
1237 | gbound_args(nb_args); |
1238 | #endif |
1239 | |
1240 | /* calculate the number of integer/float register arguments, remember |
1241 | arguments to be passed via stack (in onstack[]), and also remember |
1242 | if we have to align the stack pointer to 16 (onstack[i] == 2). Needs |
1243 | to be done in a left-to-right pass over arguments. */ |
1244 | stack_adjust = 0; |
1245 | for(i = nb_args - 1; i >= 0; i--) { |
1246 | mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); |
1247 | if (size == 0) continue; |
1248 | if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) { |
1249 | nb_sse_args += reg_count; |
1250 | onstack[i] = 0; |
1251 | } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) { |
1252 | nb_reg_args += reg_count; |
1253 | onstack[i] = 0; |
1254 | } else if (mode == x86_64_mode_none) { |
1255 | onstack[i] = 0; |
1256 | } else { |
1257 | if (align == 16 && (stack_adjust &= 15)) { |
1258 | onstack[i] = 2; |
1259 | stack_adjust = 0; |
1260 | } else |
1261 | onstack[i] = 1; |
1262 | stack_adjust += size; |
1263 | } |
1264 | } |
1265 | |
1266 | if (nb_sse_args && tcc_state->nosse) |
1267 | tcc_error("SSE disabled but floating point arguments passed" ); |
1268 | |
1269 | /* fetch cpu flag before generating any code */ |
1270 | if ((vtop->r & VT_VALMASK) == VT_CMP) |
1271 | gv(RC_INT); |
1272 | |
1273 | /* for struct arguments, we need to call memcpy and the function |
1274 | call breaks register passing arguments we are preparing. |
1275 | So, we process arguments which will be passed by stack first. */ |
1276 | gen_reg = nb_reg_args; |
1277 | sse_reg = nb_sse_args; |
1278 | args_size = 0; |
1279 | stack_adjust &= 15; |
1280 | for (i = k = 0; i < nb_args;) { |
1281 | mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); |
1282 | if (size) { |
1283 | if (!onstack[i + k]) { |
1284 | ++i; |
1285 | continue; |
1286 | } |
1287 | /* Possibly adjust stack to align SSE boundary. We're processing |
1288 | args from right to left while allocating happens left to right |
1289 | (stack grows down), so the adjustment needs to happen _after_ |
1290 | an argument that requires it. */ |
1291 | if (stack_adjust) { |
1292 | o(0x50); /* push %rax; aka sub $8,%rsp */ |
1293 | args_size += 8; |
1294 | stack_adjust = 0; |
1295 | } |
1296 | if (onstack[i + k] == 2) |
1297 | stack_adjust = 1; |
1298 | } |
1299 | |
1300 | vrotb(i+1); |
1301 | |
1302 | switch (vtop->type.t & VT_BTYPE) { |
1303 | case VT_STRUCT: |
1304 | /* allocate the necessary size on stack */ |
1305 | o(0x48); |
1306 | oad(0xec81, size); /* sub $xxx, %rsp */ |
1307 | /* generate structure store */ |
1308 | r = get_reg(RC_INT); |
1309 | orex(1, r, 0, 0x89); /* mov %rsp, r */ |
1310 | o(0xe0 + REG_VALUE(r)); |
1311 | vset(&vtop->type, r | VT_LVAL, 0); |
1312 | vswap(); |
1313 | vstore(); |
1314 | break; |
1315 | |
1316 | case VT_LDOUBLE: |
1317 | gv(RC_ST0); |
1318 | oad(0xec8148, size); /* sub $xxx, %rsp */ |
1319 | o(0x7cdb); /* fstpt 0(%rsp) */ |
1320 | g(0x24); |
1321 | g(0x00); |
1322 | break; |
1323 | |
1324 | case VT_FLOAT: |
1325 | case VT_DOUBLE: |
1326 | assert(mode == x86_64_mode_sse); |
1327 | r = gv(RC_FLOAT); |
1328 | o(0x50); /* push $rax */ |
1329 | /* movq %xmmN, (%rsp) */ |
1330 | o(0xd60f66); |
1331 | o(0x04 + REG_VALUE(r)*8); |
1332 | o(0x24); |
1333 | break; |
1334 | |
1335 | default: |
1336 | assert(mode == x86_64_mode_integer); |
1337 | /* simple type */ |
1338 | /* XXX: implicit cast ? */ |
1339 | r = gv(RC_INT); |
1340 | orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */ |
1341 | break; |
1342 | } |
1343 | args_size += size; |
1344 | |
1345 | vpop(); |
1346 | --nb_args; |
1347 | k++; |
1348 | } |
1349 | |
1350 | tcc_free(onstack); |
1351 | |
1352 | /* XXX This should be superfluous. */ |
1353 | save_regs(0); /* save used temporary registers */ |
1354 | |
1355 | /* then, we prepare register passing arguments. |
1356 | Note that we cannot set RDX and RCX in this loop because gv() |
1357 | may break these temporary registers. Let's use R10 and R11 |
1358 | instead of them */ |
1359 | assert(gen_reg <= REGN); |
1360 | assert(sse_reg <= 8); |
1361 | for(i = 0; i < nb_args; i++) { |
1362 | mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count); |
1363 | if (size == 0) continue; |
1364 | /* Alter stack entry type so that gv() knows how to treat it */ |
1365 | vtop->type = type; |
1366 | if (mode == x86_64_mode_sse) { |
1367 | if (reg_count == 2) { |
1368 | sse_reg -= 2; |
1369 | gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */ |
1370 | if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */ |
1371 | /* movaps %xmm1, %xmmN */ |
1372 | o(0x280f); |
1373 | o(0xc1 + ((sse_reg+1) << 3)); |
1374 | /* movaps %xmm0, %xmmN */ |
1375 | o(0x280f); |
1376 | o(0xc0 + (sse_reg << 3)); |
1377 | } |
1378 | } else { |
1379 | assert(reg_count == 1); |
1380 | --sse_reg; |
1381 | /* Load directly to register */ |
1382 | gv(RC_XMM0 << sse_reg); |
1383 | } |
1384 | } else if (mode == x86_64_mode_integer) { |
1385 | /* simple type */ |
1386 | /* XXX: implicit cast ? */ |
1387 | int d; |
1388 | gen_reg -= reg_count; |
1389 | r = gv(RC_INT); |
1390 | d = arg_prepare_reg(gen_reg); |
1391 | orex(1,d,r,0x89); /* mov */ |
1392 | o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); |
1393 | if (reg_count == 2) { |
1394 | d = arg_prepare_reg(gen_reg+1); |
1395 | orex(1,d,vtop->r2,0x89); /* mov */ |
1396 | o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d)); |
1397 | } |
1398 | } |
1399 | vtop--; |
1400 | } |
1401 | assert(gen_reg == 0); |
1402 | assert(sse_reg == 0); |
1403 | |
1404 | /* We shouldn't have many operands on the stack anymore, but the |
1405 | call address itself is still there, and it might be in %eax |
1406 | (or edx/ecx) currently, which the below writes would clobber. |
1407 | So evict all remaining operands here. */ |
1408 | save_regs(0); |
1409 | |
1410 | /* Copy R10 and R11 into RDX and RCX, respectively */ |
1411 | if (nb_reg_args > 2) { |
1412 | o(0xd2894c); /* mov %r10, %rdx */ |
1413 | if (nb_reg_args > 3) { |
1414 | o(0xd9894c); /* mov %r11, %rcx */ |
1415 | } |
1416 | } |
1417 | |
1418 | if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */ |
1419 | oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ |
1420 | gcall_or_jmp(0); |
1421 | if (args_size) |
1422 | gadd_sp(args_size); |
1423 | vtop--; |
1424 | } |
1425 | |
1426 | #define FUNC_PROLOG_SIZE 11 |
1427 | |
1428 | static void push_arg_reg(int i) { |
1429 | loc -= 8; |
1430 | gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc); |
1431 | } |
1432 | |
1433 | /* generate function prolog of type 't' */ |
1434 | void gfunc_prolog(Sym *func_sym) |
1435 | { |
1436 | CType *func_type = &func_sym->type; |
1437 | X86_64_Mode mode; |
1438 | int i, addr, align, size, reg_count; |
1439 | int param_addr = 0, reg_param_index, sse_param_index; |
1440 | Sym *sym; |
1441 | CType *type; |
1442 | |
1443 | sym = func_type->ref; |
1444 | addr = PTR_SIZE * 2; |
1445 | loc = 0; |
1446 | ind += FUNC_PROLOG_SIZE; |
1447 | func_sub_sp_offset = ind; |
1448 | func_ret_sub = 0; |
1449 | |
1450 | if (func_var) { |
1451 | int seen_reg_num, seen_sse_num, seen_stack_size; |
1452 | seen_reg_num = seen_sse_num = 0; |
1453 | /* frame pointer and return address */ |
1454 | seen_stack_size = PTR_SIZE * 2; |
1455 | /* count the number of seen parameters */ |
1456 | sym = func_type->ref; |
1457 | while ((sym = sym->next) != NULL) { |
1458 | type = &sym->type; |
1459 | mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count); |
1460 | switch (mode) { |
1461 | default: |
1462 | stack_arg: |
1463 | seen_stack_size = ((seen_stack_size + align - 1) & -align) + size; |
1464 | break; |
1465 | |
1466 | case x86_64_mode_integer: |
1467 | if (seen_reg_num + reg_count > REGN) |
1468 | goto stack_arg; |
1469 | seen_reg_num += reg_count; |
1470 | break; |
1471 | |
1472 | case x86_64_mode_sse: |
1473 | if (seen_sse_num + reg_count > 8) |
1474 | goto stack_arg; |
1475 | seen_sse_num += reg_count; |
1476 | break; |
1477 | } |
1478 | } |
1479 | |
1480 | loc -= 24; |
1481 | /* movl $0x????????, -0x18(%rbp) */ |
1482 | o(0xe845c7); |
1483 | gen_le32(seen_reg_num * 8); |
1484 | /* movl $0x????????, -0x14(%rbp) */ |
1485 | o(0xec45c7); |
1486 | gen_le32(seen_sse_num * 16 + 48); |
1487 | /* leaq $0x????????, %r11 */ |
1488 | o(0x9d8d4c); |
1489 | gen_le32(seen_stack_size); |
1490 | /* movq %r11, -0x10(%rbp) */ |
1491 | o(0xf05d894c); |
1492 | /* leaq $-192(%rbp), %r11 */ |
1493 | o(0x9d8d4c); |
1494 | gen_le32(-176 - 24); |
1495 | /* movq %r11, -0x8(%rbp) */ |
1496 | o(0xf85d894c); |
1497 | |
1498 | /* save all register passing arguments */ |
1499 | for (i = 0; i < 8; i++) { |
1500 | loc -= 16; |
1501 | if (!tcc_state->nosse) { |
1502 | o(0xd60f66); /* movq */ |
1503 | gen_modrm(7 - i, VT_LOCAL, NULL, loc); |
1504 | } |
1505 | /* movq $0, loc+8(%rbp) */ |
1506 | o(0x85c748); |
1507 | gen_le32(loc + 8); |
1508 | gen_le32(0); |
1509 | } |
1510 | for (i = 0; i < REGN; i++) { |
1511 | push_arg_reg(REGN-1-i); |
1512 | } |
1513 | } |
1514 | |
1515 | sym = func_type->ref; |
1516 | reg_param_index = 0; |
1517 | sse_param_index = 0; |
1518 | |
1519 | /* if the function returns a structure, then add an |
1520 | implicit pointer parameter */ |
1521 | mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, ®_count); |
1522 | if (mode == x86_64_mode_memory) { |
1523 | push_arg_reg(reg_param_index); |
1524 | func_vc = loc; |
1525 | reg_param_index++; |
1526 | } |
1527 | /* define parameters */ |
1528 | while ((sym = sym->next) != NULL) { |
1529 | type = &sym->type; |
1530 | mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count); |
1531 | switch (mode) { |
1532 | case x86_64_mode_sse: |
1533 | if (tcc_state->nosse) |
1534 | tcc_error("SSE disabled but floating point arguments used" ); |
1535 | if (sse_param_index + reg_count <= 8) { |
1536 | /* save arguments passed by register */ |
1537 | loc -= reg_count * 8; |
1538 | param_addr = loc; |
1539 | for (i = 0; i < reg_count; ++i) { |
1540 | o(0xd60f66); /* movq */ |
1541 | gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8); |
1542 | ++sse_param_index; |
1543 | } |
1544 | } else { |
1545 | addr = (addr + align - 1) & -align; |
1546 | param_addr = addr; |
1547 | addr += size; |
1548 | } |
1549 | break; |
1550 | |
1551 | case x86_64_mode_memory: |
1552 | case x86_64_mode_x87: |
1553 | addr = (addr + align - 1) & -align; |
1554 | param_addr = addr; |
1555 | addr += size; |
1556 | break; |
1557 | |
1558 | case x86_64_mode_integer: { |
1559 | if (reg_param_index + reg_count <= REGN) { |
1560 | /* save arguments passed by register */ |
1561 | loc -= reg_count * 8; |
1562 | param_addr = loc; |
1563 | for (i = 0; i < reg_count; ++i) { |
1564 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8); |
1565 | ++reg_param_index; |
1566 | } |
1567 | } else { |
1568 | addr = (addr + align - 1) & -align; |
1569 | param_addr = addr; |
1570 | addr += size; |
1571 | } |
1572 | break; |
1573 | } |
1574 | default: break; /* nothing to be done for x86_64_mode_none */ |
1575 | } |
1576 | sym_push(sym->v & ~SYM_FIELD, type, |
1577 | VT_LOCAL | VT_LVAL, param_addr); |
1578 | } |
1579 | |
1580 | #ifdef CONFIG_TCC_BCHECK |
1581 | if (tcc_state->do_bounds_check) |
1582 | gen_bounds_prolog(); |
1583 | #endif |
1584 | } |
1585 | |
1586 | /* generate function epilog */ |
1587 | void gfunc_epilog(void) |
1588 | { |
1589 | int v, saved_ind; |
1590 | |
1591 | #ifdef CONFIG_TCC_BCHECK |
1592 | if (tcc_state->do_bounds_check) |
1593 | gen_bounds_epilog(); |
1594 | #endif |
1595 | o(0xc9); /* leave */ |
1596 | if (func_ret_sub == 0) { |
1597 | o(0xc3); /* ret */ |
1598 | } else { |
1599 | o(0xc2); /* ret n */ |
1600 | g(func_ret_sub); |
1601 | g(func_ret_sub >> 8); |
1602 | } |
1603 | /* align local size to word & save local variables */ |
1604 | v = (-loc + 15) & -16; |
1605 | saved_ind = ind; |
1606 | ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; |
1607 | o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ |
1608 | o(0xec8148); /* sub rsp, stacksize */ |
1609 | gen_le32(v); |
1610 | ind = saved_ind; |
1611 | } |
1612 | |
1613 | #endif /* not PE */ |
1614 | |
1615 | ST_FUNC void gen_fill_nops(int bytes) |
1616 | { |
1617 | while (bytes--) |
1618 | g(0x90); |
1619 | } |
1620 | |
1621 | /* generate a jump to a label */ |
1622 | int gjmp(int t) |
1623 | { |
1624 | return gjmp2(0xe9, t); |
1625 | } |
1626 | |
1627 | /* generate a jump to a fixed address */ |
1628 | void gjmp_addr(int a) |
1629 | { |
1630 | int r; |
1631 | r = a - ind - 2; |
1632 | if (r == (char)r) { |
1633 | g(0xeb); |
1634 | g(r); |
1635 | } else { |
1636 | oad(0xe9, a - ind - 5); |
1637 | } |
1638 | } |
1639 | |
1640 | ST_FUNC int gjmp_append(int n, int t) |
1641 | { |
1642 | void *p; |
1643 | /* insert vtop->c jump list in t */ |
1644 | if (n) { |
1645 | uint32_t n1 = n, n2; |
1646 | while ((n2 = read32le(p = cur_text_section->data + n1))) |
1647 | n1 = n2; |
1648 | write32le(p, t); |
1649 | t = n; |
1650 | } |
1651 | return t; |
1652 | } |
1653 | |
1654 | ST_FUNC int gjmp_cond(int op, int t) |
1655 | { |
1656 | if (op & 0x100) |
1657 | { |
1658 | /* This was a float compare. If the parity flag is set |
1659 | the result was unordered. For anything except != this |
1660 | means false and we don't jump (anding both conditions). |
1661 | For != this means true (oring both). |
1662 | Take care about inverting the test. We need to jump |
1663 | to our target if the result was unordered and test wasn't NE, |
1664 | otherwise if unordered we don't want to jump. */ |
1665 | int v = vtop->cmp_r; |
1666 | op &= ~0x100; |
1667 | if (op ^ v ^ (v != TOK_NE)) |
1668 | o(0x067a); /* jp +6 */ |
1669 | else |
1670 | { |
1671 | g(0x0f); |
1672 | t = gjmp2(0x8a, t); /* jp t */ |
1673 | } |
1674 | } |
1675 | g(0x0f); |
1676 | t = gjmp2(op - 16, t); |
1677 | return t; |
1678 | } |
1679 | |
1680 | /* generate an integer binary operation */ |
1681 | void gen_opi(int op) |
1682 | { |
1683 | int r, fr, opc, c; |
1684 | int ll, uu, cc; |
1685 | |
1686 | ll = is64_type(vtop[-1].type.t); |
1687 | uu = (vtop[-1].type.t & VT_UNSIGNED) != 0; |
1688 | cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; |
1689 | |
1690 | switch(op) { |
1691 | case '+': |
1692 | case TOK_ADDC1: /* add with carry generation */ |
1693 | opc = 0; |
1694 | gen_op8: |
1695 | if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) { |
1696 | /* constant case */ |
1697 | vswap(); |
1698 | r = gv(RC_INT); |
1699 | vswap(); |
1700 | c = vtop->c.i; |
1701 | if (c == (char)c) { |
1702 | /* XXX: generate inc and dec for smaller code ? */ |
1703 | orex(ll, r, 0, 0x83); |
1704 | o(0xc0 | (opc << 3) | REG_VALUE(r)); |
1705 | g(c); |
1706 | } else { |
1707 | orex(ll, r, 0, 0x81); |
1708 | oad(0xc0 | (opc << 3) | REG_VALUE(r), c); |
1709 | } |
1710 | } else { |
1711 | gv2(RC_INT, RC_INT); |
1712 | r = vtop[-1].r; |
1713 | fr = vtop[0].r; |
1714 | orex(ll, r, fr, (opc << 3) | 0x01); |
1715 | o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); |
1716 | } |
1717 | vtop--; |
1718 | if (op >= TOK_ULT && op <= TOK_GT) |
1719 | vset_VT_CMP(op); |
1720 | break; |
1721 | case '-': |
1722 | case TOK_SUBC1: /* sub with carry generation */ |
1723 | opc = 5; |
1724 | goto gen_op8; |
1725 | case TOK_ADDC2: /* add with carry use */ |
1726 | opc = 2; |
1727 | goto gen_op8; |
1728 | case TOK_SUBC2: /* sub with carry use */ |
1729 | opc = 3; |
1730 | goto gen_op8; |
1731 | case '&': |
1732 | opc = 4; |
1733 | goto gen_op8; |
1734 | case '^': |
1735 | opc = 6; |
1736 | goto gen_op8; |
1737 | case '|': |
1738 | opc = 1; |
1739 | goto gen_op8; |
1740 | case '*': |
1741 | gv2(RC_INT, RC_INT); |
1742 | r = vtop[-1].r; |
1743 | fr = vtop[0].r; |
1744 | orex(ll, fr, r, 0xaf0f); /* imul fr, r */ |
1745 | o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8); |
1746 | vtop--; |
1747 | break; |
1748 | case TOK_SHL: |
1749 | opc = 4; |
1750 | goto gen_shift; |
1751 | case TOK_SHR: |
1752 | opc = 5; |
1753 | goto gen_shift; |
1754 | case TOK_SAR: |
1755 | opc = 7; |
1756 | gen_shift: |
1757 | opc = 0xc0 | (opc << 3); |
1758 | if (cc) { |
1759 | /* constant case */ |
1760 | vswap(); |
1761 | r = gv(RC_INT); |
1762 | vswap(); |
1763 | orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ |
1764 | o(opc | REG_VALUE(r)); |
1765 | g(vtop->c.i & (ll ? 63 : 31)); |
1766 | } else { |
1767 | /* we generate the shift in ecx */ |
1768 | gv2(RC_INT, RC_RCX); |
1769 | r = vtop[-1].r; |
1770 | orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */ |
1771 | o(opc | REG_VALUE(r)); |
1772 | } |
1773 | vtop--; |
1774 | break; |
1775 | case TOK_UDIV: |
1776 | case TOK_UMOD: |
1777 | uu = 1; |
1778 | goto divmod; |
1779 | case '/': |
1780 | case '%': |
1781 | case TOK_PDIV: |
1782 | uu = 0; |
1783 | divmod: |
1784 | /* first operand must be in eax */ |
1785 | /* XXX: need better constraint for second operand */ |
1786 | gv2(RC_RAX, RC_RCX); |
1787 | r = vtop[-1].r; |
1788 | fr = vtop[0].r; |
1789 | vtop--; |
1790 | save_reg(TREG_RDX); |
1791 | orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */ |
1792 | orex(ll, fr, 0, 0xf7); /* div fr, %eax */ |
1793 | o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr)); |
1794 | if (op == '%' || op == TOK_UMOD) |
1795 | r = TREG_RDX; |
1796 | else |
1797 | r = TREG_RAX; |
1798 | vtop->r = r; |
1799 | break; |
1800 | default: |
1801 | opc = 7; |
1802 | goto gen_op8; |
1803 | } |
1804 | } |
1805 | |
1806 | void gen_opl(int op) |
1807 | { |
1808 | gen_opi(op); |
1809 | } |
1810 | |
1811 | /* generate a floating point operation 'v = t1 op t2' instruction. The |
1812 | two operands are guaranteed to have the same floating point type */ |
1813 | /* XXX: need to use ST1 too */ |
1814 | void gen_opf(int op) |
1815 | { |
1816 | int a, ft, fc, swapped, r; |
1817 | int float_type = |
1818 | (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; |
1819 | |
1820 | /* convert constants to memory references */ |
1821 | if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { |
1822 | vswap(); |
1823 | gv(float_type); |
1824 | vswap(); |
1825 | } |
1826 | if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) |
1827 | gv(float_type); |
1828 | |
1829 | /* must put at least one value in the floating point register */ |
1830 | if ((vtop[-1].r & VT_LVAL) && |
1831 | (vtop[0].r & VT_LVAL)) { |
1832 | vswap(); |
1833 | gv(float_type); |
1834 | vswap(); |
1835 | } |
1836 | swapped = 0; |
1837 | /* swap the stack if needed so that t1 is the register and t2 is |
1838 | the memory reference */ |
1839 | if (vtop[-1].r & VT_LVAL) { |
1840 | vswap(); |
1841 | swapped = 1; |
1842 | } |
1843 | if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { |
1844 | if (op >= TOK_ULT && op <= TOK_GT) { |
1845 | /* load on stack second operand */ |
1846 | load(TREG_ST0, vtop); |
1847 | save_reg(TREG_RAX); /* eax is used by FP comparison code */ |
1848 | if (op == TOK_GE || op == TOK_GT) |
1849 | swapped = !swapped; |
1850 | else if (op == TOK_EQ || op == TOK_NE) |
1851 | swapped = 0; |
1852 | if (swapped) |
1853 | o(0xc9d9); /* fxch %st(1) */ |
1854 | if (op == TOK_EQ || op == TOK_NE) |
1855 | o(0xe9da); /* fucompp */ |
1856 | else |
1857 | o(0xd9de); /* fcompp */ |
1858 | o(0xe0df); /* fnstsw %ax */ |
1859 | if (op == TOK_EQ) { |
1860 | o(0x45e480); /* and $0x45, %ah */ |
1861 | o(0x40fC80); /* cmp $0x40, %ah */ |
1862 | } else if (op == TOK_NE) { |
1863 | o(0x45e480); /* and $0x45, %ah */ |
1864 | o(0x40f480); /* xor $0x40, %ah */ |
1865 | op = TOK_NE; |
1866 | } else if (op == TOK_GE || op == TOK_LE) { |
1867 | o(0x05c4f6); /* test $0x05, %ah */ |
1868 | op = TOK_EQ; |
1869 | } else { |
1870 | o(0x45c4f6); /* test $0x45, %ah */ |
1871 | op = TOK_EQ; |
1872 | } |
1873 | vtop--; |
1874 | vset_VT_CMP(op); |
1875 | } else { |
1876 | /* no memory reference possible for long double operations */ |
1877 | load(TREG_ST0, vtop); |
1878 | swapped = !swapped; |
1879 | |
1880 | switch(op) { |
1881 | default: |
1882 | case '+': |
1883 | a = 0; |
1884 | break; |
1885 | case '-': |
1886 | a = 4; |
1887 | if (swapped) |
1888 | a++; |
1889 | break; |
1890 | case '*': |
1891 | a = 1; |
1892 | break; |
1893 | case '/': |
1894 | a = 6; |
1895 | if (swapped) |
1896 | a++; |
1897 | break; |
1898 | } |
1899 | ft = vtop->type.t; |
1900 | fc = vtop->c.i; |
1901 | o(0xde); /* fxxxp %st, %st(1) */ |
1902 | o(0xc1 + (a << 3)); |
1903 | vtop--; |
1904 | } |
1905 | } else { |
1906 | if (op >= TOK_ULT && op <= TOK_GT) { |
1907 | /* if saved lvalue, then we must reload it */ |
1908 | r = vtop->r; |
1909 | fc = vtop->c.i; |
1910 | if ((r & VT_VALMASK) == VT_LLOCAL) { |
1911 | SValue v1; |
1912 | r = get_reg(RC_INT); |
1913 | v1.type.t = VT_PTR; |
1914 | v1.r = VT_LOCAL | VT_LVAL; |
1915 | v1.c.i = fc; |
1916 | load(r, &v1); |
1917 | fc = 0; |
1918 | vtop->r = r = r | VT_LVAL; |
1919 | } |
1920 | |
1921 | if (op == TOK_EQ || op == TOK_NE) { |
1922 | swapped = 0; |
1923 | } else { |
1924 | if (op == TOK_LE || op == TOK_LT) |
1925 | swapped = !swapped; |
1926 | if (op == TOK_LE || op == TOK_GE) { |
1927 | op = 0x93; /* setae */ |
1928 | } else { |
1929 | op = 0x97; /* seta */ |
1930 | } |
1931 | } |
1932 | |
1933 | if (swapped) { |
1934 | gv(RC_FLOAT); |
1935 | vswap(); |
1936 | } |
1937 | assert(!(vtop[-1].r & VT_LVAL)); |
1938 | |
1939 | if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) |
1940 | o(0x66); |
1941 | if (op == TOK_EQ || op == TOK_NE) |
1942 | o(0x2e0f); /* ucomisd */ |
1943 | else |
1944 | o(0x2f0f); /* comisd */ |
1945 | |
1946 | if (vtop->r & VT_LVAL) { |
1947 | gen_modrm(vtop[-1].r, r, vtop->sym, fc); |
1948 | } else { |
1949 | o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); |
1950 | } |
1951 | |
1952 | vtop--; |
1953 | vset_VT_CMP(op | 0x100); |
1954 | vtop->cmp_r = op; |
1955 | } else { |
1956 | assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); |
1957 | switch(op) { |
1958 | default: |
1959 | case '+': |
1960 | a = 0; |
1961 | break; |
1962 | case '-': |
1963 | a = 4; |
1964 | break; |
1965 | case '*': |
1966 | a = 1; |
1967 | break; |
1968 | case '/': |
1969 | a = 6; |
1970 | break; |
1971 | } |
1972 | ft = vtop->type.t; |
1973 | fc = vtop->c.i; |
1974 | assert((ft & VT_BTYPE) != VT_LDOUBLE); |
1975 | |
1976 | r = vtop->r; |
1977 | /* if saved lvalue, then we must reload it */ |
1978 | if ((vtop->r & VT_VALMASK) == VT_LLOCAL) { |
1979 | SValue v1; |
1980 | r = get_reg(RC_INT); |
1981 | v1.type.t = VT_PTR; |
1982 | v1.r = VT_LOCAL | VT_LVAL; |
1983 | v1.c.i = fc; |
1984 | load(r, &v1); |
1985 | fc = 0; |
1986 | vtop->r = r = r | VT_LVAL; |
1987 | } |
1988 | |
1989 | assert(!(vtop[-1].r & VT_LVAL)); |
1990 | if (swapped) { |
1991 | assert(vtop->r & VT_LVAL); |
1992 | gv(RC_FLOAT); |
1993 | vswap(); |
1994 | } |
1995 | |
1996 | if ((ft & VT_BTYPE) == VT_DOUBLE) { |
1997 | o(0xf2); |
1998 | } else { |
1999 | o(0xf3); |
2000 | } |
2001 | o(0x0f); |
2002 | o(0x58 + a); |
2003 | |
2004 | if (vtop->r & VT_LVAL) { |
2005 | gen_modrm(vtop[-1].r, r, vtop->sym, fc); |
2006 | } else { |
2007 | o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); |
2008 | } |
2009 | |
2010 | vtop--; |
2011 | } |
2012 | } |
2013 | } |
2014 | |
2015 | /* convert integers to fp 't' type. Must handle 'int', 'unsigned int' |
2016 | and 'long long' cases. */ |
2017 | void gen_cvt_itof(int t) |
2018 | { |
2019 | if ((t & VT_BTYPE) == VT_LDOUBLE) { |
2020 | save_reg(TREG_ST0); |
2021 | gv(RC_INT); |
2022 | if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { |
2023 | /* signed long long to float/double/long double (unsigned case |
2024 | is handled generically) */ |
2025 | o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ |
2026 | o(0x242cdf); /* fildll (%rsp) */ |
2027 | o(0x08c48348); /* add $8, %rsp */ |
2028 | } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == |
2029 | (VT_INT | VT_UNSIGNED)) { |
2030 | /* unsigned int to float/double/long double */ |
2031 | o(0x6a); /* push $0 */ |
2032 | g(0x00); |
2033 | o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ |
2034 | o(0x242cdf); /* fildll (%rsp) */ |
2035 | o(0x10c48348); /* add $16, %rsp */ |
2036 | } else { |
2037 | /* int to float/double/long double */ |
2038 | o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ |
2039 | o(0x2404db); /* fildl (%rsp) */ |
2040 | o(0x08c48348); /* add $8, %rsp */ |
2041 | } |
2042 | vtop->r = TREG_ST0; |
2043 | } else { |
2044 | int r = get_reg(RC_FLOAT); |
2045 | gv(RC_INT); |
2046 | o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0)); |
2047 | if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == |
2048 | (VT_INT | VT_UNSIGNED) || |
2049 | (vtop->type.t & VT_BTYPE) == VT_LLONG) { |
2050 | o(0x48); /* REX */ |
2051 | } |
2052 | o(0x2a0f); |
2053 | o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ |
2054 | vtop->r = r; |
2055 | } |
2056 | } |
2057 | |
2058 | /* convert from one floating point type to another */ |
2059 | void gen_cvt_ftof(int t) |
2060 | { |
2061 | int ft, bt, tbt; |
2062 | |
2063 | ft = vtop->type.t; |
2064 | bt = ft & VT_BTYPE; |
2065 | tbt = t & VT_BTYPE; |
2066 | |
2067 | if (bt == VT_FLOAT) { |
2068 | gv(RC_FLOAT); |
2069 | if (tbt == VT_DOUBLE) { |
2070 | o(0x140f); /* unpcklps */ |
2071 | o(0xc0 + REG_VALUE(vtop->r)*9); |
2072 | o(0x5a0f); /* cvtps2pd */ |
2073 | o(0xc0 + REG_VALUE(vtop->r)*9); |
2074 | } else if (tbt == VT_LDOUBLE) { |
2075 | save_reg(RC_ST0); |
2076 | /* movss %xmm0,-0x10(%rsp) */ |
2077 | o(0x110ff3); |
2078 | o(0x44 + REG_VALUE(vtop->r)*8); |
2079 | o(0xf024); |
2080 | o(0xf02444d9); /* flds -0x10(%rsp) */ |
2081 | vtop->r = TREG_ST0; |
2082 | } |
2083 | } else if (bt == VT_DOUBLE) { |
2084 | gv(RC_FLOAT); |
2085 | if (tbt == VT_FLOAT) { |
2086 | o(0x140f66); /* unpcklpd */ |
2087 | o(0xc0 + REG_VALUE(vtop->r)*9); |
2088 | o(0x5a0f66); /* cvtpd2ps */ |
2089 | o(0xc0 + REG_VALUE(vtop->r)*9); |
2090 | } else if (tbt == VT_LDOUBLE) { |
2091 | save_reg(RC_ST0); |
2092 | /* movsd %xmm0,-0x10(%rsp) */ |
2093 | o(0x110ff2); |
2094 | o(0x44 + REG_VALUE(vtop->r)*8); |
2095 | o(0xf024); |
2096 | o(0xf02444dd); /* fldl -0x10(%rsp) */ |
2097 | vtop->r = TREG_ST0; |
2098 | } |
2099 | } else { |
2100 | int r; |
2101 | gv(RC_ST0); |
2102 | r = get_reg(RC_FLOAT); |
2103 | if (tbt == VT_DOUBLE) { |
2104 | o(0xf0245cdd); /* fstpl -0x10(%rsp) */ |
2105 | /* movsd -0x10(%rsp),%xmm0 */ |
2106 | o(0x100ff2); |
2107 | o(0x44 + REG_VALUE(r)*8); |
2108 | o(0xf024); |
2109 | vtop->r = r; |
2110 | } else if (tbt == VT_FLOAT) { |
2111 | o(0xf0245cd9); /* fstps -0x10(%rsp) */ |
2112 | /* movss -0x10(%rsp),%xmm0 */ |
2113 | o(0x100ff3); |
2114 | o(0x44 + REG_VALUE(r)*8); |
2115 | o(0xf024); |
2116 | vtop->r = r; |
2117 | } |
2118 | } |
2119 | } |
2120 | |
2121 | /* convert fp to int 't' type */ |
2122 | void gen_cvt_ftoi(int t) |
2123 | { |
2124 | int ft, bt, size, r; |
2125 | ft = vtop->type.t; |
2126 | bt = ft & VT_BTYPE; |
2127 | if (bt == VT_LDOUBLE) { |
2128 | gen_cvt_ftof(VT_DOUBLE); |
2129 | bt = VT_DOUBLE; |
2130 | } |
2131 | |
2132 | gv(RC_FLOAT); |
2133 | if (t != VT_INT) |
2134 | size = 8; |
2135 | else |
2136 | size = 4; |
2137 | |
2138 | r = get_reg(RC_INT); |
2139 | if (bt == VT_FLOAT) { |
2140 | o(0xf3); |
2141 | } else if (bt == VT_DOUBLE) { |
2142 | o(0xf2); |
2143 | } else { |
2144 | assert(0); |
2145 | } |
2146 | orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ |
2147 | o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); |
2148 | vtop->r = r; |
2149 | } |
2150 | |
2151 | // Generate sign extension from 32 to 64 bits: |
2152 | ST_FUNC void gen_cvt_sxtw(void) |
2153 | { |
2154 | int r = gv(RC_INT); |
2155 | /* x86_64 specific: movslq */ |
2156 | o(0x6348); |
2157 | o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r)); |
2158 | } |
2159 | |
2160 | /* char/short to int conversion */ |
2161 | ST_FUNC void gen_cvt_csti(int t) |
2162 | { |
2163 | int r, sz, xl, ll; |
2164 | r = gv(RC_INT); |
2165 | sz = !(t & VT_UNSIGNED); |
2166 | xl = (t & VT_BTYPE) == VT_SHORT; |
2167 | ll = (vtop->type.t & VT_BTYPE) == VT_LLONG; |
2168 | orex(ll, r, 0, 0xc0b60f /* mov[sz] %a[xl], %eax */ |
2169 | | (sz << 3 | xl) << 8 |
2170 | | (REG_VALUE(r) << 3 | REG_VALUE(r)) << 16 |
2171 | ); |
2172 | } |
2173 | |
2174 | /* computed goto support */ |
2175 | void ggoto(void) |
2176 | { |
2177 | gcall_or_jmp(1); |
2178 | vtop--; |
2179 | } |
2180 | |
2181 | /* Save the stack pointer onto the stack and return the location of its address */ |
2182 | ST_FUNC void gen_vla_sp_save(int addr) { |
2183 | /* mov %rsp,addr(%rbp)*/ |
2184 | gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr); |
2185 | } |
2186 | |
2187 | /* Restore the SP from a location on the stack */ |
2188 | ST_FUNC void gen_vla_sp_restore(int addr) { |
2189 | gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr); |
2190 | } |
2191 | |
2192 | #ifdef TCC_TARGET_PE |
2193 | /* Save result of gen_vla_alloc onto the stack */ |
2194 | ST_FUNC void gen_vla_result(int addr) { |
2195 | /* mov %rax,addr(%rbp)*/ |
2196 | gen_modrm64(0x89, TREG_RAX, VT_LOCAL, NULL, addr); |
2197 | } |
2198 | #endif |
2199 | |
2200 | /* Subtract from the stack pointer, and push the resulting value onto the stack */ |
2201 | ST_FUNC void gen_vla_alloc(CType *type, int align) { |
2202 | int use_call = 0; |
2203 | |
2204 | #if defined(CONFIG_TCC_BCHECK) |
2205 | use_call = tcc_state->do_bounds_check; |
2206 | #endif |
2207 | #ifdef TCC_TARGET_PE /* alloca does more than just adjust %rsp on Windows */ |
2208 | use_call = 1; |
2209 | #endif |
2210 | if (use_call) |
2211 | { |
2212 | vpush_global_sym(&func_old_type, TOK_alloca); |
2213 | vswap(); /* Move alloca ref past allocation size */ |
2214 | gfunc_call(1); |
2215 | } |
2216 | else { |
2217 | int r; |
2218 | r = gv(RC_INT); /* allocation size */ |
2219 | /* sub r,%rsp */ |
2220 | o(0x2b48); |
2221 | o(0xe0 | REG_VALUE(r)); |
2222 | /* We align to 16 bytes rather than align */ |
2223 | /* and ~15, %rsp */ |
2224 | o(0xf0e48348); |
2225 | vpop(); |
2226 | } |
2227 | } |
2228 | |
2229 | |
2230 | /* end of x86-64 code generator */ |
2231 | /*************************************************************/ |
2232 | #endif /* ! TARGET_DEFS_ONLY */ |
2233 | /******************************************************/ |
2234 | |