#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "qword.h"

#include "ia32_insn.h"
#include "ia32_opcode_tables.h"

#include "ia32_reg.h"
#include "ia32_operand.h"
#include "ia32_implicit.h"
#include "ia32_settings.h"

#include "libdis.h"

extern ia32_table_desc_t ia32_tables[];
extern ia32_settings_t ia32_settings;

#define IS_SP( op )	(op->type == op_register && \
			 (op->data.reg.id == REG_ESP_INDEX || \
			  op->data.reg.alias == REG_ESP_INDEX) )
#define IS_IMM( op )	(op->type == op_immediate )

#ifdef WIN32
#  define INLINE
#else
#  define INLINE inline
#endif
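
/* Note: MSVC's C dialect historically lacks C99's "inline" keyword
 * (it spells it __inline), hence the empty definition on WIN32. */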

/* for calculating stack modification based on an operand */
static INLINE int32_t long_from_operand( x86_op_t *op ) {

	if ( !op || ! IS_IMM(op) ) {
		return 0L;
	}

	switch ( op->datatype ) {
		case op_byte:
			return (int32_t) op->data.sbyte;
		case op_word:
			return (int32_t) op->data.sword;
		case op_qword:
			return (int32_t) op->data.sqword;
		case op_dword:
			return op->data.sdword;
		default:
			/* these are not used in stack insns */
			break;
	}

	return 0L;
}
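
/* Note that the casts above sign-extend: the immediate data fields are
 * signed (sbyte, sword, sqword), so e.g. an "add esp, -8" encoded with
 * the byte immediate 0xF8 yields -8 here, not 248. */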


/* determine what this insn does to the stack */
static void ia32_stack_mod(x86_insn_t *insn) {
	x86_op_t *dest, *src = NULL;

	if (! insn || ! insn->operands ) {
		return;
	}

	dest = &insn->operands->op;
	/* a second operand may not be present, e.g. in push/pop */
	if ( insn->operands->next ) {
		src = &insn->operands->next->op;
	}

	insn->stack_mod = 0;
	insn->stack_mod_val = 0;

	switch ( insn->type ) {
		case insn_call:
		case insn_callcc:
		case insn_push:
			insn->stack_mod = 1;
			insn->stack_mod_val = insn->addr_size * -1;
			break;
		case insn_return:
			insn->stack_mod = 1;
			/* note: does not account for the imm16 of "ret n" */
			insn->stack_mod_val = insn->addr_size;
			break;
		case insn_int: case insn_intcc:
		case insn_iret:
			break;
		case insn_pop:
			insn->stack_mod = 1;
			if (! IS_SP( dest ) ) {
				insn->stack_mod_val = insn->op_size;
			} /* else we don't know the stack change in a pop esp */
			break;
		case insn_enter:
		case insn_leave:
		case insn_pushregs:
		case insn_popregs:
		case insn_pushflags:
		case insn_popflags:
			insn->stack_mod = 1;
			insn->stack_mod_val = 0;	/* TODO : FIX */
			break;
		case insn_add:
			if ( IS_SP( dest ) ) {
				insn->stack_mod = 1;
				insn->stack_mod_val = long_from_operand( src );
			}
			break;
		case insn_sub:
			if ( IS_SP( dest ) ) {
				insn->stack_mod = 1;
				insn->stack_mod_val = long_from_operand( src );
				insn->stack_mod_val *= -1;
			}
			break;
		case insn_inc:
			if ( IS_SP( dest ) ) {
				insn->stack_mod = 1;
				insn->stack_mod_val = 1;
			}
			break;
		case insn_dec:
			if ( IS_SP( dest ) ) {
				insn->stack_mod = 1;
				/* dec esp moves the stack pointer down */
				insn->stack_mod_val = -1;
			}
			break;
		case insn_mov: case insn_movcc:
		case insn_xchg: case insn_xchgcc:
		case insn_mul: case insn_div:
		case insn_shl: case insn_shr:
		case insn_rol: case insn_ror:
		case insn_and: case insn_or:
		case insn_not: case insn_neg:
		case insn_xor:
			if ( IS_SP( dest ) ) {
				insn->stack_mod = 1;
			}
			break;
		default:
			break;
	}
	if (! strcmp("enter", insn->mnemonic) ||
	    ! strcmp("leave", insn->mnemonic) ) {
		insn->stack_mod = 1;
	}

	/* for mov, etc. we leave stack_mod_val at 0 -- unknown stack mod */

	return;
}
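
/* Illustrative sketch (kept out of the build): stack_mod and
 * stack_mod_val let a caller track the net ESP change across a run of
 * decoded instructions. Nothing below is used by the disassembler. */
#if 0
static int32_t net_stack_change( x86_insn_t *insns, size_t count ) {
	size_t i;
	int32_t delta = 0;

	for ( i = 0; i < count; i++ ) {
		if ( insns[i].stack_mod ) {
			delta += insns[i].stack_mod_val;
		}
	}
	/* note: 0 can also mean "modified by an unknown amount" */
	return delta;
}
#endif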

/* get the cpu details for this insn from cpu flags int */
static void ia32_handle_cpu( x86_insn_t *insn, unsigned int cpu ) {
	insn->cpu = (enum x86_insn_cpu) CPU_MODEL(cpu);
	insn->isa = (enum x86_insn_isa) (ISA_SUBSET(cpu) >> 16);
	return;
}

/* handle mnemonic type and group */
static void ia32_handle_mnemtype(x86_insn_t *insn, unsigned int mnemtype) {
	unsigned int type = mnemtype & ~INS_FLAG_MASK;
	insn->group = (enum x86_insn_group) (INS_GROUP(type) >> 12);
	insn->type = (enum x86_insn_type) INS_TYPE(type);

	return;
}

static void ia32_handle_notes(x86_insn_t *insn, unsigned int notes) {
	insn->note = (enum x86_insn_note) notes;
	return;
}

static void ia32_handle_eflags( x86_insn_t *insn, unsigned int eflags) {
	unsigned int flags;

	/* handle flags affected */
	flags = INS_FLAGS_TEST(eflags);
	/* handle weird OR cases */
	/* these are either JLE (ZF | SF<>OF) or JBE (CF | ZF) */
	if (flags & INS_TEST_OR) {
		flags &= ~INS_TEST_OR;
		if ( flags & INS_TEST_ZERO ) {
			flags &= ~INS_TEST_ZERO;
			if ( flags & INS_TEST_CARRY ) {
				flags &= ~INS_TEST_CARRY;
				flags |= (int) insn_carry_or_zero_set;
			} else if ( flags & INS_TEST_SFNEOF ) {
				flags &= ~INS_TEST_SFNEOF;
				flags |= (int) insn_zero_set_or_sign_ne_oflow;
			}
		}
	}
	insn->flags_tested = (enum x86_flag_status) flags;

	insn->flags_set = (enum x86_flag_status) (INS_FLAGS_SET(eflags) >> 16);

	return;
}

static void ia32_handle_prefix( x86_insn_t *insn, unsigned int prefixes ) {

	insn->prefix = (enum x86_insn_prefix) (prefixes & PREFIX_MASK);
	if (! (insn->prefix & PREFIX_PRINT_MASK) ) {
		/* no printable prefixes */
		insn->prefix = insn_no_prefix;
	}

	/* concat all prefix strings; reserve a byte for the NUL
	 * that strncat always appends */
	if ( (unsigned int) insn->prefix & PREFIX_LOCK ) {
		strncat(insn->prefix_string, "lock ", 32 -
			strlen(insn->prefix_string) - 1);
	}

	if ( (unsigned int) insn->prefix & PREFIX_REPNZ ) {
		strncat(insn->prefix_string, "repnz ", 32 -
			strlen(insn->prefix_string) - 1);
	} else if ( (unsigned int) insn->prefix & PREFIX_REPZ ) {
		strncat(insn->prefix_string, "repz ", 32 -
			strlen(insn->prefix_string) - 1);
	}

	return;
}


static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {

	/* if this is a 32-bit register and it is a general register ... */
	if ( op->type == op_register && op->data.reg.size == 4 &&
	     (op->data.reg.type & reg_gen) ) {
		/* WORD registers are 8 indices off from DWORD registers */
		ia32_handle_register( &(op->data.reg),
				op->data.reg.id + 8 );
	}
}
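
/* For example, with this table layout the entry for eax maps to the
 * entry for ax eight slots later; the substitution is made in place on
 * the operand's register structure. */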

static void handle_insn_metadata( x86_insn_t *insn, ia32_insn_t *raw_insn ) {
	ia32_handle_mnemtype( insn, raw_insn->mnem_flag );
	ia32_handle_notes( insn, raw_insn->notes );
	ia32_handle_eflags( insn, raw_insn->flags_effected );
	ia32_handle_cpu( insn, raw_insn->cpu );
	ia32_stack_mod( insn );
}

static size_t ia32_decode_insn( unsigned char *buf, size_t buf_len,
				ia32_insn_t *raw_insn, x86_insn_t *insn,
				unsigned int prefixes ) {
	size_t size, op_size;
	unsigned char modrm;

	/* this should never happen, but just in case... */
	if ( raw_insn->mnem_flag == INS_INVALID ) {
		return 0;
	}

	if (ia32_settings.options & opt_16_bit) {
		insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
		insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
	} else {
		insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
		insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
	}

	/* ++++ 1. Copy mnemonic and mnemonic-flags to CODE struct */
	if ((ia32_settings.options & opt_att_mnemonics) &&
	     raw_insn->mnemonic_att[0]) {
		strncpy( insn->mnemonic, raw_insn->mnemonic_att, 15 );
	} else {
		strncpy( insn->mnemonic, raw_insn->mnemonic, 15 );
	}
	insn->mnemonic[15] = '\0';	/* strncpy does not guarantee a NUL */
	ia32_handle_prefix( insn, prefixes );

	handle_insn_metadata( insn, raw_insn );

	/* prefetch the next byte in case it is a modr/m byte -- saves
	 * worrying about whether the 'mod/rm' operand or the 'reg' operand
	 * occurs first */
	modrm = GET_BYTE( buf, buf_len );

	/* ++++ 2. Decode Explicit Operands */
	/* Intel uses up to 3 explicit operands in its instructions;
	 * the first is 'dest', the second is 'src', and the third
	 * is an additional source value (usually an immediate value,
	 * e.g. in the MUL instructions). These three explicit operands
	 * are encoded in the opcode tables, even if they are not used
	 * by the instruction. Additional implicit operands are stored
	 * in a supplemental table and are handled later. */

	op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->dest,
					raw_insn->dest_flag, prefixes, modrm );
	/* advance buffer, increase size if necessary */
	buf += op_size;
	buf_len -= op_size;
	size = op_size;

	op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->src,
					raw_insn->src_flag, prefixes, modrm );
	buf += op_size;
	buf_len -= op_size;
	size += op_size;

	op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->aux,
					raw_insn->aux_flag, prefixes, modrm );
	size += op_size;

	/* ++++ 3. Decode Implicit Operands */
	/* apply implicit operands */
	ia32_insn_implicit_ops( insn, raw_insn->implicit_ops );
	/* we have one small inelegant hack here, to deal with
	 * the two prefixes that have implicit operands. If Intel
	 * adds more, we'll change the algorithm to suit :) */
	if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) {
		ia32_insn_implicit_ops( insn, IDX_IMPLICIT_REP );
	}

	/* 16-bit hack: foreach operand, if 32-bit reg, make 16-bit reg */
	if ( insn->op_size == 2 ) {
		x86_operand_foreach( insn, reg_32_to_16, NULL, op_any );
	}

	return size;
}


/* convenience routine */
#define USES_MOD_RM(flag) \
	(flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \
	 flag == ADDRMETH_W || flag == ADDRMETH_R)
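/* These letters follow the addressing-method codes in Intel's opcode
 * maps: E is a ModR/M reg-or-mem operand, M is ModR/M memory only,
 * R is a ModR/M register, and Q and W are the MMX/SSE equivalents --
 * every one of them encodes an operand in the ModR/M byte. */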

static int uses_modrm_flag( unsigned int flag ) {
	unsigned int meth;
	if ( flag == ARG_NONE ) {
		return 0;
	}
	meth = (flag & ADDRMETH_MASK);
	if ( USES_MOD_RM(meth) ) {
		return 1;
	}

	return 0;
}

/* This routine performs the actual byte-by-byte opcode table lookup.
 * Originally it was pretty simple: get a byte, adjust it to a proper
 * index into the table, then check the table row at that index to
 * determine what to do next. But is anything ever that simple with
 * Intel? This is now a huge, convoluted mess, mostly of bitter
 * comments. */
/* buf: pointer to next byte to read from stream
 * buf_len: length of buf
 * table: index of table to use for lookups
 * raw_insn: output pointer that receives opcode definition
 * prefixes: output integer that is encoded with prefixes in insn
 * returns : number of bytes consumed from stream during lookup */
size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
			  unsigned int table, ia32_insn_t **raw_insn,
			  unsigned int *prefixes ) {
	unsigned char *next, op;	/* byte value -- 'opcode' */
	size_t size = 1, sub_size = 0, next_len;
	ia32_table_desc_t *table_desc;
	unsigned int subtable, prefix = 0, recurse_table = 0;

	table_desc = &ia32_tables[table];

	op = GET_BYTE( buf, buf_len );

	if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) {
		/* one of the FPU tables out of the 00-BF range */
		/* OK, this is a bit of a hack -- the proper way would
		 * have been to use subtables in the 00-BF FPU opcode tables,
		 * but that is rather wasteful of space... */
		table_desc = &ia32_tables[table + 1];
	}

	/* PERFORM TABLE LOOKUP */

	/* ModR/M trick: shift extension bits into lowest bits of byte */
	/* Note: non-ModR/M tables have a shift value of 0 */
	op >>= table_desc->shift;

	/* ModR/M trick: mask out high bits to turn extension into an index */
	/* Note: non-ModR/M tables have a mask value of 0xFF */
	op &= table_desc->mask;

	/* Sparse table trick: check that byte is <= max value */
	/* Note: full (256-entry) tables have a maxlim of 255 */
	if ( op > table_desc->maxlim ) {
		/* this is a partial table, truncated at the tail,
		   and op is out of range! */
		return INVALID_INSN;
	}

	/* Sparse table trick: check that byte is >= min value */
	/* Note: full (256-entry) tables have a minlim of 0 */
	if ( table_desc->minlim > op ) {
		/* this is a partial table, truncated at the head,
		   and op is out of range! */
		return INVALID_INSN;
	}
	/* adjust op to be an offset from table index 0 */
	op -= table_desc->minlim;

	/* Yay! 'op' is now fully adjusted to be an index into 'table' */
	*raw_insn = &(table_desc->table[op]);
	//printf("BYTE %X TABLE %d OP %X\n", buf[0], table, op );

	if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) {
		prefix = (*raw_insn)->mnem_flag & PREFIX_MASK;
	}

	/* handle escape to a multibyte/coproc/extension/etc table */
	/* NOTE: if insn is a prefix and has a subtable, then we
	 * only recurse if this is the first prefix byte --
	 * that is, if *prefixes is 0.
	 * NOTE also that suffix tables are handled later */
	subtable = (*raw_insn)->table;

	if ( subtable && ia32_tables[subtable].type != tbl_suffix &&
	     (! prefix || ! *prefixes) ) {

		if ( ia32_tables[subtable].type == tbl_ext_ext ||
		     ia32_tables[subtable].type == tbl_fpu_ext ) {
			/* opcode extension: reuse current byte in buffer */
			next = buf;
			next_len = buf_len;
		} else {
			/* "normal" opcode: advance to next byte in buffer */
			if ( buf_len > 1 ) {
				next = &buf[1];
				next_len = buf_len - 1;
			} else {
				/* buffer is truncated */
				return INVALID_INSN;
			}
		}
		/* we encountered a multibyte opcode: recurse using the
		 * table specified in the opcode definition */
		sub_size = ia32_table_lookup( next, next_len, subtable,
					raw_insn, prefixes );

		/* SSE/prefix hack: if the original opcode def was a
		 * prefix that specified a subtable, and the subtable
		 * lookup returned a valid insn, then we have encountered
		 * an SSE opcode definition; otherwise, we pretend we
		 * never did the subtable lookup, and deal with the
		 * prefix normally later */
		if ( prefix && ( sub_size == INVALID_INSN ||
		     INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) {
			/* this is a prefix, not an SSE insn:
			 * look up the next byte in the main table;
			 * sub_size will be reset during that lookup */
			recurse_table = 1;
		} else {
			/* this is either a subtable (two-byte) insn
			 * or an invalid insn: either way, clear the
			 * prefix and end the opcode lookup */
			prefix = 0;
			/* short-circuit lookup on invalid insn */
			if (sub_size == INVALID_INSN) {
				return INVALID_INSN;
			}
		}
	} else if ( prefix ) {
		recurse_table = 1;
	}

	/* by default, we assume that we have the opcode definition,
	 * and there is no need to recurse on the same table, but
	 * if we do then a prefix was encountered... */
	if ( recurse_table ) {
		/* this must have been a prefix: use the same table for
		 * lookup of the next byte */
		sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table,
					raw_insn, prefixes );

		/* short-circuit lookup on invalid insn */
		if (sub_size == INVALID_INSN) {
			return INVALID_INSN;
		}

		/* a bit of a hack for branch hints */
		if ( prefix & BRANCH_HINT_MASK ) {
			if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) {
				/* segment override prefixes are invalid for
				 * all branch instructions, so delete them */
				prefix &= ~PREFIX_REG_MASK;
			} else {
				prefix &= ~BRANCH_HINT_MASK;
			}
		}

		/* apply prefix to instruction */

		/* TODO: implement something enforcing prefix groups */
		(*prefixes) |= prefix;
	}

	/* if this lookup was in a ModR/M table, then an opcode byte is
	 * NOT consumed: subtract accordingly. NOTE that if none of the
	 * operands used the ModR/M, then we need to consume the byte
	 * here, but ONLY in the 'top-level' opcode extension table */

	if ( table_desc->type == tbl_ext_ext ) {
		/* extensions-to-extensions never consume a byte */
		--size;
	} else if ( (table_desc->type == tbl_extension ||
		     table_desc->type == tbl_fpu ||
		     table_desc->type == tbl_fpu_ext ) &&
		    /* extensions that have an operand encoded in ModR/M
		     * never consume a byte */
		    (uses_modrm_flag((*raw_insn)->dest_flag) ||
		     uses_modrm_flag((*raw_insn)->src_flag) ) ) {
		--size;
	}

	size += sub_size;

	return size;
}
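
/* Worked example (opcode bytes cited for illustration only): decoding
 * the two-byte opcode 0F AF (IMUL r32, r/m32) hits the 0F row of the
 * main table, which escapes to a second table; the recursive lookup
 * consumes the AF byte, the 0F was consumed at this level, and the
 * returned size totals 2. */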

static size_t handle_insn_suffix( unsigned char *buf, size_t buf_len,
			   ia32_insn_t *raw_insn, x86_insn_t *insn ) {
	ia32_insn_t *sfx_insn;
	size_t size;
	unsigned int prefixes = 0;

	size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
				 &prefixes );
	if ( size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {
		return 0;
	}

	strncpy( insn->mnemonic, sfx_insn->mnemonic, 15 );
	insn->mnemonic[15] = '\0';
	handle_insn_metadata( insn, sfx_insn );

	return 1;
}

/* invalid instructions are handled by returning 0 [error] from the
 * function, setting the size of the insn to 1 byte, and copying
 * the byte at the start of the invalid insn into the x86_insn_t.
 * if the caller is saving the x86_insn_t for invalid instructions,
 * instead of discarding them, this will maintain a consistent
 * address space in the x86_insn_ts */

/* this function is called by the controlling disassembler, so its name and
 * calling convention cannot be changed */
/* buf points to the location of the current opcode (start of the
 * instruction) in the instruction stream. The instruction
 * stream is assumed to be a buffer of bytes read directly
 * from the file for the purpose of disassembly; a mem-mapped
 * file is ideal for this.
 * insn points to a code structure to be filled by instr_decode
 * returns the size of the decoded instruction in bytes */
size_t ia32_disasm_addr( unsigned char *buf, size_t buf_len,
			 x86_insn_t *insn ) {
	ia32_insn_t *raw_insn = NULL;
	unsigned int prefixes = 0;
	size_t size, sfx_size;

	if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 &&
	     !buf[0] && !buf[1] && !buf[2] && !buf[3]) {
		/* IF IGNORE_NULLS is set AND
		 * the first 4 bytes in the instruction stream are NULL
		 * THEN return 0 (END_OF_DISASSEMBLY) */
		/* TODO: set errno */
		MAKE_INVALID( insn, buf );
		return 0;	/* 4 00 bytes in a row? This isn't code! */
	}

	/* Perform recursive table lookup starting with main table (0) */
	size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn, &prefixes);
	if ( size == INVALID_INSN || size > buf_len ||
	     raw_insn->mnem_flag == INS_INVALID ) {
		MAKE_INVALID( insn, buf );
		/* TODO: set errno */
		return 0;
	}

	/* We now have the opcode itself figured out: we can decode
	 * the rest of the instruction. */
	size += ia32_decode_insn( &buf[size], buf_len - size, raw_insn, insn,
				  prefixes );
	if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) {
		/* AMD 3DNow! suffix -- get proper operand type here */
		sfx_size = handle_insn_suffix( &buf[size], buf_len - size,
					raw_insn, insn );
		if (! sfx_size ) {
			/* TODO: set errno */
			MAKE_INVALID( insn, buf );
			return 0;
		}

		size += sfx_size;
	}

	if (! size ) {
		/* invalid insn */
		MAKE_INVALID( insn, buf );
		return 0;
	}

	insn->size = size;
	return size;	/* return size of instruction in bytes */
}
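
/* Usage sketch (illustrative, not part of the library): a caller that
 * has mapped a code section into 'buf' could walk it like this. A
 * return of 0 flags an invalid insn of size 1, so the loop advances a
 * single byte in that case, per the convention described above. */
#if 0
static void disasm_buffer( unsigned char *buf, size_t buf_len ) {
	x86_insn_t insn;
	size_t pos = 0, size;

	while ( pos < buf_len ) {
		size = ia32_disasm_addr( &buf[pos], buf_len - pos, &insn );
		pos += size ? size : 1;
	}
}
#endif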