1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4
5#include "libdis.h"
6#include <inttypes.h>
7
8#ifdef _MSC_VER
9 #define snprintf _snprintf
10 #define inline __inline
11#endif
12
13
/*
 * Bounded concatenation macros.
 *
 * STRNCAT( buf, str, len ) appends str to the NUL-terminated string in buf.
 * len must be a modifiable int holding the space still available in buf,
 * counting the terminating NUL, so at most len - 1 characters are copied.
 * If str has len - 1 characters or more, the result is cut to len - 1
 * characters and len is set to 0; otherwise len is reduced by strlen(str).
 *
 * A len of zero or less means no room is left and nothing is appended.
 * Callers subtract helper return values from len, which can push it below
 * zero; such a value must never reach strncat() (it would be converted to
 * a huge size_t) nor be used to index buf.
 *
 * STRNCATF formats exactly one argument with a printf-style format and
 * appends the result in the same way.
 */
#define STRNCAT( buf, str, len ) do {					\
	if ( (len) > 0 ) {						\
		int _i = (int) strlen(str);				\
		int _blen = (int) strlen(buf);				\
		int _len = (len) - 1;					\
		strncat( (buf), (str), (size_t) _len );			\
		if ( _len <= _i ) {					\
			(buf)[_blen + _len] = '\0';			\
			(len) = 0;					\
		} else {						\
			(len) -= _i;					\
		}							\
	}								\
} while( 0 )
30
/*
 * STRNCATF( buf, fmt, data, len ): format the single argument data with the
 * printf-style format fmt into a temporary of MAX_OP_STRING bytes, then
 * append that text to buf through STRNCAT (same len contract).  Formatted
 * text that does not fit in the temporary is cut short.
 *
 * The temporary is terminated explicitly: when _MSC_VER is defined this
 * file maps snprintf to _snprintf, which leaves the buffer unterminated if
 * the output fills it.
 */
#define STRNCATF( buf, fmt, data, len ) do {				\
	char _tmp[MAX_OP_STRING];					\
									\
	snprintf( _tmp, sizeof _tmp, fmt, data );			\
	_tmp[sizeof _tmp - 1] = '\0';					\
	STRNCAT( buf, _tmp, len );					\
} while( 0 )
37
38
/*
 * PRINT_DISPLACEMENT( ea ): append the displacement of the effective
 * address ea to the buf/len pair of the enclosing function.  Nothing is
 * emitted when the displacement has no size or its value is zero.  When
 * disp_sign is set the negated value is printed after "-0x", otherwise the
 * value is printed after "0x"; digits are upper-case hexadecimal.
 */
#define PRINT_DISPLACEMENT( ea ) do {					\
	if ( (ea)->disp_size && (ea)->disp ) {				\
		if ( ! (ea)->disp_sign ) {				\
			STRNCATF( buf, "0x%" PRIX32, (ea)->disp, len );	\
		} else {						\
			STRNCATF( buf, "-0x%" PRIX32, -(ea)->disp, len ); \
		}							\
	}								\
} while( 0 )
48
49static const char *prefix_strings[] = {
50 "", /* no prefix */
51 "repz ", /* the trailing spaces make it easy to prepend to mnemonic */
52 "repnz ",
53 "lock ",
54 "branch delay " /* unused in x86 */
55};
56
57static int format_insn_prefix_str( enum x86_insn_prefix prefix, char *buf,
58 int len ) {
59
60 int len_orig = len;
61
62 /* concat all prefix strings */
63 if ( prefix & 1 ) { STRNCAT( buf, prefix_strings[1], len ); }
64 if ( prefix & 2 ) { STRNCAT( buf, prefix_strings[2], len ); }
65 if ( prefix & 4 ) { STRNCAT( buf, prefix_strings[3], len ); }
66 if ( prefix & 8 ) { STRNCAT( buf, prefix_strings[4], len ); }
67
68 /* return the number of characters added */
69 return (len_orig - len);
70}
71
72/*
73 * sprint's an operand's data to string str.
74 */
75static void get_operand_data_str( x86_op_t *op, char *str, int len ){
76
77 if ( op->flags & op_signed ) {
78 switch ( op->datatype ) {
79 case op_byte:
80 snprintf( str, len, "%" PRId8, op->data.sbyte );
81 return;
82 case op_word:
83 snprintf( str, len, "%" PRId16, op->data.sword );
84 return;
85 case op_qword:
86 snprintf( str, len, "%" PRId64, op->data.sqword );
87 return;
88 default:
89 snprintf( str, len, "%" PRId32, op->data.sdword );
90 return;
91 }
92 }
93
94 //else
95 switch ( op->datatype ) {
96 case op_byte:
97 snprintf( str, len, "0x%02" PRIX8, op->data.byte );
98 return;
99 case op_word:
100 snprintf( str, len, "0x%04" PRIX16, op->data.word );
101 return;
102 case op_qword:
103 snprintf( str, len, "0x%08" PRIX64,op->data.sqword );
104 return;
105 default:
106 snprintf( str, len, "0x%08" PRIX32, op->data.dword );
107 return;
108 }
109}
110
/*
 * Write the names of all register-type bits set in regtype into str,
 * separated by single spaces.  str is first cleared over its whole len
 * bytes; an empty string results when no known bit is set.
 */
static void get_operand_regtype_str( int regtype, char *str, int len )
{
	static const struct regtype_name {
		const char *name;
		int value;
	} regtype_names[] = {
		{"reg_gen"    , 0x00001},
		{"reg_in"     , 0x00002},
		{"reg_out"    , 0x00004},
		{"reg_local"  , 0x00008},
		{"reg_fpu"    , 0x00010},
		{"reg_seg"    , 0x00020},
		{"reg_simd"   , 0x00040},
		{"reg_sys"    , 0x00080},
		{"reg_sp"     , 0x00100},
		{"reg_fp"     , 0x00200},
		{"reg_pc"     , 0x00400},
		{"reg_retaddr", 0x00800},
		{"reg_cond"   , 0x01000},
		{"reg_zero"   , 0x02000},
		{"reg_ret"    , 0x04000},
		{"reg_src"    , 0x10000},
		{"reg_dest"   , 0x20000},
		{"reg_count"  , 0x40000},
		{NULL, 0},	/* terminator */
	};
	const struct regtype_name *entry;

	memset( str, 0, len );

	/* walk the table up to the NULL-named terminator */
	for ( entry = regtype_names; entry->name != NULL; entry++ ) {
		if ( regtype & entry->value ) {
			/* separate from any name already written */
			if ( str[0] != '\0' ) {
				STRNCAT( str, " ", len );
			}
			STRNCAT( str, entry->name, len );
		}
	}
}
160
161static int format_expr( x86_ea_t *ea, char *buf, int len,
162 enum x86_asm_format format ) {
163 char str[MAX_OP_STRING];
164
165 if ( format == att_syntax ) {
166 if (ea->base.name[0] || ea->index.name[0] || ea->scale) {
167 PRINT_DISPLACEMENT(ea);
168 STRNCAT( buf, "(", len );
169
170 if ( ea->base.name[0]) {
171 STRNCATF( buf, "%%%s", ea->base.name, len );
172 }
173 if ( ea->index.name[0]) {
174 STRNCATF( buf, ",%%%s", ea->index.name, len );
175 if ( ea->scale > 1 ) {
176 STRNCATF( buf, ",%d", ea->scale, len );
177 }
178 }
179 /* handle the syntactic exception */
180 if ( ! ea->base.name[0] &&
181 ! ea->index.name[0] ) {
182 STRNCATF( buf, ",%d", ea->scale, len );
183 }
184
185 STRNCAT( buf, ")", len );
186 } else
187 STRNCATF( buf, "0x%" PRIX32, ea->disp, len );
188
189 } else if ( format == xml_syntax ){
190
191 if ( ea->base.name[0]) {
192 STRNCAT (buf, "\t\t\t<base>\n", len);
193
194 get_operand_regtype_str (ea->base.type, str,
195 sizeof str);
196 STRNCAT (buf, "\t\t\t\t<register ", len);
197 STRNCATF (buf, "name=\"%s\" ", ea->base.name, len);
198 STRNCATF (buf, "type=\"%s\" ", str, len);
199 STRNCATF (buf, "size=%d/>\n", ea->base.size, len);
200
201 STRNCAT (buf, "\t\t\t</base>\n", len);
202 }
203
204 if ( ea->index.name[0]) {
205 STRNCAT (buf, "\t\t\t<index>\n", len);
206
207 get_operand_regtype_str (ea->index.type, str,
208 sizeof str);
209
210 STRNCAT (buf, "\t\t\t\t<register ", len);
211 STRNCATF (buf, "name=\"%s\" ", ea->index.name, len);
212 STRNCATF (buf, "type=\"%s\" ", str, len);
213 STRNCATF (buf, "size=%d/>\n", ea->index.size, len);
214
215 STRNCAT (buf, "\t\t\t</index>\n", len);
216 }
217
218 //scale
219 STRNCAT (buf, "\t\t\t<scale>\n", len);
220 STRNCAT (buf, "\t\t\t\t<immediate ", len);
221 STRNCATF (buf, "value=\"%d\"/>\n", ea->scale, len);
222 STRNCAT (buf, "\t\t\t</scale>\n", len);
223
224 if ( ea->disp_size ) {
225
226 STRNCAT (buf, "\t\t\t<displacement>\n", len);
227
228 if ( ea->disp_size > 1 && ! ea->disp_sign ) {
229 STRNCAT (buf, "\t\t\t\t<address ", len);
230 STRNCATF (buf, "value=\"0x%" PRIX32 "\"/>\n", ea->disp,
231 len);
232 } else {
233 STRNCAT (buf, "\t\t\t\t<immediate ", len);
234 STRNCATF (buf, "value=%" PRId32 "/>\n", ea->disp, len);
235 }
236
237 STRNCAT (buf, "\t\t\t</displacement>\n", len);
238 }
239
240 } else if ( format == raw_syntax ) {
241
242 PRINT_DISPLACEMENT(ea);
243 STRNCAT( buf, "(", len );
244
245 STRNCATF( buf, "%s,", ea->base.name, len );
246 STRNCATF( buf, "%s,", ea->index.name, len );
247 STRNCATF( buf, "%d", ea->scale, len );
248 STRNCAT( buf, ")", len );
249
250 } else {
251
252 STRNCAT( buf, "[", len );
253
254 if ( ea->base.name[0] ) {
255 STRNCAT( buf, ea->base.name, len );
256 if ( ea->index.name[0] ||
257 (ea->disp_size && ! ea->disp_sign) ) {
258 STRNCAT( buf, "+", len );
259 }
260 }
261 if ( ea->index.name[0] ) {
262 STRNCAT( buf, ea->index.name, len );
263 if ( ea->scale > 1 )
264 {
265 STRNCATF( buf, "*%" PRId32, ea->scale, len );
266 }
267 if ( ea->disp_size && ! ea->disp_sign )
268 {
269 STRNCAT( buf, "+", len );
270 }
271 }
272
273 if ( ea->disp_size || (! ea->index.name[0] &&
274 ! ea->base.name[0] ) )
275 {
276 PRINT_DISPLACEMENT(ea);
277 }
278
279 STRNCAT( buf, "]", len );
280 }
281
282 return( strlen(buf) );
283}
284
285static int format_seg( x86_op_t *op, char *buf, int len,
286 enum x86_asm_format format ) {
287 int len_orig = len;
288 const char *reg = "";
289
290 if (! op || ! buf || ! len || ! op->flags) {
291 return(0);
292 }
293 if ( op->type != op_offset && op->type != op_expression ){
294 return(0);
295 }
296 if (! ((int) op->flags & 0xF00) ) {
297 return(0);
298 }
299
300 switch (op->flags & 0xF00) {
301 case op_es_seg: reg = "es"; break;
302 case op_cs_seg: reg = "cs"; break;
303 case op_ss_seg: reg = "ss"; break;
304 case op_ds_seg: reg = "ds"; break;
305 case op_fs_seg: reg = "fs"; break;
306 case op_gs_seg: reg = "gs"; break;
307 default:
308 break;
309 }
310
311 if (! reg[0] ) {
312 return( 0 );
313 }
314
315 switch( format ) {
316 case xml_syntax:
317 STRNCAT( buf, "\t\t\t<segment ", len );
318 STRNCATF( buf, "value=\"%s\"/>\n", reg, len );
319 break;
320 case att_syntax:
321 STRNCATF( buf, "%%%s:", reg, len );
322 break;
323
324 default:
325 STRNCATF( buf, "%s:", reg, len );
326 break;
327 }
328
329 return( len_orig - len ); /* return length of appended string */
330}
331
332static const char *get_operand_datatype_str( x86_op_t *op ){
333
334 static const char *types[] = {
335 "sbyte", /* 0 */
336 "sword",
337 "sqword",
338 "sdword",
339 "sdqword", /* 4 */
340 "byte",
341 "word",
342 "qword",
343 "dword", /* 8 */
344 "dqword",
345 "sreal",
346 "dreal",
347 "extreal", /* 12 */
348 "bcd",
349 "ssimd",
350 "dsimd",
351 "sssimd", /* 16 */
352 "sdsimd",
353 "descr32",
354 "descr16",
355 "pdescr32", /* 20 */
356 "pdescr16",
357 "bounds16",
358 "bounds32",
359 "fpu_env16",
360 "fpu_env32", /* 25 */
361 "fpu_state16",
362 "fpu_state32",
363 "fp_reg_set"
364 };
365
366 /* handle signed values first */
367 if ( op->flags & op_signed ) {
368 switch (op->datatype) {
369 case op_byte: return types[0];
370 case op_word: return types[1];
371 case op_qword: return types[2];
372 case op_dqword: return types[4];
373 default: return types[3];
374 }
375 }
376
377 switch (op->datatype) {
378 case op_byte: return types[5];
379 case op_word: return types[6];
380 case op_qword: return types[7];
381 case op_dqword: return types[9];
382 case op_sreal: return types[10];
383 case op_dreal: return types[11];
384 case op_extreal: return types[12];
385 case op_bcd: return types[13];
386 case op_ssimd: return types[14];
387 case op_dsimd: return types[15];
388 case op_sssimd: return types[16];
389 case op_sdsimd: return types[17];
390 case op_descr32: return types[18];
391 case op_descr16: return types[19];
392 case op_pdescr32: return types[20];
393 case op_pdescr16: return types[21];
394 case op_bounds16: return types[22];
395 case op_bounds32: return types[23];
396 case op_fpustate16: return types[24];
397 case op_fpustate32: return types[25];
398 case op_fpuenv16: return types[26];
399 case op_fpuenv32: return types[27];
400 case op_fpregset: return types[28];
401 default: return types[8];
402 }
403}
404
405static int format_insn_eflags_str( enum x86_flag_status flags, char *buf,
406 int len) {
407
408 static struct {
409 const char *name;
410 int value;
411 } insn_flags[] = {
412 { "carry_set ", 0x0001 },
413 { "zero_set ", 0x0002 },
414 { "oflow_set ", 0x0004 },
415 { "dir_set ", 0x0008 },
416 { "sign_set ", 0x0010 },
417 { "parity_set ", 0x0020 },
418 { "carry_or_zero_set ", 0x0040 },
419 { "zero_set_or_sign_ne_oflow ", 0x0080 },
420 { "carry_clear ", 0x0100 },
421 { "zero_clear ", 0x0200 },
422 { "oflow_clear ", 0x0400 },
423 { "dir_clear ", 0x0800 },
424 { "sign_clear ", 0x1000 },
425 { "parity_clear ", 0x2000 },
426 { "sign_eq_oflow ", 0x4000 },
427 { "sign_ne_oflow ", 0x8000 },
428 { NULL, 0x0000 }, //end
429 };
430
431 unsigned int i;
432 int len_orig = len;
433
434 for (i = 0; insn_flags[i].name; i++) {
435 if (! (flags & insn_flags[i].value) )
436 continue;
437
438 STRNCAT( buf, insn_flags[i].name, len );
439 }
440
441 return( len_orig - len );
442}
443
444static const char *get_insn_group_str( enum x86_insn_group gp ) {
445
446 static const char *types[] = {
447 "", // 0
448 "controlflow",// 1
449 "arithmetic", // 2
450 "logic", // 3
451 "stack", // 4
452 "comparison", // 5
453 "move", // 6
454 "string", // 7
455 "bit_manip", // 8
456 "flag_manip", // 9
457 "fpu", // 10
458 "", // 11
459 "", // 12
460 "interrupt", // 13
461 "system", // 14
462 "other", // 15
463 };
464
465 if ( gp > sizeof (types)/sizeof(types[0]) )
466 return "";
467
468 return types[gp];
469}
470
471static const char *get_insn_type_str( enum x86_insn_type type ) {
472
473 static struct {
474 const char *name;
475 int value;
476 } types[] = {
477 /* insn_controlflow */
478 { "jmp", 0x1001 },
479 { "jcc", 0x1002 },
480 { "call", 0x1003 },
481 { "callcc", 0x1004 },
482 { "return", 0x1005 },
483 { "loop", 0x1006 },
484 /* insn_arithmetic */
485 { "add", 0x2001 },
486 { "sub", 0x2002 },
487 { "mul", 0x2003 },
488 { "div", 0x2004 },
489 { "inc", 0x2005 },
490 { "dec", 0x2006 },
491 { "shl", 0x2007 },
492 { "shr", 0x2008 },
493 { "rol", 0x2009 },
494 { "ror", 0x200A },
495 /* insn_logic */
496 { "and", 0x3001 },
497 { "or", 0x3002 },
498 { "xor", 0x3003 },
499 { "not", 0x3004 },
500 { "neg", 0x3005 },
501 /* insn_stack */
502 { "push", 0x4001 },
503 { "pop", 0x4002 },
504 { "pushregs", 0x4003 },
505 { "popregs", 0x4004 },
506 { "pushflags", 0x4005 },
507 { "popflags", 0x4006 },
508 { "enter", 0x4007 },
509 { "leave", 0x4008 },
510 /* insn_comparison */
511 { "test", 0x5001 },
512 { "cmp", 0x5002 },
513 /* insn_move */
514 { "mov", 0x6001 }, /* move */
515 { "movcc", 0x6002 }, /* conditional move */
516 { "xchg", 0x6003 }, /* exchange */
517 { "xchgcc", 0x6004 }, /* conditional exchange */
518 /* insn_string */
519 { "strcmp", 0x7001 },
520 { "strload", 0x7002 },
521 { "strmov", 0x7003 },
522 { "strstore", 0x7004 },
523 { "translate", 0x7005 }, /* xlat */
524 /* insn_bit_manip */
525 { "bittest", 0x8001 },
526 { "bitset", 0x8002 },
527 { "bitclear", 0x8003 },
528 /* insn_flag_manip */
529 { "clear_carry", 0x9001 },
530 { "clear_zero", 0x9002 },
531 { "clear_oflow", 0x9003 },
532 { "clear_dir", 0x9004 },
533 { "clear_sign", 0x9005 },
534 { "clear_parity", 0x9006 },
535 { "set_carry", 0x9007 },
536 { "set_zero", 0x9008 },
537 { "set_oflow", 0x9009 },
538 { "set_dir", 0x900A },
539 { "set_sign", 0x900B },
540 { "set_parity", 0x900C },
541 { "tog_carry", 0x9010 },
542 { "tog_zero", 0x9020 },
543 { "tog_oflow", 0x9030 },
544 { "tog_dir", 0x9040 },
545 { "tog_sign", 0x9050 },
546 { "tog_parity", 0x9060 },
547 /* insn_fpu */
548 { "fmov", 0xA001 },
549 { "fmovcc", 0xA002 },
550 { "fneg", 0xA003 },
551 { "fabs", 0xA004 },
552 { "fadd", 0xA005 },
553 { "fsub", 0xA006 },
554 { "fmul", 0xA007 },
555 { "fdiv", 0xA008 },
556 { "fsqrt", 0xA009 },
557 { "fcmp", 0xA00A },
558 { "fcos", 0xA00C },
559 { "fldpi", 0xA00D },
560 { "fldz", 0xA00E },
561 { "ftan", 0xA00F },
562 { "fsine", 0xA010 },
563 { "fsys", 0xA020 },
564 /* insn_interrupt */
565 { "int", 0xD001 },
566 { "intcc", 0xD002 }, /* not present in x86 ISA */
567 { "iret", 0xD003 },
568 { "bound", 0xD004 },
569 { "debug", 0xD005 },
570 { "trace", 0xD006 },
571 { "invalid_op", 0xD007 },
572 { "oflow", 0xD008 },
573 /* insn_system */
574 { "halt", 0xE001 },
575 { "in", 0xE002 }, /* input from port/bus */
576 { "out", 0xE003 }, /* output to port/bus */
577 { "cpuid", 0xE004 },
578 /* insn_other */
579 { "nop", 0xF001 },
580 { "bcdconv", 0xF002 }, /* convert to or from BCD */
581 { "szconv", 0xF003 }, /* change size of operand */
582 { NULL, 0 }, //end
583 };
584
585 unsigned int i;
586
587 //go thru every type in the enum
588 for ( i = 0; types[i].name; i++ ) {
589 if ( types[i].value == type )
590 return types[i].name;
591 }
592
593 return "";
594}
595
596static const char *get_insn_cpu_str( enum x86_insn_cpu cpu ) {
597 static const char *intel[] = {
598 "", // 0
599 "8086", // 1
600 "80286", // 2
601 "80386", // 3
602 "80387", // 4
603 "80486", // 5
604 "Pentium", // 6
605 "Pentium Pro", // 7
606 "Pentium 2", // 8
607 "Pentium 3", // 9
608 "Pentium 4" // 10
609 };
610
611 if ( cpu < sizeof(intel)/sizeof(intel[0]) ) {
612 return intel[cpu];
613 } else if ( cpu == 16 ) {
614 return "K6";
615 } else if ( cpu == 32 ) {
616 return "K7";
617 } else if ( cpu == 48 ) {
618 return "Athlon";
619 }
620
621 return "";
622}
623
624static const char *get_insn_isa_str( enum x86_insn_isa isa ) {
625 static const char *subset[] = {
626 NULL, // 0
627 "General Purpose", // 1
628 "Floating Point", // 2
629 "FPU Management", // 3
630 "MMX", // 4
631 "SSE", // 5
632 "SSE2", // 6
633 "SSE3", // 7
634 "3DNow!", // 8
635 "System" // 9
636 };
637
638 if ( isa > sizeof (subset)/sizeof(subset[0]) ) {
639 return "";
640 }
641
642 return subset[isa];
643}
644
645static int format_operand_att( x86_op_t *op, x86_insn_t *insn, char *buf,
646 int len){
647
648 char str[MAX_OP_STRING];
649
650 memset (str, 0, sizeof str);
651
652 switch ( op->type ) {
653 case op_register:
654 STRNCATF( buf, "%%%s", op->data.reg.name, len );
655 break;
656
657 case op_immediate:
658 get_operand_data_str( op, str, sizeof str );
659 STRNCATF( buf, "$%s", str, len );
660 break;
661
662 case op_relative_near:
663 STRNCATF( buf, "0x%08X",
664 (unsigned int)(op->data.sbyte +
665 insn->addr + insn->size), len );
666 break;
667
668 case op_relative_far:
669 if (op->datatype == op_word) {
670 STRNCATF( buf, "0x%08X",
671 (unsigned int)(op->data.sword +
672 insn->addr + insn->size), len );
673 } else {
674 STRNCATF( buf, "0x%08X",
675 (unsigned int)(op->data.sdword +
676 insn->addr + insn->size), len );
677 }
678 break;
679
680 case op_absolute:
681 /* ATT uses the syntax $section, $offset */
682 STRNCATF( buf, "$0x%04" PRIX16 ", ", op->data.absolute.segment,
683 len );
684 if (op->datatype == op_descr16) {
685 STRNCATF( buf, "$0x%04" PRIX16,
686 op->data.absolute.offset.off16, len );
687 } else {
688 STRNCATF( buf, "$0x%08" PRIX32,
689 op->data.absolute.offset.off32, len );
690 }
691 break;
692 case op_offset:
693 /* ATT requires a '*' before JMP/CALL ops */
694 if (insn->type == insn_jmp || insn->type == insn_call)
695 STRNCAT( buf, "*", len );
696
697 len -= format_seg( op, buf, len, att_syntax );
698 STRNCATF( buf, "0x%08" PRIX32, op->data.sdword, len );
699 break;
700
701 case op_expression:
702 /* ATT requires a '*' before JMP/CALL ops */
703 if (insn->type == insn_jmp || insn->type == insn_call)
704 STRNCAT( buf, "*", len );
705
706 len -= format_seg( op, buf, len, att_syntax );
707 len -= format_expr( &op->data.expression, buf, len,
708 att_syntax );
709 break;
710 case op_unused:
711 case op_unknown:
712 /* return 0-truncated buffer */
713 break;
714 }
715
716 return ( strlen( buf ) );
717}
718
719static int format_operand_native( x86_op_t *op, x86_insn_t *insn, char *buf,
720 int len){
721
722 char str[MAX_OP_STRING];
723
724 switch (op->type) {
725 case op_register:
726 STRNCAT( buf, op->data.reg.name, len );
727 break;
728
729 case op_immediate:
730 get_operand_data_str( op, str, sizeof str );
731 STRNCAT( buf, str, len );
732 break;
733
734 case op_relative_near:
735 STRNCATF( buf, "0x%08" PRIX32,
736 (unsigned int)(op->data.sbyte +
737 insn->addr + insn->size), len );
738 break;
739
740 case op_relative_far:
741 if ( op->datatype == op_word ) {
742 STRNCATF( buf, "0x%08" PRIX32,
743 (unsigned int)(op->data.sword +
744 insn->addr + insn->size), len );
745 break;
746 } else {
747 STRNCATF( buf, "0x%08" PRIX32, op->data.sdword +
748 insn->addr + insn->size, len );
749 }
750 break;
751
752 case op_absolute:
753 STRNCATF( buf, "$0x%04" PRIX16 ":", op->data.absolute.segment,
754 len );
755 if (op->datatype == op_descr16) {
756 STRNCATF( buf, "0x%04" PRIX16,
757 op->data.absolute.offset.off16, len );
758 } else {
759 STRNCATF( buf, "0x%08" PRIX32,
760 op->data.absolute.offset.off32, len );
761 }
762 break;
763
764 case op_offset:
765 len -= format_seg( op, buf, len, native_syntax );
766 STRNCATF( buf, "[0x%08" PRIX32 "]", op->data.sdword, len );
767 break;
768
769 case op_expression:
770 len -= format_seg( op, buf, len, native_syntax );
771 len -= format_expr( &op->data.expression, buf, len,
772 native_syntax );
773 break;
774 case op_unused:
775 case op_unknown:
776 /* return 0-truncated buffer */
777 break;
778 }
779
780 return( strlen( buf ) );
781}
782
783static int format_operand_xml( x86_op_t *op, x86_insn_t *insn, char *buf,
784 int len){
785
786 char str[MAX_OP_STRING] = "\0";
787
788 switch (op->type) {
789 case op_register:
790
791 get_operand_regtype_str( op->data.reg.type, str,
792 sizeof str );
793
794 STRNCAT( buf, "\t\t<register ", len );
795 STRNCATF( buf, "name=\"%s\" ", op->data.reg.name, len );
796 STRNCATF( buf, "type=\"%s\" ", str, len );
797 STRNCATF( buf, "size=%d/>\n", op->data.reg.size, len );
798 break;
799
800 case op_immediate:
801
802 get_operand_data_str( op, str, sizeof str );
803
804 STRNCAT( buf, "\t\t<immediate ", len );
805 STRNCATF( buf, "type=\"%s\" ",
806 get_operand_datatype_str (op), len );
807 STRNCATF( buf, "value=\"%s\"/>\n", str, len );
808 break;
809
810 case op_relative_near:
811 STRNCAT( buf, "\t\t<relative_offset ", len );
812
813 STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
814 (unsigned int)(op->data.sbyte +
815 insn->addr + insn->size), len );
816 break;
817
818 case op_relative_far:
819 STRNCAT( buf, "\t\t<relative_offset ", len );
820
821 if (op->datatype == op_word) {
822 STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
823 (unsigned int)(op->data.sword +
824 insn->addr + insn->size), len);
825 break;
826 } else {
827
828 STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
829 op->data.sdword + insn->addr + insn->size,
830 len );
831 }
832 break;
833
834 case op_absolute:
835
836 STRNCATF( buf,
837 "\t\t<absolute_address segment=\"0x%04" PRIX16 "\"",
838 op->data.absolute.segment, len );
839
840 if (op->datatype == op_descr16) {
841 STRNCATF( buf, "offset=\"0x%04" PRIX16 "\">",
842 op->data.absolute.offset.off16, len );
843 } else {
844 STRNCATF( buf, "offset=\"0x%08" PRIX32 "\">",
845 op->data.absolute.offset.off32, len );
846 }
847
848 STRNCAT( buf, "\t\t</absolute_address>\n", len );
849 break;
850
851 case op_expression:
852
853
854 STRNCAT( buf, "\t\t<address_expression>\n", len );
855
856 len -= format_seg( op, buf, len, xml_syntax );
857 len -= format_expr( &op->data.expression, buf, len,
858 xml_syntax );
859
860 STRNCAT( buf, "\t\t</address_expression>\n", len );
861 break;
862
863 case op_offset:
864
865 STRNCAT( buf, "\t\t<segment_offset>\n", len );
866
867 len -= format_seg( op, buf, len, xml_syntax );
868
869 STRNCAT( buf, "\t\t\t<address ", len);
870 STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
871 op->data.sdword, len );
872 STRNCAT( buf, "\t\t</segment_offset>\n", len );
873 break;
874
875 case op_unused:
876 case op_unknown:
877 /* return 0-truncated buffer */
878 break;
879 }
880
881 return( strlen( buf ) );
882}
883
884static int format_operand_raw( x86_op_t *op, x86_insn_t *insn, char *buf,
885 int len){
886
887 char str[MAX_OP_RAW_STRING];
888 const char *datatype = get_operand_datatype_str(op);
889
890 switch (op->type) {
891 case op_register:
892
893 get_operand_regtype_str( op->data.reg.type, str,
894 sizeof str );
895
896 STRNCAT( buf, "reg|", len );
897 STRNCATF( buf, "%s|", datatype, len );
898 STRNCATF( buf, "%s:", op->data.reg.name, len );
899 STRNCATF( buf, "%s:", str, len );
900 STRNCATF( buf, "%d|", op->data.reg.size, len );
901 break;
902
903 case op_immediate:
904
905 get_operand_data_str( op, str, sizeof str );
906
907 STRNCAT( buf, "immediate|", len );
908 STRNCATF( buf, "%s|", datatype, len );
909 STRNCATF( buf, "%s|", str, len );
910 break;
911
912 case op_relative_near:
913 /* NOTE: in raw format, we print the
914 * relative offset, not the actual
915 * address of the jump target */
916
917 STRNCAT( buf, "relative|", len );
918 STRNCATF( buf, "%s|", datatype, len );
919 STRNCATF( buf, "%" PRId8 "|", op->data.sbyte, len );
920 break;
921
922 case op_relative_far:
923
924 STRNCAT( buf, "relative|", len );
925 STRNCATF( buf, "%s|", datatype, len );
926
927 if (op->datatype == op_word) {
928 STRNCATF( buf, "%" PRId16 "|", op->data.sword, len);
929 break;
930 } else {
931 STRNCATF( buf, "%" PRId32 "|", op->data.sdword, len );
932 }
933 break;
934
935 case op_absolute:
936
937 STRNCAT( buf, "absolute_address|", len );
938 STRNCATF( buf, "%s|", datatype, len );
939
940 STRNCATF( buf, "$0x%04" PRIX16 ":", op->data.absolute.segment,
941 len );
942 if (op->datatype == op_descr16) {
943 STRNCATF( buf, "0x%04" PRIX16 "|",
944 op->data.absolute.offset.off16, len );
945 } else {
946 STRNCATF( buf, "0x%08" PRIX32 "|",
947 op->data.absolute.offset.off32, len );
948 }
949
950 break;
951
952 case op_expression:
953
954 STRNCAT( buf, "address_expression|", len );
955 STRNCATF( buf, "%s|", datatype, len );
956
957 len -= format_seg( op, buf, len, native_syntax );
958 len -= format_expr( &op->data.expression, buf, len,
959 raw_syntax );
960
961 STRNCAT( buf, "|", len );
962 break;
963
964 case op_offset:
965
966 STRNCAT( buf, "segment_offset|", len );
967 STRNCATF( buf, "%s|", datatype, len );
968
969 len -= format_seg( op, buf, len, xml_syntax );
970
971 STRNCATF( buf, "%08" PRIX32 "|", op->data.sdword, len );
972 break;
973
974 case op_unused:
975 case op_unknown:
976 /* return 0-truncated buffer */
977 break;
978 }
979
980 return( strlen( buf ) );
981}
982
983int x86_format_operand( x86_op_t *op, char *buf, int len,
984 enum x86_asm_format format ){
985 x86_insn_t *insn;
986
987 if ( ! op || ! buf || len < 1 ) {
988 return(0);
989 }
990
991 /* insn is stored in x86_op_t since .21-pre3 */
992 insn = (x86_insn_t *) op->insn;
993
994 memset( buf, 0, len );
995
996 switch ( format ) {
997 case att_syntax:
998 return format_operand_att( op, insn, buf, len );
999 case xml_syntax:
1000 return format_operand_xml( op, insn, buf, len );
1001 case raw_syntax:
1002 return format_operand_raw( op, insn, buf, len );
1003 case native_syntax:
1004 case intel_syntax:
1005 default:
1006 return format_operand_native( op, insn, buf, len );
1007 }
1008}
1009
/*
 * Operand classification predicates.  op is a pointer to an operand and is
 * evaluated up to three times, so it must be free of side effects.  The
 * argument is parenthesized so that any pointer expression may be passed.
 */
#define is_imm_jmp(op)   ((op)->type == op_absolute   || \
                          (op)->type == op_immediate  || \
                          (op)->type == op_offset)
#define is_memory_op(op) ((op)->type == op_absolute   || \
                          (op)->type == op_expression || \
                          (op)->type == op_offset)
1016
1017static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
1018 int size = 0;
1019 const char *suffix;
1020
1021 if (! insn || ! buf || ! len )
1022 return(0);
1023
1024 memset( buf, 0, len );
1025
1026 /* do long jump/call prefix */
1027 if ( insn->type == insn_jmp || insn->type == insn_call ) {
1028 if (! is_imm_jmp( x86_operand_1st(insn) ) ||
1029 (x86_operand_1st(insn))->datatype != op_byte ) {
1030 /* far jump/call, use "l" prefix */
1031 STRNCAT( buf, "l", len );
1032 }
1033 STRNCAT( buf, insn->mnemonic, len );
1034
1035 return ( strlen( buf ) );
1036 }
1037
1038 /* do mnemonic */
1039 STRNCAT( buf, insn->mnemonic, len );
1040
1041 /* do suffixes for memory operands */
1042 if (!(insn->note & insn_note_nosuffix) &&
1043 (insn->group == insn_arithmetic ||
1044 insn->group == insn_logic ||
1045 insn->group == insn_move ||
1046 insn->group == insn_stack ||
1047 insn->group == insn_string ||
1048 insn->group == insn_comparison ||
1049 insn->type == insn_in ||
1050 insn->type == insn_out
1051 )) {
1052 if ( x86_operand_count( insn, op_explicit ) > 0 &&
1053 is_memory_op( x86_operand_1st(insn) ) ){
1054 size = x86_operand_size( x86_operand_1st( insn ) );
1055 } else if ( x86_operand_count( insn, op_explicit ) > 1 &&
1056 is_memory_op( x86_operand_2nd(insn) ) ){
1057 size = x86_operand_size( x86_operand_2nd( insn ) );
1058 }
1059 }
1060
1061 if ( size == 1 ) suffix = "b";
1062 else if ( size == 2 ) suffix = "w";
1063 else if ( size == 4 ) suffix = "l";
1064 else if ( size == 8 ) suffix = "q";
1065 else suffix = "";
1066
1067 STRNCAT( buf, suffix, len );
1068 return ( strlen( buf ) );
1069}
1070
1071int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
1072 enum x86_asm_format format){
1073 char str[MAX_OP_STRING];
1074
1075 memset( buf, 0, len );
1076 STRNCAT( buf, insn->prefix_string, len );
1077 if ( format == att_syntax ) {
1078 format_att_mnemonic( insn, str, sizeof str );
1079 STRNCAT( buf, str, len );
1080 } else {
1081 STRNCAT( buf, insn->mnemonic, len );
1082 }
1083
1084 return( strlen( buf ) );
1085}
1086
1087struct op_string { char *buf; size_t len; };
1088
1089static void format_op_raw( x86_op_t *op, x86_insn_t *insn, void *arg ) {
1090 struct op_string * opstr = (struct op_string *) arg;
1091
1092 format_operand_raw(op, insn, opstr->buf, opstr->len);
1093}
1094
1095static int format_insn_note(x86_insn_t *insn, char *buf, int len){
1096 char note[32] = {0};
1097 int len_orig = len, note_len = 32;
1098
1099 if ( insn->note & insn_note_ring0 ) {
1100 STRNCATF( note, "%s", "Ring0 ", note_len );
1101 }
1102 if ( insn->note & insn_note_smm ) {
1103 STRNCATF( note, "%s", "SMM ", note_len );
1104 }
1105 if ( insn->note & insn_note_serial ) {
1106 STRNCATF(note, "%s", "Serialize ", note_len );
1107 }
1108 STRNCATF( buf, "%s|", note, len );
1109
1110 return( len_orig - len );
1111}
1112
1113static int format_raw_insn( x86_insn_t *insn, char *buf, int len ){
1114 struct op_string opstr = { buf, len };
1115 int i;
1116
1117 /* RAW style:
1118 * ADDRESS|OFFSET|SIZE|BYTES|
1119 * PREFIX|PREFIX_STRING|GROUP|TYPE|NOTES|
1120 * MNEMONIC|CPU|ISA|FLAGS_SET|FLAGS_TESTED|
1121 * STACK_MOD|STACK_MOD_VAL
1122 * [|OP_TYPE|OP_DATATYPE|OP_ACCESS|OP_FLAGS|OP]*
1123 *
1124 * Register values are encoded as:
1125 * NAME:TYPE:SIZE
1126 *
1127 * Effective addresses are encoded as:
1128 * disp(base_reg,index_reg,scale)
1129 */
1130 STRNCATF( buf, "0x%08" PRIX32 "|", insn->addr , len );
1131 STRNCATF( buf, "0x%08" PRIX32 "|", insn->offset, len );
1132 STRNCATF( buf, "%d|" , insn->size , len );
1133
1134 /* print bytes */
1135 for ( i = 0; i < insn->size; i++ ) {
1136 STRNCATF( buf, "%02X ", insn->bytes[i], len );
1137 }
1138 STRNCAT( buf, "|", len );
1139
1140 len -= format_insn_prefix_str( insn->prefix, buf, len );
1141 STRNCATF( buf, "|%s|", insn->prefix_string , len );
1142 STRNCATF( buf, "%s|", get_insn_group_str( insn->group ), len );
1143 STRNCATF( buf, "%s|", get_insn_type_str( insn->type ) , len );
1144 STRNCATF( buf, "%s|", insn->mnemonic , len );
1145 STRNCATF( buf, "%s|", get_insn_cpu_str( insn->cpu ) , len );
1146 STRNCATF( buf, "%s|", get_insn_isa_str( insn->isa ) , len );
1147
1148 /* insn note */
1149 len -= format_insn_note( insn, buf, len );
1150
1151 len -= format_insn_eflags_str( insn->flags_set, buf, len );
1152 STRNCAT( buf, "|", len );
1153 len -= format_insn_eflags_str( insn->flags_tested, buf, len );
1154 STRNCAT( buf, "|", len );
1155 STRNCATF( buf, "%d|", insn->stack_mod, len );
1156 STRNCATF( buf, "%" PRId32 "|", insn->stack_mod_val, len );
1157
1158 opstr.len = len;
1159 x86_operand_foreach( insn, format_op_raw, &opstr, op_any );
1160
1161 return( strlen (buf) );
1162}
1163
1164static int format_xml_insn( x86_insn_t *insn, char *buf, int len ) {
1165 char str[MAX_OP_XML_STRING];
1166 int i;
1167
1168 STRNCAT( buf, "<x86_insn>\n", len );
1169
1170 STRNCATF( buf, "\t<address rva=\"0x%08" PRIX32 "\" ", insn->addr, len );
1171 STRNCATF( buf, "offset=\"0x%08" PRIX32 "\" ", insn->offset, len );
1172 STRNCATF( buf, "size=%d bytes=\"", insn->size, len );
1173
1174 for ( i = 0; i < insn->size; i++ ) {
1175 STRNCATF( buf, "%02X ", insn->bytes[i], len );
1176 }
1177 STRNCAT( buf, "\"/>\n", len );
1178
1179 STRNCAT( buf, "\t<prefix type=\"", len );
1180 len -= format_insn_prefix_str( insn->prefix, buf, len );
1181 STRNCATF( buf, "\" string=\"%s\"/>\n", insn->prefix_string, len );
1182
1183 STRNCATF( buf, "\t<mnemonic group=\"%s\" ",
1184 get_insn_group_str (insn->group), len );
1185 STRNCATF( buf, "type=\"%s\" ", get_insn_type_str (insn->type), len );
1186 STRNCATF( buf, "string=\"%s\"/>\n", insn->mnemonic, len );
1187
1188 STRNCAT( buf, "\t<flags type=set>\n", len );
1189 STRNCAT( buf, "\t\t<flag name=\"", len );
1190 len -= format_insn_eflags_str( insn->flags_set, buf, len );
1191 STRNCAT( buf, "\"/>\n\t</flags>\n", len );
1192
1193
1194 STRNCAT( buf, "\t<flags type=tested>\n", len );
1195 STRNCAT( buf, "\t\t<flag name=\"", len );
1196 len -= format_insn_eflags_str( insn->flags_tested, buf, len );
1197 STRNCAT( buf, "\"/>\n\t</flags>\n", len );
1198
1199 if ( x86_operand_1st( insn ) ) {
1200 x86_format_operand( x86_operand_1st(insn), str,
1201 sizeof str, xml_syntax);
1202 STRNCAT( buf, "\t<operand name=dest>\n", len );
1203 STRNCAT( buf, str, len );
1204 STRNCAT( buf, "\t</operand>\n", len );
1205 }
1206
1207 if ( x86_operand_2nd( insn ) ) {
1208 x86_format_operand( x86_operand_2nd( insn ), str,
1209 sizeof str, xml_syntax);
1210 STRNCAT( buf, "\t<operand name=src>\n", len );
1211 STRNCAT( buf, str, len );
1212 STRNCAT( buf, "\t</operand>\n", len );
1213 }
1214
1215 if ( x86_operand_3rd( insn ) ) {
1216 x86_format_operand( x86_operand_3rd(insn), str,
1217 sizeof str, xml_syntax);
1218 STRNCAT( buf, "\t<operand name=imm>\n", len );
1219 STRNCAT( buf, str, len );
1220 STRNCAT( buf, "\t</operand>\n", len );
1221 }
1222
1223 STRNCAT( buf, "</x86_insn>\n", len );
1224
1225 return strlen (buf);
1226}
1227
1228int x86_format_header( char *buf, int len, enum x86_asm_format format ) {
1229 switch (format) {
1230 case att_syntax:
1231 snprintf( buf, len, "MNEMONIC\tSRC, DEST, IMM" );
1232 break;
1233 case intel_syntax:
1234 snprintf( buf, len, "MNEMONIC\tDEST, SRC, IMM" );
1235 break;
1236 case native_syntax:
1237 snprintf( buf, len, "ADDRESS\tBYTES\tMNEMONIC\t"
1238 "DEST\tSRC\tIMM" );
1239 break;
1240 case raw_syntax:
1241 snprintf( buf, len, "ADDRESS|OFFSET|SIZE|BYTES|"
1242 "PREFIX|PREFIX_STRING|GROUP|TYPE|NOTES|"
1243 "MNEMONIC|CPU|ISA|FLAGS_SET|FLAGS_TESTED|"
1244 "STACK_MOD|STACK_MOD_VAL"
1245 "[|OP_TYPE|OP_DATATYPE|OP_ACCESS|OP_FLAGS|OP]*"
1246 );
1247 break;
1248 case xml_syntax:
1249 snprintf( buf, len,
1250 "<x86_insn>"
1251 "<address rva= offset= size= bytes=/>"
1252 "<prefix type= string=/>"
1253 "<mnemonic group= type= string= "
1254 "cpu= isa= note= />"
1255 "<flags type=set>"
1256 "<flag name=>"
1257 "</flags>"
1258 "<stack_mod val= >"
1259 "<flags type=tested>"
1260 "<flag name=>"
1261 "</flags>"
1262 "<operand name=>"
1263 "<register name= type= size=/>"
1264 "<immediate type= value=/>"
1265 "<relative_offset value=/>"
1266 "<absolute_address value=>"
1267 "<segment value=/>"
1268 "</absolute_address>"
1269 "<address_expression>"
1270 "<segment value=/>"
1271 "<base>"
1272 "<register name= type= size=/>"
1273 "</base>"
1274 "<index>"
1275 "<register name= type= size=/>"
1276 "</index>"
1277 "<scale>"
1278 "<immediate value=/>"
1279 "</scale>"
1280 "<displacement>"
1281 "<immediate value=/>"
1282 "<address value=/>"
1283 "</displacement>"
1284 "</address_expression>"
1285 "<segment_offset>"
1286 "<address value=/>"
1287 "</segment_offset>"
1288 "</operand>"
1289 "</x86_insn>"
1290 );
1291 break;
1292 case unknown_syntax:
1293 if ( len ) {
1294 buf[0] = '\0';
1295 }
1296 break;
1297 }
1298
1299 return( strlen(buf) );
1300}
1301
1302int x86_format_insn( x86_insn_t *insn, char *buf, int len,
1303 enum x86_asm_format format ){
1304 char str[MAX_OP_STRING];
1305 x86_op_t *src, *dst;
1306 int i;
1307
1308 memset(buf, 0, len);
1309 if ( format == intel_syntax ) {
1310 /* INTEL STYLE: mnemonic dest, src, imm */
1311 STRNCAT( buf, insn->prefix_string, len );
1312 STRNCAT( buf, insn->mnemonic, len );
1313 STRNCAT( buf, "\t", len );
1314
1315 /* dest */
1316 if ( (dst = x86_operand_1st( insn )) && !(dst->flags & op_implied) ) {
1317 x86_format_operand( dst, str, MAX_OP_STRING, format);
1318 STRNCAT( buf, str, len );
1319 }
1320
1321 /* src */
1322 if ( (src = x86_operand_2nd( insn )) ) {
1323 if ( !(dst->flags & op_implied) ) {
1324 STRNCAT( buf, ", ", len );
1325 }
1326 x86_format_operand( src, str, MAX_OP_STRING, format);
1327 STRNCAT( buf, str, len );
1328 }
1329
1330 /* imm */
1331 if ( x86_operand_3rd( insn )) {
1332 STRNCAT( buf, ", ", len );
1333 x86_format_operand( x86_operand_3rd( insn ),
1334 str, MAX_OP_STRING, format);
1335 STRNCAT( buf, str, len );
1336 }
1337
1338 } else if ( format == att_syntax ) {
1339 /* ATT STYLE: mnemonic src, dest, imm */
1340 STRNCAT( buf, insn->prefix_string, len );
1341 format_att_mnemonic(insn, str, MAX_OP_STRING);
1342 STRNCATF( buf, "%s\t", str, len);
1343
1344
1345 /* not sure which is correct? sometimes GNU as requires
1346 * an imm as the first operand, sometimes as the third... */
1347 /* imm */
1348 if ( x86_operand_3rd( insn ) ) {
1349 x86_format_operand(x86_operand_3rd( insn ),
1350 str, MAX_OP_STRING, format);
1351 STRNCAT( buf, str, len );
1352 /* there is always 'dest' operand if there is 'src' */
1353 STRNCAT( buf, ", ", len );
1354 }
1355
1356 if ( (insn->note & insn_note_nonswap ) == 0 ) {
1357 /* regular AT&T style swap */
1358 src = x86_operand_2nd( insn );
1359 dst = x86_operand_1st( insn );
1360 }
1361 else {
1362 /* special-case instructions */
1363 src = x86_operand_1st( insn );
1364 dst = x86_operand_2nd( insn );
1365 }
1366
1367 /* src */
1368 if ( src ) {
1369 x86_format_operand(src, str, MAX_OP_STRING, format);
1370 STRNCAT( buf, str, len );
1371 /* there is always 'dest' operand if there is 'src' */
1372 if ( dst && !(dst->flags & op_implied) ) {
1373 STRNCAT( buf, ", ", len );
1374 }
1375 }
1376
1377 /* dest */
1378 if ( dst && !(dst->flags & op_implied) ) {
1379 x86_format_operand( dst, str, MAX_OP_STRING, format);
1380 STRNCAT( buf, str, len );
1381 }
1382
1383
1384 } else if ( format == raw_syntax ) {
1385 format_raw_insn( insn, buf, len );
1386 } else if ( format == xml_syntax ) {
1387 format_xml_insn( insn, buf, len );
1388 } else { /* default to native */
1389 /* NATIVE style: RVA\tBYTES\tMNEMONIC\tOPERANDS */
1390 /* print address */
1391 STRNCATF( buf, "%08" PRIX32 "\t", insn->addr, len );
1392
1393 /* print bytes */
1394 for ( i = 0; i < insn->size; i++ ) {
1395 STRNCATF( buf, "%02X ", insn->bytes[i], len );
1396 }
1397
1398 STRNCAT( buf, "\t", len );
1399
1400 /* print mnemonic */
1401 STRNCAT( buf, insn->prefix_string, len );
1402 STRNCAT( buf, insn->mnemonic, len );
1403 STRNCAT( buf, "\t", len );
1404
1405 /* print operands */
1406 /* dest */
1407 if ( x86_operand_1st( insn ) ) {
1408 x86_format_operand( x86_operand_1st( insn ),
1409 str, MAX_OP_STRING, format);
1410 STRNCATF( buf, "%s\t", str, len );
1411 }
1412
1413 /* src */
1414 if ( x86_operand_2nd( insn ) ) {
1415 x86_format_operand(x86_operand_2nd( insn ),
1416 str, MAX_OP_STRING, format);
1417 STRNCATF( buf, "%s\t", str, len );
1418 }
1419
1420 /* imm */
1421 if ( x86_operand_3rd( insn )) {
1422 x86_format_operand( x86_operand_3rd( insn ),
1423 str, MAX_OP_STRING, format);
1424 STRNCAT( buf, str, len );
1425 }
1426 }
1427
1428 return( strlen( buf ) );
1429}
1430
1431