1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4
5#include "libdis.h"
6#include "ia32_insn.h"
7#include "ia32_operand.h"
8#include "ia32_modrm.h"
9#include "ia32_reg.h"
10#include "x86_imm.h"
11#include "x86_operand_list.h"
12
13
14
15/* apply segment override to memory operand in insn */
16static void apply_seg( x86_op_t *op, unsigned int prefixes ) {
17 if (! prefixes ) return;
18
19 /* apply overrides from prefix */
20 switch ( prefixes & PREFIX_REG_MASK ) {
21 case PREFIX_CS:
22 op->flags |= op_cs_seg; break;
23 case PREFIX_SS:
24 op->flags |= op_ss_seg; break;
25 case PREFIX_DS:
26 op->flags |= op_ds_seg; break;
27 case PREFIX_ES:
28 op->flags |= op_es_seg; break;
29 case PREFIX_FS:
30 op->flags |= op_fs_seg; break;
31 case PREFIX_GS:
32 op->flags |= op_gs_seg; break;
33 }
34
35 return;
36}
37
38static size_t decode_operand_value( unsigned char *buf, size_t buf_len,
39 x86_op_t *op, x86_insn_t *insn,
40 unsigned int addr_meth, size_t op_size,
41 unsigned int op_value, unsigned char modrm,
42 size_t gen_regs ) {
43 size_t size = 0;
44
45 /* ++ Do Operand Addressing Method / Decode operand ++ */
46 switch (addr_meth) {
47 /* This sets the operand Size based on the Intel Opcode Map
48 * (Vol 2, Appendix A). Letter encodings are from section
49 * A.1.1, 'Codes for Addressing Method' */
50
51 /* ---------------------- Addressing Method -------------- */
52 /* Note that decoding mod ModR/M operand adjusts the size of
53 * the instruction, but decoding the reg operand does not.
54 * This should not cause any problems, as every 'reg' operand
55 * has an associated 'mod' operand.
56 * Goddamn-Intel-Note:
57 * Some Intel addressing methods [M, R] specify that modR/M
58 * byte may only refer to a memory address/may only refer to
59 * a register -- however Intel provides no clues on what to do
60 * if, say, the modR/M for an M opcode decodes to a register
61 * rather than a memory address ... returning 0 is out of the
62 * question, as this would be an Immediate or a RelOffset, so
63 * instead these modR/Ms are decoded with total disregard to
64 * the M, R constraints. */
65
66 /* MODRM -- mod operand. sets size to at least 1! */
67 case ADDRMETH_E: /* ModR/M present, Gen reg or memory */
68 size = ia32_modrm_decode( buf, buf_len, op, insn,
69 gen_regs );
70 break;
71 case ADDRMETH_M: /* ModR/M only refers to memory */
72 size = ia32_modrm_decode( buf, buf_len, op, insn,
73 gen_regs );
74 break;
75 case ADDRMETH_Q: /* ModR/M present, MMX or Memory */
76 size = ia32_modrm_decode( buf, buf_len, op, insn,
77 REG_MMX_OFFSET );
78 break;
79 case ADDRMETH_R: /* ModR/M mod == gen reg */
80 size = ia32_modrm_decode( buf, buf_len, op, insn,
81 gen_regs );
82 break;
83 case ADDRMETH_W: /* ModR/M present, mem or SIMD reg */
84 size = ia32_modrm_decode( buf, buf_len, op, insn,
85 REG_SIMD_OFFSET );
86 break;
87
88 /* MODRM -- reg operand. does not effect size! */
89 case ADDRMETH_C: /* ModR/M reg == control reg */
90 ia32_reg_decode( modrm, op, REG_CTRL_OFFSET );
91 break;
92 case ADDRMETH_D: /* ModR/M reg == debug reg */
93 ia32_reg_decode( modrm, op, REG_DEBUG_OFFSET );
94 break;
95 case ADDRMETH_G: /* ModR/M reg == gen-purpose reg */
96 ia32_reg_decode( modrm, op, gen_regs );
97 break;
98 case ADDRMETH_P: /* ModR/M reg == qword MMX reg */
99 ia32_reg_decode( modrm, op, REG_MMX_OFFSET );
100 break;
101 case ADDRMETH_S: /* ModR/M reg == segment reg */
102 ia32_reg_decode( modrm, op, REG_SEG_OFFSET );
103 break;
104 case ADDRMETH_T: /* ModR/M reg == test reg */
105 ia32_reg_decode( modrm, op, REG_TEST_OFFSET );
106 break;
107 case ADDRMETH_V: /* ModR/M reg == SIMD reg */
108 ia32_reg_decode( modrm, op, REG_SIMD_OFFSET );
109 break;
110
111 /* No MODRM : note these set operand type explicitly */
112 case ADDRMETH_A: /* No modR/M -- direct addr */
113 op->type = op_absolute;
114
115 /* segment:offset address used in far calls */
116 x86_imm_sized( buf, buf_len,
117 &op->data.absolute.segment, 2 );
118 if ( insn->addr_size == 4 ) {
119 x86_imm_sized( buf, buf_len,
120 &op->data.absolute.offset.off32, 4 );
121 size = 6;
122 } else {
123 x86_imm_sized( buf, buf_len,
124 &op->data.absolute.offset.off16, 2 );
125 size = 4;
126 }
127
128 break;
129 case ADDRMETH_I: /* Immediate val */
130 op->type = op_immediate;
131 /* if it ever becomes legal to have imm as dest and
132 * there is a src ModR/M operand, we are screwed! */
133 if ( op->flags & op_signed ) {
134 x86_imm_signsized(buf, buf_len, &op->data.byte,
135 op_size);
136 } else {
137 x86_imm_sized(buf, buf_len, &op->data.byte,
138 op_size);
139 }
140 size = op_size;
141 break;
142 case ADDRMETH_J: /* Rel offset to add to IP [jmp] */
143 /* this fills op->data.near_offset or
144 op->data.far_offset depending on the size of
145 the operand */
146 op->flags |= op_signed;
147 if ( op_size == 1 ) {
148 /* one-byte near offset */
149 op->type = op_relative_near;
150 x86_imm_signsized(buf, buf_len,
151 &op->data.relative_near, 1);
152 } else {
153 /* far offset...is this truly signed? */
154 op->type = op_relative_far;
155 x86_imm_signsized(buf, buf_len,
156 &op->data.relative_far, op_size );
157 }
158 size = op_size;
159 break;
160 case ADDRMETH_O: /* No ModR/M; op is word/dword offset */
161 /* NOTE: these are actually RVAs not offsets to seg!! */
162 /* note bene: 'O' ADDR_METH uses addr_size to
163 determine operand size */
164 op->type = op_offset;
165 op->flags |= op_pointer;
166 x86_imm_sized( buf, buf_len, &op->data.offset,
167 insn->addr_size );
168
169 size = insn->addr_size;
170 break;
171
172 /* Hard-coded: these are specified in the insn definition */
173 case ADDRMETH_F: /* EFLAGS register */
174 op->type = op_register;
175 op->flags |= op_hardcode;
176 ia32_handle_register( &op->data.reg, REG_FLAGS_INDEX );
177 break;
178 case ADDRMETH_X: /* Memory addressed by DS:SI [string] */
179 op->type = op_expression;
180 op->flags |= op_hardcode;
181 op->flags |= op_ds_seg | op_pointer | op_string;
182 ia32_handle_register( &op->data.expression.base,
183 REG_DWORD_OFFSET + 6 );
184 break;
185 case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */
186 op->type = op_expression;
187 op->flags |= op_hardcode;
188 op->flags |= op_es_seg | op_pointer | op_string;
189 ia32_handle_register( &op->data.expression.base,
190 REG_DWORD_OFFSET + 7 );
191 break;
192 case ADDRMETH_RR: /* Gen Register hard-coded in opcode */
193 op->type = op_register;
194 op->flags |= op_hardcode;
195 ia32_handle_register( &op->data.reg,
196 op_value + gen_regs );
197 break;
198 case ADDRMETH_RS: /* Seg Register hard-coded in opcode */
199 op->type = op_register;
200 op->flags |= op_hardcode;
201 ia32_handle_register( &op->data.reg,
202 op_value + REG_SEG_OFFSET );
203 break;
204 case ADDRMETH_RF: /* FPU Register hard-coded in opcode */
205 op->type = op_register;
206 op->flags |= op_hardcode;
207 ia32_handle_register( &op->data.reg,
208 op_value + REG_FPU_OFFSET );
209 break;
210 case ADDRMETH_RT: /* TST Register hard-coded in opcode */
211 op->type = op_register;
212 op->flags |= op_hardcode;
213 ia32_handle_register( &op->data.reg,
214 op_value + REG_TEST_OFFSET );
215 break;
216 case ADDRMETH_II: /* Immediate hard-coded in opcode */
217 op->type = op_immediate;
218 op->data.dword = op_value;
219 op->flags |= op_hardcode;
220 break;
221
222 case 0: /* Operand is not used */
223 default:
224 /* ignore -- operand not used in this insn */
225 op->type = op_unused; /* this shouldn't happen! */
226 break;
227 }
228
229 return size;
230}
231
232static size_t decode_operand_size( unsigned int op_type, x86_insn_t *insn,
233 x86_op_t *op ){
234 size_t size;
235
236 /* ++ Do Operand Type ++ */
237 switch (op_type) {
238 /* This sets the operand Size based on the Intel Opcode Map
239 * (Vol 2, Appendix A). Letter encodings are from section
240 * A.1.2, 'Codes for Operand Type' */
241 /* NOTE: in this routines, 'size' refers to the size
242 * of the operand in the raw (encoded) instruction;
243 * 'datatype' stores the actual size and datatype
244 * of the operand */
245
246 /* ------------------------ Operand Type ----------------- */
247 case OPTYPE_c: /* byte or word [op size attr] */
248 size = (insn->op_size == 4) ? 2 : 1;
249 op->datatype = (size == 4) ? op_word : op_byte;
250 break;
251 case OPTYPE_a: /* 2 word or 2 dword [op size attr] */
252 /* pointer to a 16:16 or 32:32 BOUNDS operand */
253 size = (insn->op_size == 4) ? 8 : 4;
254 op->datatype = (size == 4) ? op_bounds32 : op_bounds16;
255 break;
256 case OPTYPE_v: /* word or dword [op size attr] */
257 size = (insn->op_size == 4) ? 4 : 2;
258 op->datatype = (size == 4) ? op_dword : op_word;
259 break;
260 case OPTYPE_p: /* 32/48-bit ptr [op size attr] */
261 /* technically these flags are not accurate: the
262 * value s a 16:16 pointer or a 16:32 pointer, where
263 * the first '16' is a segment */
264 size = (insn->addr_size == 4) ? 6 : 4;
265 op->datatype = (size == 4) ? op_descr32 : op_descr16;
266 break;
267 case OPTYPE_b: /* byte, ignore op-size */
268 size = 1;
269 op->datatype = op_byte;
270 break;
271 case OPTYPE_w: /* word, ignore op-size */
272 size = 2;
273 op->datatype = op_word;
274 break;
275 case OPTYPE_d: /* dword , ignore op-size */
276 size = 4;
277 op->datatype = op_dword;
278 break;
279 case OPTYPE_s: /* 6-byte psuedo-descriptor */
280 /* ptr to 6-byte value which is 32:16 in 32-bit
281 * mode, or 8:24:16 in 16-bit mode. The high byte
282 * is ignored in 16-bit mode. */
283 size = 6;
284 op->datatype = (insn->addr_size == 4) ?
285 op_pdescr32 : op_pdescr16;
286 break;
287 case OPTYPE_q: /* qword, ignore op-size */
288 size = 8;
289 op->datatype = op_qword;
290 break;
291 case OPTYPE_dq: /* d-qword, ignore op-size */
292 size = 16;
293 op->datatype = op_dqword;
294 break;
295 case OPTYPE_ps: /* 128-bit FP data */
296 size = 16;
297 /* really this is 4 packed SP FP values */
298 op->datatype = op_ssimd;
299 break;
300 case OPTYPE_pd: /* 128-bit FP data */
301 size = 16;
302 /* really this is 2 packed DP FP values */
303 op->datatype = op_dsimd;
304 break;
305 case OPTYPE_ss: /* Scalar elem of 128-bit FP data */
306 size = 16;
307 /* this only looks at the low dword (4 bytes)
308 * of the xmmm register passed as a param.
309 * This is a 16-byte register where only 4 bytes
310 * are used in the insn. Painful, ain't it? */
311 op->datatype = op_sssimd;
312 break;
313 case OPTYPE_sd: /* Scalar elem of 128-bit FP data */
314 size = 16;
315 /* this only looks at the low qword (8 bytes)
316 * of the xmmm register passed as a param.
317 * This is a 16-byte register where only 8 bytes
318 * are used in the insn. Painful, again... */
319 op->datatype = op_sdsimd;
320 break;
321 case OPTYPE_pi: /* qword mmx register */
322 size = 8;
323 op->datatype = op_qword;
324 break;
325 case OPTYPE_si: /* dword integer register */
326 size = 4;
327 op->datatype = op_dword;
328 break;
329 case OPTYPE_fs: /* single-real */
330 size = 4;
331 op->datatype = op_sreal;
332 break;
333 case OPTYPE_fd: /* double real */
334 size = 8;
335 op->datatype = op_dreal;
336 break;
337 case OPTYPE_fe: /* extended real */
338 size = 10;
339 op->datatype = op_extreal;
340 break;
341 case OPTYPE_fb: /* packed BCD */
342 size = 10;
343 op->datatype = op_bcd;
344 break;
345 case OPTYPE_fv: /* pointer to FPU env: 14 or 28-bytes */
346 size = (insn->addr_size == 4)? 28 : 14;
347 op->datatype = (size == 28)? op_fpuenv32: op_fpuenv16;
348 break;
349 case OPTYPE_ft: /* pointer to FPU env: 94 or 108 bytes */
350 size = (insn->addr_size == 4)? 108 : 94;
351 op->datatype = (size == 108)?
352 op_fpustate32: op_fpustate16;
353 break;
354 case OPTYPE_fx: /* 512-byte register stack */
355 size = 512;
356 op->datatype = op_fpregset;
357 break;
358 case OPTYPE_fp: /* floating point register */
359 size = 10; /* double extended precision */
360 op->datatype = op_fpreg;
361 break;
362 case OPTYPE_m: /* fake operand type used for "lea Gv, M" */
363 size = insn->addr_size;
364 op->datatype = (size == 4) ? op_dword : op_word;
365 break;
366 case OPTYPE_none: /* handle weird instructions that have no encoding but use a dword datatype, like invlpg */
367 size = 0;
368 op->datatype = op_none;
369 break;
370 case 0:
371 default:
372 size = insn->op_size;
373 op->datatype = (size == 4) ? op_dword : op_word;
374 break;
375 }
376 return size;
377}
378
379size_t ia32_decode_operand( unsigned char *buf, size_t buf_len,
380 x86_insn_t *insn, unsigned int raw_op,
381 unsigned int raw_flags, unsigned int prefixes,
382 unsigned char modrm ) {
383 unsigned int addr_meth, op_type, op_size, gen_regs;
384 x86_op_t *op;
385 size_t size;
386
387 /* ++ Yank optype and addr mode out of operand flags */
388 addr_meth = raw_flags & ADDRMETH_MASK;
389 op_type = raw_flags & OPTYPE_MASK;
390
391 if ( raw_flags == ARG_NONE ) {
392 /* operand is not used in this instruction */
393 return 0;
394 }
395
396 /* allocate a new operand */
397 op = x86_operand_new( insn );
398
399 /* ++ Copy flags from opcode table to x86_insn_t */
400 op->access = (enum x86_op_access) OP_PERM(raw_flags);
401 op->flags = (enum x86_op_flags) (OP_FLAGS(raw_flags) >> 12);
402
403 /* Get size (for decoding) and datatype of operand */
404 op_size = decode_operand_size(op_type, insn, op);
405
406 /* override default register set based on Operand Type */
407 /* this allows mixing of 8, 16, and 32 bit regs in insn */
408 if (op_size == 1) {
409 gen_regs = REG_BYTE_OFFSET;
410 } else if (op_size == 2) {
411 gen_regs = REG_WORD_OFFSET;
412 } else {
413 gen_regs = REG_DWORD_OFFSET;
414 }
415
416 size = decode_operand_value( buf, buf_len, op, insn, addr_meth,
417 op_size, raw_op, modrm, gen_regs );
418
419 /* if operand is an address, apply any segment override prefixes */
420 if ( op->type == op_expression || op->type == op_offset ) {
421 apply_seg(op, prefixes);
422 }
423
424 return size; /* return number of bytes in instruction */
425}
426