1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <string.h> |
4 | |
5 | #include "libdis.h" |
6 | #include "ia32_insn.h" |
7 | #include "ia32_operand.h" |
8 | #include "ia32_modrm.h" |
9 | #include "ia32_reg.h" |
10 | #include "x86_imm.h" |
11 | #include "x86_operand_list.h" |
12 | |
13 | |
14 | |
15 | /* apply segment override to memory operand in insn */ |
16 | static void apply_seg( x86_op_t *op, unsigned int prefixes ) { |
17 | if (! prefixes ) return; |
18 | |
19 | /* apply overrides from prefix */ |
20 | switch ( prefixes & PREFIX_REG_MASK ) { |
21 | case PREFIX_CS: |
22 | op->flags |= op_cs_seg; break; |
23 | case PREFIX_SS: |
24 | op->flags |= op_ss_seg; break; |
25 | case PREFIX_DS: |
26 | op->flags |= op_ds_seg; break; |
27 | case PREFIX_ES: |
28 | op->flags |= op_es_seg; break; |
29 | case PREFIX_FS: |
30 | op->flags |= op_fs_seg; break; |
31 | case PREFIX_GS: |
32 | op->flags |= op_gs_seg; break; |
33 | } |
34 | |
35 | return; |
36 | } |
37 | |
38 | static size_t decode_operand_value( unsigned char *buf, size_t buf_len, |
39 | x86_op_t *op, x86_insn_t *insn, |
40 | unsigned int addr_meth, size_t op_size, |
41 | unsigned int op_value, unsigned char modrm, |
42 | size_t gen_regs ) { |
43 | size_t size = 0; |
44 | |
45 | /* ++ Do Operand Addressing Method / Decode operand ++ */ |
46 | switch (addr_meth) { |
47 | /* This sets the operand Size based on the Intel Opcode Map |
48 | * (Vol 2, Appendix A). Letter encodings are from section |
49 | * A.1.1, 'Codes for Addressing Method' */ |
50 | |
51 | /* ---------------------- Addressing Method -------------- */ |
52 | /* Note that decoding mod ModR/M operand adjusts the size of |
53 | * the instruction, but decoding the reg operand does not. |
54 | * This should not cause any problems, as every 'reg' operand |
55 | * has an associated 'mod' operand. |
56 | * Goddamn-Intel-Note: |
57 | * Some Intel addressing methods [M, R] specify that modR/M |
58 | * byte may only refer to a memory address/may only refer to |
59 | * a register -- however Intel provides no clues on what to do |
60 | * if, say, the modR/M for an M opcode decodes to a register |
61 | * rather than a memory address ... returning 0 is out of the |
62 | * question, as this would be an Immediate or a RelOffset, so |
63 | * instead these modR/Ms are decoded with total disregard to |
64 | * the M, R constraints. */ |
65 | |
66 | /* MODRM -- mod operand. sets size to at least 1! */ |
67 | case ADDRMETH_E: /* ModR/M present, Gen reg or memory */ |
68 | size = ia32_modrm_decode( buf, buf_len, op, insn, |
69 | gen_regs ); |
70 | break; |
71 | case ADDRMETH_M: /* ModR/M only refers to memory */ |
72 | size = ia32_modrm_decode( buf, buf_len, op, insn, |
73 | gen_regs ); |
74 | break; |
75 | case ADDRMETH_Q: /* ModR/M present, MMX or Memory */ |
76 | size = ia32_modrm_decode( buf, buf_len, op, insn, |
77 | REG_MMX_OFFSET ); |
78 | break; |
79 | case ADDRMETH_R: /* ModR/M mod == gen reg */ |
80 | size = ia32_modrm_decode( buf, buf_len, op, insn, |
81 | gen_regs ); |
82 | break; |
83 | case ADDRMETH_W: /* ModR/M present, mem or SIMD reg */ |
84 | size = ia32_modrm_decode( buf, buf_len, op, insn, |
85 | REG_SIMD_OFFSET ); |
86 | break; |
87 | |
88 | /* MODRM -- reg operand. does not effect size! */ |
89 | case ADDRMETH_C: /* ModR/M reg == control reg */ |
90 | ia32_reg_decode( modrm, op, REG_CTRL_OFFSET ); |
91 | break; |
92 | case ADDRMETH_D: /* ModR/M reg == debug reg */ |
93 | ia32_reg_decode( modrm, op, REG_DEBUG_OFFSET ); |
94 | break; |
95 | case ADDRMETH_G: /* ModR/M reg == gen-purpose reg */ |
96 | ia32_reg_decode( modrm, op, gen_regs ); |
97 | break; |
98 | case ADDRMETH_P: /* ModR/M reg == qword MMX reg */ |
99 | ia32_reg_decode( modrm, op, REG_MMX_OFFSET ); |
100 | break; |
101 | case ADDRMETH_S: /* ModR/M reg == segment reg */ |
102 | ia32_reg_decode( modrm, op, REG_SEG_OFFSET ); |
103 | break; |
104 | case ADDRMETH_T: /* ModR/M reg == test reg */ |
105 | ia32_reg_decode( modrm, op, REG_TEST_OFFSET ); |
106 | break; |
107 | case ADDRMETH_V: /* ModR/M reg == SIMD reg */ |
108 | ia32_reg_decode( modrm, op, REG_SIMD_OFFSET ); |
109 | break; |
110 | |
111 | /* No MODRM : note these set operand type explicitly */ |
112 | case ADDRMETH_A: /* No modR/M -- direct addr */ |
113 | op->type = op_absolute; |
114 | |
115 | /* segment:offset address used in far calls */ |
116 | x86_imm_sized( buf, buf_len, |
117 | &op->data.absolute.segment, 2 ); |
118 | if ( insn->addr_size == 4 ) { |
119 | x86_imm_sized( buf, buf_len, |
120 | &op->data.absolute.offset.off32, 4 ); |
121 | size = 6; |
122 | } else { |
123 | x86_imm_sized( buf, buf_len, |
124 | &op->data.absolute.offset.off16, 2 ); |
125 | size = 4; |
126 | } |
127 | |
128 | break; |
129 | case ADDRMETH_I: /* Immediate val */ |
130 | op->type = op_immediate; |
131 | /* if it ever becomes legal to have imm as dest and |
132 | * there is a src ModR/M operand, we are screwed! */ |
133 | if ( op->flags & op_signed ) { |
134 | x86_imm_signsized(buf, buf_len, &op->data.byte, |
135 | op_size); |
136 | } else { |
137 | x86_imm_sized(buf, buf_len, &op->data.byte, |
138 | op_size); |
139 | } |
140 | size = op_size; |
141 | break; |
142 | case ADDRMETH_J: /* Rel offset to add to IP [jmp] */ |
143 | /* this fills op->data.near_offset or |
144 | op->data.far_offset depending on the size of |
145 | the operand */ |
146 | op->flags |= op_signed; |
147 | if ( op_size == 1 ) { |
148 | /* one-byte near offset */ |
149 | op->type = op_relative_near; |
150 | x86_imm_signsized(buf, buf_len, |
151 | &op->data.relative_near, 1); |
152 | } else { |
153 | /* far offset...is this truly signed? */ |
154 | op->type = op_relative_far; |
155 | x86_imm_signsized(buf, buf_len, |
156 | &op->data.relative_far, op_size ); |
157 | } |
158 | size = op_size; |
159 | break; |
160 | case ADDRMETH_O: /* No ModR/M; op is word/dword offset */ |
161 | /* NOTE: these are actually RVAs not offsets to seg!! */ |
162 | /* note bene: 'O' ADDR_METH uses addr_size to |
163 | determine operand size */ |
164 | op->type = op_offset; |
165 | op->flags |= op_pointer; |
166 | x86_imm_sized( buf, buf_len, &op->data.offset, |
167 | insn->addr_size ); |
168 | |
169 | size = insn->addr_size; |
170 | break; |
171 | |
172 | /* Hard-coded: these are specified in the insn definition */ |
173 | case ADDRMETH_F: /* EFLAGS register */ |
174 | op->type = op_register; |
175 | op->flags |= op_hardcode; |
176 | ia32_handle_register( &op->data.reg, REG_FLAGS_INDEX ); |
177 | break; |
178 | case ADDRMETH_X: /* Memory addressed by DS:SI [string] */ |
179 | op->type = op_expression; |
180 | op->flags |= op_hardcode; |
181 | op->flags |= op_ds_seg | op_pointer | op_string; |
182 | ia32_handle_register( &op->data.expression.base, |
183 | REG_DWORD_OFFSET + 6 ); |
184 | break; |
185 | case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */ |
186 | op->type = op_expression; |
187 | op->flags |= op_hardcode; |
188 | op->flags |= op_es_seg | op_pointer | op_string; |
189 | ia32_handle_register( &op->data.expression.base, |
190 | REG_DWORD_OFFSET + 7 ); |
191 | break; |
192 | case ADDRMETH_RR: /* Gen Register hard-coded in opcode */ |
193 | op->type = op_register; |
194 | op->flags |= op_hardcode; |
195 | ia32_handle_register( &op->data.reg, |
196 | op_value + gen_regs ); |
197 | break; |
198 | case ADDRMETH_RS: /* Seg Register hard-coded in opcode */ |
199 | op->type = op_register; |
200 | op->flags |= op_hardcode; |
201 | ia32_handle_register( &op->data.reg, |
202 | op_value + REG_SEG_OFFSET ); |
203 | break; |
204 | case ADDRMETH_RF: /* FPU Register hard-coded in opcode */ |
205 | op->type = op_register; |
206 | op->flags |= op_hardcode; |
207 | ia32_handle_register( &op->data.reg, |
208 | op_value + REG_FPU_OFFSET ); |
209 | break; |
210 | case ADDRMETH_RT: /* TST Register hard-coded in opcode */ |
211 | op->type = op_register; |
212 | op->flags |= op_hardcode; |
213 | ia32_handle_register( &op->data.reg, |
214 | op_value + REG_TEST_OFFSET ); |
215 | break; |
216 | case ADDRMETH_II: /* Immediate hard-coded in opcode */ |
217 | op->type = op_immediate; |
218 | op->data.dword = op_value; |
219 | op->flags |= op_hardcode; |
220 | break; |
221 | |
222 | case 0: /* Operand is not used */ |
223 | default: |
224 | /* ignore -- operand not used in this insn */ |
225 | op->type = op_unused; /* this shouldn't happen! */ |
226 | break; |
227 | } |
228 | |
229 | return size; |
230 | } |
231 | |
232 | static size_t decode_operand_size( unsigned int op_type, x86_insn_t *insn, |
233 | x86_op_t *op ){ |
234 | size_t size; |
235 | |
236 | /* ++ Do Operand Type ++ */ |
237 | switch (op_type) { |
238 | /* This sets the operand Size based on the Intel Opcode Map |
239 | * (Vol 2, Appendix A). Letter encodings are from section |
240 | * A.1.2, 'Codes for Operand Type' */ |
241 | /* NOTE: in this routines, 'size' refers to the size |
242 | * of the operand in the raw (encoded) instruction; |
243 | * 'datatype' stores the actual size and datatype |
244 | * of the operand */ |
245 | |
246 | /* ------------------------ Operand Type ----------------- */ |
247 | case OPTYPE_c: /* byte or word [op size attr] */ |
248 | size = (insn->op_size == 4) ? 2 : 1; |
249 | op->datatype = (size == 4) ? op_word : op_byte; |
250 | break; |
251 | case OPTYPE_a: /* 2 word or 2 dword [op size attr] */ |
252 | /* pointer to a 16:16 or 32:32 BOUNDS operand */ |
253 | size = (insn->op_size == 4) ? 8 : 4; |
254 | op->datatype = (size == 4) ? op_bounds32 : op_bounds16; |
255 | break; |
256 | case OPTYPE_v: /* word or dword [op size attr] */ |
257 | size = (insn->op_size == 4) ? 4 : 2; |
258 | op->datatype = (size == 4) ? op_dword : op_word; |
259 | break; |
260 | case OPTYPE_p: /* 32/48-bit ptr [op size attr] */ |
261 | /* technically these flags are not accurate: the |
262 | * value s a 16:16 pointer or a 16:32 pointer, where |
263 | * the first '16' is a segment */ |
264 | size = (insn->addr_size == 4) ? 6 : 4; |
265 | op->datatype = (size == 4) ? op_descr32 : op_descr16; |
266 | break; |
267 | case OPTYPE_b: /* byte, ignore op-size */ |
268 | size = 1; |
269 | op->datatype = op_byte; |
270 | break; |
271 | case OPTYPE_w: /* word, ignore op-size */ |
272 | size = 2; |
273 | op->datatype = op_word; |
274 | break; |
275 | case OPTYPE_d: /* dword , ignore op-size */ |
276 | size = 4; |
277 | op->datatype = op_dword; |
278 | break; |
279 | case OPTYPE_s: /* 6-byte psuedo-descriptor */ |
280 | /* ptr to 6-byte value which is 32:16 in 32-bit |
281 | * mode, or 8:24:16 in 16-bit mode. The high byte |
282 | * is ignored in 16-bit mode. */ |
283 | size = 6; |
284 | op->datatype = (insn->addr_size == 4) ? |
285 | op_pdescr32 : op_pdescr16; |
286 | break; |
287 | case OPTYPE_q: /* qword, ignore op-size */ |
288 | size = 8; |
289 | op->datatype = op_qword; |
290 | break; |
291 | case OPTYPE_dq: /* d-qword, ignore op-size */ |
292 | size = 16; |
293 | op->datatype = op_dqword; |
294 | break; |
295 | case OPTYPE_ps: /* 128-bit FP data */ |
296 | size = 16; |
297 | /* really this is 4 packed SP FP values */ |
298 | op->datatype = op_ssimd; |
299 | break; |
300 | case OPTYPE_pd: /* 128-bit FP data */ |
301 | size = 16; |
302 | /* really this is 2 packed DP FP values */ |
303 | op->datatype = op_dsimd; |
304 | break; |
305 | case OPTYPE_ss: /* Scalar elem of 128-bit FP data */ |
306 | size = 16; |
307 | /* this only looks at the low dword (4 bytes) |
308 | * of the xmmm register passed as a param. |
309 | * This is a 16-byte register where only 4 bytes |
310 | * are used in the insn. Painful, ain't it? */ |
311 | op->datatype = op_sssimd; |
312 | break; |
313 | case OPTYPE_sd: /* Scalar elem of 128-bit FP data */ |
314 | size = 16; |
315 | /* this only looks at the low qword (8 bytes) |
316 | * of the xmmm register passed as a param. |
317 | * This is a 16-byte register where only 8 bytes |
318 | * are used in the insn. Painful, again... */ |
319 | op->datatype = op_sdsimd; |
320 | break; |
321 | case OPTYPE_pi: /* qword mmx register */ |
322 | size = 8; |
323 | op->datatype = op_qword; |
324 | break; |
325 | case OPTYPE_si: /* dword integer register */ |
326 | size = 4; |
327 | op->datatype = op_dword; |
328 | break; |
329 | case OPTYPE_fs: /* single-real */ |
330 | size = 4; |
331 | op->datatype = op_sreal; |
332 | break; |
333 | case OPTYPE_fd: /* double real */ |
334 | size = 8; |
335 | op->datatype = op_dreal; |
336 | break; |
337 | case OPTYPE_fe: /* extended real */ |
338 | size = 10; |
339 | op->datatype = op_extreal; |
340 | break; |
341 | case OPTYPE_fb: /* packed BCD */ |
342 | size = 10; |
343 | op->datatype = op_bcd; |
344 | break; |
345 | case OPTYPE_fv: /* pointer to FPU env: 14 or 28-bytes */ |
346 | size = (insn->addr_size == 4)? 28 : 14; |
347 | op->datatype = (size == 28)? op_fpuenv32: op_fpuenv16; |
348 | break; |
349 | case OPTYPE_ft: /* pointer to FPU env: 94 or 108 bytes */ |
350 | size = (insn->addr_size == 4)? 108 : 94; |
351 | op->datatype = (size == 108)? |
352 | op_fpustate32: op_fpustate16; |
353 | break; |
354 | case OPTYPE_fx: /* 512-byte register stack */ |
355 | size = 512; |
356 | op->datatype = op_fpregset; |
357 | break; |
358 | case OPTYPE_fp: /* floating point register */ |
359 | size = 10; /* double extended precision */ |
360 | op->datatype = op_fpreg; |
361 | break; |
362 | case OPTYPE_m: /* fake operand type used for "lea Gv, M" */ |
363 | size = insn->addr_size; |
364 | op->datatype = (size == 4) ? op_dword : op_word; |
365 | break; |
366 | case OPTYPE_none: /* handle weird instructions that have no encoding but use a dword datatype, like invlpg */ |
367 | size = 0; |
368 | op->datatype = op_none; |
369 | break; |
370 | case 0: |
371 | default: |
372 | size = insn->op_size; |
373 | op->datatype = (size == 4) ? op_dword : op_word; |
374 | break; |
375 | } |
376 | return size; |
377 | } |
378 | |
379 | size_t ia32_decode_operand( unsigned char *buf, size_t buf_len, |
380 | x86_insn_t *insn, unsigned int raw_op, |
381 | unsigned int raw_flags, unsigned int prefixes, |
382 | unsigned char modrm ) { |
383 | unsigned int addr_meth, op_type, op_size, gen_regs; |
384 | x86_op_t *op; |
385 | size_t size; |
386 | |
387 | /* ++ Yank optype and addr mode out of operand flags */ |
388 | addr_meth = raw_flags & ADDRMETH_MASK; |
389 | op_type = raw_flags & OPTYPE_MASK; |
390 | |
391 | if ( raw_flags == ARG_NONE ) { |
392 | /* operand is not used in this instruction */ |
393 | return 0; |
394 | } |
395 | |
396 | /* allocate a new operand */ |
397 | op = x86_operand_new( insn ); |
398 | |
399 | /* ++ Copy flags from opcode table to x86_insn_t */ |
400 | op->access = (enum x86_op_access) OP_PERM(raw_flags); |
401 | op->flags = (enum x86_op_flags) (OP_FLAGS(raw_flags) >> 12); |
402 | |
403 | /* Get size (for decoding) and datatype of operand */ |
404 | op_size = decode_operand_size(op_type, insn, op); |
405 | |
406 | /* override default register set based on Operand Type */ |
407 | /* this allows mixing of 8, 16, and 32 bit regs in insn */ |
408 | if (op_size == 1) { |
409 | gen_regs = REG_BYTE_OFFSET; |
410 | } else if (op_size == 2) { |
411 | gen_regs = REG_WORD_OFFSET; |
412 | } else { |
413 | gen_regs = REG_DWORD_OFFSET; |
414 | } |
415 | |
416 | size = decode_operand_value( buf, buf_len, op, insn, addr_meth, |
417 | op_size, raw_op, modrm, gen_regs ); |
418 | |
419 | /* if operand is an address, apply any segment override prefixes */ |
420 | if ( op->type == op_expression || op->type == op_offset ) { |
421 | apply_seg(op, prefixes); |
422 | } |
423 | |
424 | return size; /* return number of bytes in instruction */ |
425 | } |
426 | |