1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | // |
6 | // This file was previously known as instrs.h |
7 | // |
8 | /***************************************************************************** |
9 | * x86 instructions for the JIT compiler |
10 | * |
11 | * id -- the enum name for the instruction |
12 | * nm -- textual name (for assembly display) |
13 | * um -- update mode, see IUM_xx enum (rd, wr, or rw) |
14 | * mr -- base encoding for R/M[reg] addressing mode |
15 | * mi -- base encoding for R/M,icon addressing mode |
16 | * rm -- base encoding for reg,R/M addressing mode |
17 | * a4 -- base encoding for eax,i32 addressing mode |
18 | * rr -- base encoding for register addressing mode |
19 | * flags -- flags, see INS_FLAGS_* enum |
20 | * |
21 | ******************************************************************************/ |
22 | |
23 | // clang-format off |
24 | #if !defined(_TARGET_XARCH_) |
25 | #error Unexpected target type |
26 | #endif |
27 | |
28 | #ifndef INST1 |
29 | #error At least INST1 must be defined before including this file. |
30 | #endif |
31 | /*****************************************************************************/ |
32 | #ifndef INST0 |
33 | #define INST0(id, nm, um, mr, flags) |
34 | #endif |
35 | #ifndef INST2 |
36 | #define INST2(id, nm, um, mr, mi, flags) |
37 | #endif |
38 | #ifndef INST3 |
39 | #define INST3(id, nm, um, mr, mi, rm, flags) |
40 | #endif |
41 | #ifndef INST4 |
42 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) |
43 | #endif |
44 | #ifndef INST5 |
45 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) |
46 | #endif |
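// The INSTn macros form an X-macro interface: a consumer defines the macro
// shapes it cares about before including this file and receives one expansion
// per table row below. A minimal sketch of a hypothetical consumer that builds
// an instruction-name table (the INST1 shape is assumed to mirror INST0, and
// the include name is illustrative; the real JIT consumers differ):
//
//   #define INST0(id, nm, um, mr,                 flags) nm,
//   #define INST1(id, nm, um, mr,                 flags) nm,
//   #define INST2(id, nm, um, mr, mi,             flags) nm,
//   #define INST3(id, nm, um, mr, mi, rm,         flags) nm,
//   #define INST4(id, nm, um, mr, mi, rm, a4,     flags) nm,
//   #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) nm,
//
//   static const char* const insNames[] = {
//   #include "instrsxarch.h"
//   };
//
//   #undef INST0
//   #undef INST1
//   #undef INST2
//   #undef INST3
//   #undef INST4
//   #undef INST5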
47 | |
48 | /*****************************************************************************/ |
49 | /* The following is x86-specific */ |
50 | /*****************************************************************************/ |
51 | |
52 | // id nm um mr mi rm a4 rr flags |
53 | INST5(invalid, "INVALID" , IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
54 | |
55 | INST5(push, "push" , IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None) |
56 | INST5(pop, "pop" , IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None) |
57 | // Does not affect the stack tracking in the emitter |
58 | INST5(push_hide, "push" , IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None) |
59 | INST5(pop_hide, "pop" , IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None) |
60 | |
61 | INST5(inc, "inc" , IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_FLAGS_WritesFlags) |
62 | INST5(inc_l, "inc" , IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_FLAGS_WritesFlags) |
63 | INST5(dec, "dec" , IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesFlags) |
64 | INST5(dec_l, "dec" , IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesFlags) |
65 | |
66 | // Multi-byte opcodes without modrm are represented in a mixed-endian fashion. |
67 | // See the encoding comment about a quarter of the way through this file for more information. |
68 | INST5(bswap, "bswap" , IUM_RW, 0x0F00C8, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C80F, INS_FLAGS_None) |
69 | |
70 | // id nm um mr mi rm a4 flags |
71 | INST4(add, "add" , IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesFlags) |
72 | INST4(or, "or" , IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesFlags) |
73 | INST4(adc, "adc" , IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
74 | INST4(sbb, "sbb" , IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
75 | INST4(and, "and" , IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesFlags) |
76 | INST4(sub, "sub" , IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesFlags) |
77 | INST4(xor, "xor" , IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesFlags) |
78 | INST4(cmp, "cmp" , IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesFlags) |
79 | INST4(test, "test" , IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesFlags) |
80 | INST4(mov, "mov" , IUM_WR, 0x000088, 0x0000C6, 0x00008A, 0x0000B0, INS_FLAGS_None) |
81 | |
82 | INST4(lea, "lea" , IUM_WR, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE, INS_FLAGS_None) |
83 | |
84 | // id nm um mr mi rm flags |
85 | |
86 | // Note that the emitter has only partial support for BT. It can only emit the reg,reg form, |
87 | // and the registers need to be reversed to get the correct encoding. |
88 | INST3(bt, "bt" , IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesFlags) |
89 | |
90 | INST3(movsx, "movsx" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BE, INS_FLAGS_None) |
91 | #ifdef _TARGET_AMD64_ |
92 | INST3(movsxd, "movsxd" , IUM_WR, BAD_CODE, BAD_CODE, 0x4800000063, INS_FLAGS_None) |
93 | #endif |
94 | INST3(movzx, "movzx" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_FLAGS_None) |
95 | |
96 | INST3(cmovo, "cmovo" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsFlags) |
97 | INST3(cmovno, "cmovno" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsFlags) |
98 | INST3(cmovb, "cmovb" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsFlags) |
99 | INST3(cmovae, "cmovae" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsFlags) |
100 | INST3(cmove, "cmove" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsFlags) |
101 | INST3(cmovne, "cmovne" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsFlags) |
102 | INST3(cmovbe, "cmovbe" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsFlags) |
103 | INST3(cmova, "cmova" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsFlags) |
104 | INST3(cmovs, "cmovs" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsFlags) |
105 | INST3(cmovns, "cmovns" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsFlags) |
106 | INST3(cmovpe, "cmovpe" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsFlags) |
107 | INST3(cmovpo, "cmovpo" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsFlags) |
108 | INST3(cmovl, "cmovl" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsFlags) |
109 | INST3(cmovge, "cmovge" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsFlags) |
110 | INST3(cmovle, "cmovle" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsFlags) |
111 | INST3(cmovg, "cmovg" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsFlags) |
112 | |
113 | INST3(xchg, "xchg" , IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None) |
114 | INST3(imul, "imul" , IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesFlags) // op1 *= op2 |
115 | |
116 | // id nm um mr mi rm flags |
117 | |
118 | // Instead of encoding these as 3-operand instructions, we encode them |
119 | // as 2-operand instructions with the target register being implicit: |
120 | // implicit_reg = op1 * op2_icon |
121 | #define INSTMUL INST3 |
122 | INSTMUL(imul_AX, "imul" , IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_FLAGS_WritesFlags) |
123 | INSTMUL(imul_CX, "imul" , IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_FLAGS_WritesFlags) |
124 | INSTMUL(imul_DX, "imul" , IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_FLAGS_WritesFlags) |
125 | INSTMUL(imul_BX, "imul" , IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_FLAGS_WritesFlags) |
126 | INSTMUL(imul_SP, "imul" , IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_WritesFlags) |
127 | INSTMUL(imul_BP, "imul" , IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_FLAGS_WritesFlags) |
128 | INSTMUL(imul_SI, "imul" , IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_FLAGS_WritesFlags) |
129 | INSTMUL(imul_DI, "imul" , IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_FLAGS_WritesFlags) |
130 | |
131 | #ifdef _TARGET_AMD64_ |
132 | |
133 | INSTMUL(imul_08, "imul" , IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_FLAGS_WritesFlags) |
134 | INSTMUL(imul_09, "imul" , IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_FLAGS_WritesFlags) |
135 | INSTMUL(imul_10, "imul" , IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_FLAGS_WritesFlags) |
136 | INSTMUL(imul_11, "imul" , IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_FLAGS_WritesFlags) |
137 | INSTMUL(imul_12, "imul" , IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_FLAGS_WritesFlags) |
138 | INSTMUL(imul_13, "imul" , IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_FLAGS_WritesFlags) |
139 | INSTMUL(imul_14, "imul" , IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_FLAGS_WritesFlags) |
140 | INSTMUL(imul_15, "imul" , IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_FLAGS_WritesFlags) |
141 | |
142 | #endif // _TARGET_AMD64_ |
143 | |
144 | // the hex codes in this file represent the instruction encoding as follows: |
145 | // 0x0000ff00 - modrm byte position |
146 | // 0x000000ff - last byte of opcode (before modrm) |
147 | // 0x00ff0000 - first byte of opcode |
148 | // 0xff000000 - middle byte of opcode, if needed (after first, before last) |
149 | // |
150 | // So a 1-byte opcode is: and with modrm: |
151 | // 0x00000011 0x0000RM11 |
152 | // |
153 | // So a 2-byte opcode is: and with modrm: |
154 | // 0x00002211 0x0011RM22 |
155 | // |
156 | // So a 3-byte opcode is: and with modrm: |
157 | // 0x00113322 0x2211RM33 |
158 | // |
159 | // So a 4-byte opcode would be something like this: |
160 | // 0x22114433 |
161 | |
162 | #define PACK3(byte1,byte2,byte3) ((byte1 << 16) | (byte2 << 24) | byte3) |
163 | #define PACK2(byte1,byte2) ((byte1 << 16) | byte2) |
164 | #define SSEFLT(c) PACK3(0xf3, 0x0f, c) |
165 | #define SSEDBL(c) PACK3(0xf2, 0x0f, c) |
166 | #define PCKDBL(c) PACK3(0x66, 0x0f, c) |
167 | #define PCKFLT(c) PACK2(0x0f,c) |
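// For example, PCKDBL(0x58) (used by addpd below) expands to
// PACK3(0x66, 0x0f, 0x58) == 0x0F660058: the first opcode byte 0x66 sits in
// bits 16-23, the middle byte 0x0F in bits 24-31, and the last byte 0x58 in
// bits 0-7, leaving bits 8-15 free for the modrm byte -- the 3-byte opcode
// layout described above.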
168 | |
169 | // These macros encode an extra byte that is implicit in the macro. |
170 | #define PACK4(byte1,byte2,byte3,byte4) ((byte1 << 16) | (byte2 << 24) | byte3 | (byte4 << 8)) |
171 | #define SSE38(c) PACK4(0x66, 0x0f, 0x38, c) |
172 | #define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c) |
173 | |
174 | // VEX* encodes the implied leading opcode bytes in c1: |
175 | // 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a |
176 | #define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2) |
177 | #define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2) |
178 | #define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2) |
179 | |
180 | INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
181 | // These are the SSE instructions used on x86 |
182 | INST3(mov_i2xmm, "movd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg |
183 | INST3(mov_xmm2i, "movd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg |
184 | INST3(pmovmskb, "pmovmskb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_FLAGS_None) // Move the MSB bits of all bytes in a xmm reg to an int reg |
185 | INST3(movmskpd, "movmskpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_FLAGS_None) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. |
186 | INST3(movd, "movd" , IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) |
187 | INST3(movq, "movq" , IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None) |
188 | INST3(movsdsse2, "movsd" , IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_Flags_IsDstSrcSrcAVXInstruction) |
189 | |
190 | INST3(punpckldq, "punpckldq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_Flags_IsDstDstSrcAVXInstruction) |
191 | |
192 | INST3(xorps, "xorps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles |
193 | |
194 | INST3(cvttsd2si, "cvttsd2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_FLAGS_None) // cvt with trunc scalar double to signed DWORDs |
195 | |
196 | INST3(movntdq, "movntdq" , IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
197 | INST3(movnti, "movnti" , IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
198 | INST3(movntpd, "movntpd" , IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
199 | INST3(movntps, "movntps" , IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
200 | INST3(movdqu, "movdqu" , IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_FLAGS_None) |
201 | INST3(movdqa, "movdqa" , IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_FLAGS_None) |
202 | INST3(movlpd, "movlpd" , IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_Flags_IsDstSrcSrcAVXInstruction) |
203 | INST3(movlps, "movlps" , IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstSrcSrcAVXInstruction) |
204 | INST3(movhpd, "movhpd" , IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_Flags_IsDstSrcSrcAVXInstruction) |
205 | INST3(movhps, "movhps" , IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstSrcSrcAVXInstruction) |
206 | INST3(movss, "movss" , IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), INS_Flags_IsDstSrcSrcAVXInstruction) |
207 | INST3(movapd, "movapd" , IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_FLAGS_None) |
208 | INST3(movaps, "movaps" , IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_FLAGS_None) |
209 | INST3(movupd, "movupd" , IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_FLAGS_None) |
210 | INST3(movups, "movups" , IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_FLAGS_None) |
211 | INST3(movhlps, "movhlps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstDstSrcAVXInstruction) |
212 | INST3(movlhps, "movlhps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstDstSrcAVXInstruction) |
213 | INST3(movmskps, "movmskps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_FLAGS_None) |
214 | INST3(unpckhps, "unpckhps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_Flags_IsDstDstSrcAVXInstruction) |
215 | INST3(unpcklps, "unpcklps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_Flags_IsDstDstSrcAVXInstruction) |
216 | INST3(maskmovdqu, "maskmovdqu" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), INS_FLAGS_None) |
217 | |
218 | INST3(shufps, "shufps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_Flags_IsDstDstSrcAVXInstruction) |
219 | INST3(shufpd, "shufpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_Flags_IsDstDstSrcAVXInstruction) |
220 | |
221 | INST3(punpckhdq, "punpckhdq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_Flags_IsDstDstSrcAVXInstruction) |
222 | |
223 | INST3(lfence, "lfence" , IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
224 | INST3(mfence, "mfence" , IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
225 | INST3(prefetchnta, "prefetchnta" , IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
226 | INST3(prefetcht0, "prefetcht0" , IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
227 | INST3(prefetcht1, "prefetcht1" , IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
228 | INST3(prefetcht2, "prefetcht2" , IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
229 | INST3(sfence, "sfence" , IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
230 | |
231 | // SSE 2 arith |
232 | INST3(addps, "addps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles |
233 | INST3(addss, "addss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles |
234 | INST3(addpd, "addpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles |
235 | INST3(addsd, "addsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles |
236 | INST3(mulps, "mulps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles |
237 | INST3(mulss, "mulss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single |
238 | INST3(mulpd, "mulpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles |
239 | INST3(mulsd, "mulsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles |
240 | INST3(subps, "subps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles |
241 | INST3(subss, "subss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles |
242 | INST3(subpd, "subpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed doubles |
243 | INST3(subsd, "subsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles |
244 | INST3(minps, "minps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles |
245 | INST3(minss, "minss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single |
246 | INST3(minpd, "minpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles |
247 | INST3(minsd, "minsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double |
248 | INST3(divps, "divps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles |
249 | INST3(divss, "divss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles |
250 | INST3(divpd, "divpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles |
251 | INST3(divsd, "divsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles |
252 | INST3(maxps, "maxps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles |
253 | INST3(maxss, "maxss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single |
254 | INST3(maxpd, "maxpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles |
255 | INST3(maxsd, "maxsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double |
256 | INST3(xorpd, "xorpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles |
257 | INST3(andps, "andps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles |
258 | INST3(andpd, "andpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles |
259 | INST3(sqrtps, "sqrtps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_FLAGS_None) // Sqrt of packed singles |
260 | INST3(sqrtss, "sqrtss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single |
261 | INST3(sqrtpd, "sqrtpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_FLAGS_None) // Sqrt of packed doubles |
262 | INST3(sqrtsd, "sqrtsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double |
263 | INST3(andnps, "andnps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles |
264 | INST3(andnpd, "andnpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles |
265 | INST3(orps, "orps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles |
266 | INST3(orpd, "orpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles |
267 | INST3(haddpd, "haddpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles |
268 | INST3(haddps, "haddps" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats |
269 | INST3(hsubpd, "hsubpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles |
270 | INST3(hsubps, "hsubps" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats |
271 | INST3(addsubps, "addsubps" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles |
272 | INST3(addsubpd, "addsubpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles |
273 | |
274 | // SSE 2 approx arith |
275 | INST3(rcpps, "rcpps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_FLAGS_None) // Reciprocal of packed singles |
276 | INST3(rcpss, "rcpss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single |
277 | INST3(rsqrtps, "rsqrtps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_FLAGS_None) // Reciprocal Sqrt of packed singles |
278 | INST3(rsqrtss, "rsqrtss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single |
279 | |
280 | // SSE2 conversions |
281 | INST3(cvtpi2ps, "cvtpi2ps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2A), INS_FLAGS_None) // cvt packed DWORDs to singles |
282 | INST3(cvtsi2ss, "cvtsi2ss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single |
283 | INST3(cvtpi2pd, "cvtpi2pd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2A), INS_FLAGS_None) // cvt packed DWORDs to doubles |
284 | INST3(cvtsi2sd, "cvtsi2sd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double |
285 | INST3(cvttps2pi, "cvttps2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2C), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs |
286 | INST3(cvttss2si, "cvttss2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_FLAGS_None) // cvt with trunc scalar single to DWORD |
287 | INST3(cvttpd2pi, "cvttpd2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2C), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs |
288 | INST3(cvtps2pi, "cvtps2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2D), INS_FLAGS_None) // cvt packed singles to DWORDs |
289 | INST3(cvtss2si, "cvtss2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_FLAGS_None) // cvt scalar single to DWORD |
290 | INST3(cvtpd2pi, "cvtpd2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2D), INS_FLAGS_None) // cvt packed doubles to DWORDs |
291 | INST3(cvtsd2si, "cvtsd2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_FLAGS_None) // cvt scalar double to DWORD |
292 | INST3(cvtps2pd, "cvtps2pd" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_FLAGS_None) // cvt packed singles to doubles |
293 | INST3(cvtpd2ps, "cvtpd2ps" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_FLAGS_None) // cvt packed doubles to singles |
294 | INST3(cvtss2sd, "cvtss2sd" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles |
295 | INST3(cvtsd2ss, "cvtsd2ss" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles |
296 | INST3(cvtdq2ps, "cvtdq2ps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_FLAGS_None) // cvt packed DWORDs to singles |
297 | INST3(cvtps2dq, "cvtps2dq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_FLAGS_None) // cvt packed singles to DWORDs |
298 | INST3(cvttps2dq, "cvttps2dq" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs |
299 | INST3(cvtpd2dq, "cvtpd2dq" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_FLAGS_None) // cvt packed doubles to DWORDs |
300 | INST3(cvttpd2dq, "cvttpd2dq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs |
301 | INST3(cvtdq2pd, "cvtdq2pd" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_FLAGS_None) // cvt packed DWORDs to doubles |
302 | |
303 | // SSE2 comparison instructions |
304 | INST3(comiss, "comiss" , IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_None) // ordered compare singles |
305 | INST3(comisd, "comisd" , IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_None) // ordered compare doubles |
306 | INST3(ucomiss, "ucomiss" , IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_None) // unordered compare singles |
307 | INST3(ucomisd, "ucomisd" , IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_None) // unordered compare doubles |
308 | |
309 | // SSE2 packed single/double comparison operations. |
310 | // Note that these instructions not only compare but also overwrite the first source. |
311 | INST3(cmpps, "cmpps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles |
312 | INST3(cmppd, "cmppd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles |
313 | INST3(cmpss, "cmpss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles |
314 | INST3(cmpsd, "cmpsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles |
315 | |
316 | // SSE2 packed integer operations |
317 | INST3(paddb, "paddb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers |
318 | INST3(paddw, "paddw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers |
319 | INST3(paddd, "paddd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers |
320 | INST3(paddq, "paddq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers |
321 | INST3(paddsb, "paddsb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results |
322 | INST3(paddsw, "paddsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results |
323 | INST3(paddusb, "paddusb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results |
324 | INST3(paddusw, "paddusw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results |
325 | INST3(pavgb, "pavgb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers |
326 | INST3(pavgw, "pavgw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers |
327 | INST3(psubb, "psubb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed byte (8-bit) integers |
328 | INST3(psubw, "psubw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers |
329 | INST3(psubd, "psubd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers |
330 | INST3(psubq, "psubq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers |
331 | INST3(pmaddwd, "pmaddwd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst |
332 | INST3(pmulhw, "pmulhw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers |
333 | INST3(pmulhuw, "pmulhuw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers |
334 | INST3(pmuludq, "pmuludq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result |
335 | INST3(pmullw, "pmullw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result |
336 | INST3(pand, "pand" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs |
337 | INST3(pandn, "pandn" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs |
338 | INST3(por, "por" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs |
339 | INST3(pxor, "pxor" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs |
340 | INST3(psadbw, "psadbw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers |
341 | INST3(psubsb, "psubsb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation |
342 | INST3(psubusb, "psubusb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation |
343 | INST3(psubsw, "psubsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation |
344 | INST3(psubusw, "psubusw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation |
345 | |
346 | // Note that the shift immediates share the same encoding between left and right-shift, and are distinguished by the Reg/Opcode field, |
347 | // which is handled in emitxarch.cpp. |
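// For example, psrldq and pslldq below both use the 66 0F 73 /ib immediate form;
// the Reg/Opcode field of the modrm byte (/3 for psrldq, /7 for pslldq) is what
// selects the operation.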
348 | INST3(psrldq, "psrldq" , IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes |
349 | INST3(pslldq, "pslldq" , IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes |
350 | INST3(psllw, "psllw" , IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers |
351 | INST3(pslld, "pslld" , IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers |
352 | INST3(psllq, "psllq" , IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers |
353 | INST3(psrlw, "psrlw" , IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers |
354 | INST3(psrld, "psrld" , IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers |
355 | INST3(psrlq, "psrlq" , IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers |
356 | INST3(psraw, "psraw" , IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers |
357 | INST3(psrad, "psrad" , IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers |
358 | |
359 | INST3(pmaxub, "pmaxub" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes |
360 | INST3(pminub, "pminub" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes |
361 | INST3(pmaxsw, "pmaxsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words |
362 | INST3(pminsw, "pminsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words |
363 | INST3(pcmpeqd, "pcmpeqd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality |
364 | INST3(pcmpgtd, "pcmpgtd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than |
365 | INST3(pcmpeqw, "pcmpeqw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality |
366 | INST3(pcmpgtw, "pcmpgtw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than |
367 | INST3(pcmpeqb, "pcmpeqb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality |
368 | INST3(pcmpgtb, "pcmpgtb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than |
369 | |
370 | INST3(pshufd, "pshufd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_FLAGS_None) // Packed shuffle of 32-bit integers |
371 | INST3(pshufhw, "pshufhw" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_FLAGS_None) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. |
372 | INST3(pshuflw, "pshuflw" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_FLAGS_None) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. |
373 | INST3(pextrw, "pextrw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_FLAGS_None) // Extract 16-bit value into an r32, zero-extended to 32 bits |
374 | INST3(pinsrw, "pinsrw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index |
375 | |
376 | INST3(punpckhbw, "punpckhbw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) |
377 | INST3(punpcklbw, "punpcklbw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) |
378 | INST3(punpckhqdq, "punpckhqdq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi) |
379 | INST3(punpcklqdq, "punpcklqdq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo) |
380 | INST3(punpckhwd, "punpckhwd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) |
381 | INST3(punpcklwd, "punpcklwd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) |
382 | INST3(unpckhpd, "unpckhpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_Flags_IsDstDstSrcAVXInstruction) // Interleave high packed doubles from the two sources |
383 | INST3(unpcklpd, "unpcklpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_Flags_IsDstDstSrcAVXInstruction) // Interleave low packed doubles from the two sources |
384 | |
385 | INST3(packssdw, "packssdw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation |
386 | INST3(packsswb, "packsswb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation |
387 | INST3(packuswb, "packuswb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation |
388 | |
389 | // id nm um mr mi rm flags |
390 | INST3(dpps, "dpps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs |
391 | INST3(dppd, "dppd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs |
392 | INST3(insertps, "insertps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value |
393 | INST3(pcmpeqq, "pcmpeqq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality |
394 | INST3(pcmpgtq, "pcmpgtq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than |
395 | INST3(pmulld, "pmulld" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result |
396 | INST3(ptest, "ptest" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x17), INS_FLAGS_None) // Packed logical compare |
397 | INST3(phaddd, "phaddd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add |
398 | INST3(pabsb, "pabsb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_FLAGS_None) // Packed absolute value of bytes |
399 | INST3(pabsw, "pabsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_FLAGS_None) // Packed absolute value of 16-bit integers |
400 | INST3(pabsd, "pabsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_FLAGS_None) // Packed absolute value of 32-bit integers |
401 | INST3(palignr, "palignr" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right |
402 | INST3(pmaddubsw, "pmaddubsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes |
403 | INST3(pmulhrsw, "pmulhrsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale |
404 | INST3(pshufb, "pshufb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes |
405 | INST3(psignb, "psignb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN |
406 | INST3(psignw, "psignw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN |
407 | INST3(psignd, "psignd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN |
408 | INST3(pminsb, "pminsb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes |
409 | INST3(pminsd, "pminsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers |
410 | INST3(pminuw, "pminuw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers |
411 | INST3(pminud, "pminud" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers |
412 | INST3(pmaxsb, "pmaxsb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes |
413 | INST3(pmaxsd, "pmaxsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers |
414 | INST3(pmaxuw, "pmaxuw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers |
415 | INST3(pmaxud, "pmaxud" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers |
416 | INST3(pmovsxbw, "pmovsxbw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_FLAGS_None) // Packed sign extend byte to short |
417 | INST3(pmovsxbd, "pmovsxbd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_FLAGS_None) // Packed sign extend byte to int |
418 | INST3(pmovsxbq, "pmovsxbq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_FLAGS_None) // Packed sign extend byte to long |
419 | INST3(pmovsxwd, "pmovsxwd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), INS_FLAGS_None) // Packed sign extend short to int |
420 | INST3(pmovsxwq, "pmovsxwq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), INS_FLAGS_None) // Packed sign extend short to long |
421 | INST3(pmovsxdq, "pmovsxdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), INS_FLAGS_None) // Packed sign extend int to long |
422 | INST3(pmovzxbw, "pmovzxbw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), INS_FLAGS_None) // Packed zero extend byte to short |
423 | INST3(pmovzxbd, "pmovzxbd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), INS_FLAGS_None) // Packed zero extend byte to int |
424 | INST3(pmovzxbq, "pmovzxbq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), INS_FLAGS_None) // Packed zero extend byte to long |
425 | INST3(pmovzxwd, "pmovzxwd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_FLAGS_None) // Packed zero extend short to int |
426 | INST3(pmovzxwq, "pmovzxwq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_FLAGS_None) // Packed zero extend short to long |
427 | INST3(pmovzxdq, "pmovzxdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_FLAGS_None) // Packed zero extend int to long |
428 | INST3(packusdw, "packusdw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation |
429 | INST3(roundps, "roundps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_FLAGS_None) // Round packed single precision floating-point values |
430 | INST3(roundss, "roundss" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values |
431 | INST3(roundpd, "roundpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_FLAGS_None) // Round packed double precision floating-point values |
432 | INST3(roundsd, "roundsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values |
433 | INST3(pmuldq, "pmuldq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result |
434 | INST3(blendps, "blendps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values |
435 | INST3(blendvps, "blendvps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_FLAGS_None) // Variable Blend Packed Singles |
436 | INST3(blendpd, "blendpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values |
437 | INST3(blendvpd, "blendvpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_FLAGS_None) // Variable Blend Packed Doubles |
438 | INST3(pblendw, "pblendw" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words |
439 | INST3(pblendvb, "pblendvb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_FLAGS_None) // Variable Blend Packed Bytes |
440 | INST3(phaddw, "phaddw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers |
441 | INST3(phsubw, "phsubw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers |
442 | INST3(phsubd, "phsubd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers |
443 | INST3(phaddsw, "phaddsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation |
444 | INST3(phsubsw, "phsubsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation |
445 | INST3(lddqu, "lddqu" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_FLAGS_None) // Load Unaligned integer |
446 | INST3(movntdqa, "movntdqa" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_FLAGS_None) // Load Double Quadword Non-Temporal Aligned Hint |
447 | INST3(movddup, "movddup" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_FLAGS_None) // Replicate Double FP Values |
448 | INST3(movsldup, "movsldup" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_FLAGS_None) // Replicate even-indexed Single FP Values |
449 | INST3(movshdup, "movshdup" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_FLAGS_None) // Replicate odd-indexed Single FP Values |
450 | INST3(phminposuw, "phminposuw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_FLAGS_None) // Packed Horizontal Word Minimum |
451 | INST3(mpsadbw, "mpsadbw" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference |
452 | INST3(pinsrb, "pinsrb" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte |
453 | INST3(pinsrd, "pinsrd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword |
454 | INST3(pinsrq, "pinsrq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword |
455 | INST3(pextrb, "pextrb" , IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Byte |
456 | INST3(pextrd, "pextrd" , IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Dword |
457 | INST3(pextrq, "pextrq" , IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Qword |
458 | INST3(pextrw_sse41, "pextrw" , IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Word |
459 | INST3(extractps, "extractps" , IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Packed Floating-Point Values |
460 | |
461 | // PCLMULQDQ instructions |
462 | INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords |
463 | |
464 | // AES instructions |
465 | INST3(aesdec, "aesdec" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow |
466 | INST3(aesdeclast, "aesdeclast" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow |
467 | INST3(aesenc, "aesenc" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow |
468 | INST3(aesenclast, "aesenclast" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow |
469 | INST3(aesimc, "aesimc" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_FLAGS_None) // Perform the AES InvMixColumn Transformation |
470 | INST3(aeskeygenassist, "aeskeygenassist" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_FLAGS_None) // AES Round Key Generation Assist |
471 | INST3(LAST_SSE_INSTRUCTION, "LAST_SSE_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
472 | |
473 | INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
474 | // AVX only instructions |
475 | INST3(vbroadcastss, "broadcastss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_FLAGS_None) // Broadcast float value read from memory to entire ymm register |
476 | INST3(vbroadcastsd, "broadcastsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_FLAGS_None) // Broadcast double value read from memory to entire ymm register |
477 | INST3(vpbroadcastb, "pbroadcastb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_FLAGS_None) // Broadcast int8 value from reg/memory to entire ymm register |
478 | INST3(vpbroadcastw, "pbroadcastw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_FLAGS_None) // Broadcast int16 value from reg/memory to entire ymm register |
479 | INST3(vpbroadcastd, "pbroadcastd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_FLAGS_None) // Broadcast int32 value from reg/memory to entire ymm register |
480 | INST3(vpbroadcastq, "pbroadcastq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_FLAGS_None) // Broadcast int64 value from reg/memory to entire ymm register |
481 | INST3(vextractf128, "extractf128" , IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed floating point values |
482 | INST3(vextracti128, "extracti128" , IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed integer values |
483 | INST3(vinsertf128, "insertf128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values |
484 | INST3(vinserti128, "inserti128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values |
485 | INST3(vzeroupper, "zeroupper" , IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_FLAGS_None) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) |
486 | INST3(vperm2i128, "perm2i128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register |
487 | INST3(vpermq, "permq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_FLAGS_None) // Permute 64-bit elements of input register |
488 | INST3(vpblendd, "pblendd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs |
489 | INST3(vblendvps, "blendvps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles |
490 | INST3(vblendvpd, "blendvpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles |
491 | INST3(vpblendvb, "pblendvb" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes |
492 | INST3(vtestps, "testps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_FLAGS_None) // Packed Bit Test |
493 | INST3(vtestpd, "testpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_FLAGS_None) // Packed Bit Test |
494 | INST3(vpsrlvd, "psrlvd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical |
495 | INST3(vpsrlvq, "psrlvq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical |
496 | INST3(vpsravd, "psravd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic |
497 | INST3(vpsllvd, "psllvd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical |
498 | INST3(vpsllvq, "psllvq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical |
499 | INST3(vpermilps, "permilps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values |
500 | INST3(vpermilpd, "permilpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_FLAGS_None) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values |
501 | INST3(vpermilpsvar, "permilpsvar" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values |
502 | INST3(vpermilpdvar, "permilpdvar" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values |
503 | INST3(vperm2f128, "perm2f128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values |
504 | INST3(vpermpd, "permpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_FLAGS_None) // Permute Double-Precision Floating-Point Values |
505 | INST3(vpermd, "permd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements |
506 | INST3(vpermps, "permps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements |
507 | INST3(vbroadcastf128, "broadcastf128" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_FLAGS_None) // Broadcast packed float values read from memory to entire ymm register |
508 | INST3(vbroadcasti128, "broadcasti128" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_FLAGS_None) // Broadcast packed integer values read from memory to entire ymm register |
509 | INST3(vmaskmovps, "maskmovps" , IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores |
510 | INST3(vmaskmovpd, "maskmovpd" , IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores |
511 | INST3(vpmaskmovd, "pmaskmovd" , IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores |
512 | INST3(vpmaskmovq, "pmaskmovq" , IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores |
513 | INST3(vpgatherdd, "pgatherdd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword Indices |
514 | INST3(vpgatherqd, "pgatherqd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword Indices |
515 | INST3(vpgatherdq, "pgatherdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Dword Indices |
516 | INST3(vpgatherqq, "pgatherqq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Qword Indices |
517 | INST3(vgatherdps, "gatherdps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices |
518 | INST3(vgatherqps, "gatherqps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices |
519 | INST3(vgatherdpd, "gatherdpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices |
520 | INST3(vgatherqpd, "gatherqpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices |
521 | |
522 | INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
523 | // id nm um mr mi rm flags |
524 | INST3(vfmadd132pd, "fmadd132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values |
525 | INST3(vfmadd213pd, "fmadd213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) // |
526 | INST3(vfmadd231pd, "fmadd231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) // |
527 | INST3(vfmadd132ps, "fmadd132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values |
528 | INST3(vfmadd213ps, "fmadd213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) // |
529 | INST3(vfmadd231ps, "fmadd231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) // |
530 | INST3(vfmadd132sd, "fmadd132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values |
531 | INST3(vfmadd213sd, "fmadd213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) // |
532 | INST3(vfmadd231sd, "fmadd231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) // |
533 | INST3(vfmadd132ss, "fmadd132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values |
534 | INST3(vfmadd213ss, "fmadd213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) // |
535 | INST3(vfmadd231ss, "fmadd231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) // |
536 | INST3(vfmaddsub132pd, "fmaddsub132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values |
537 | INST3(vfmaddsub213pd, "fmaddsub213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) // |
538 | INST3(vfmaddsub231pd, "fmaddsub231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) // |
539 | INST3(vfmaddsub132ps, "fmaddsub132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values |
540 | INST3(vfmaddsub213ps, "fmaddsub213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) // |
541 | INST3(vfmaddsub231ps, "fmaddsub231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) // |
542 | INST3(vfmsubadd132pd, "fmsubadd132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values |
543 | INST3(vfmsubadd213pd, "fmsubadd213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) // |
544 | INST3(vfmsubadd231pd, "fmsubadd231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) // |
545 | INST3(vfmsubadd132ps, "fmsubadd132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values |
546 | INST3(vfmsubadd213ps, "fmsubadd213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) // |
547 | INST3(vfmsubadd231ps, "fmsubadd231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) // |
548 | INST3(vfmsub132pd, "fmsub132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values |
549 | INST3(vfmsub213pd, "fmsub213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) // |
550 | INST3(vfmsub231pd, "fmsub231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) // |
551 | INST3(vfmsub132ps, "fmsub132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values |
552 | INST3(vfmsub213ps, "fmsub213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) // |
553 | INST3(vfmsub231ps, "fmsub231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) // |
554 | INST3(vfmsub132sd, "fmsub132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values |
555 | INST3(vfmsub213sd, "fmsub213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) // |
556 | INST3(vfmsub231sd, "fmsub231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) // |
557 | INST3(vfmsub132ss, "fmsub132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values |
558 | INST3(vfmsub213ss, "fmsub213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) // |
559 | INST3(vfmsub231ss, "fmsub231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) // |
INST3(vfnmadd132pd, "fnmadd132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
INST3(vfnmadd213pd, "fnmadd213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd231pd, "fnmadd231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd132ps, "fnmadd132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
INST3(vfnmadd213ps, "fnmadd213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd231ps, "fnmadd231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd132sd, "fnmadd132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
INST3(vfnmadd213sd, "fnmadd213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd231sd, "fnmadd231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd132ss, "fnmadd132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
INST3(vfnmadd213ss, "fnmadd213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmadd231ss, "fnmadd231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub132pd, "fnmsub132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
INST3(vfnmsub213pd, "fnmsub213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub231pd, "fnmsub231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub132ps, "fnmsub132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
INST3(vfnmsub213ps, "fnmsub213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub231ps, "fnmsub231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub132sd, "fnmsub132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
INST3(vfnmsub213sd, "fnmsub213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub231sd, "fnmsub231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub132ss, "fnmsub132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
INST3(vfnmsub213ss, "fnmsub213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(vfnmsub231ss, "fnmsub231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
584 | INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
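// The FIRST_FMA_INSTRUCTION / LAST_FMA_INSTRUCTION rows (like the FIRST/LAST AVX and BMI markers
// below) are sentinels: they emit no code and exist only so the generated enum values form a
// contiguous range. A minimal sketch of how such a range check might be consumed, assuming the
// usual INS_<id> enum naming (illustrative only; the JIT's actual helpers may differ):
//
//   bool IsFmaInstruction(instruction ins)
//   {
//       return (ins > INS_FIRST_FMA_INSTRUCTION) && (ins < INS_LAST_FMA_INSTRUCTION);
//   }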
585 | |
586 | // BMI1 |
587 | INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
588 | INST3(andn, "andn" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT |
589 | INST3(blsi, "blsi" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit |
590 | INST3(blsmsk, "blsmsk" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit |
591 | INST3(blsr, "blsr" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit |
592 | INST3(bextr, "bextr" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract |
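// Note: blsi, blsmsk and blsr above share the VEX-encoded 0F38 F3 opcode and are distinguished by
// the ModR/M reg field (/3, /2 and /1 respectively); that field is presumably supplied where the
// instruction is actually assembled, since only the shared base opcode is recorded here.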
593 | |
594 | // BMI2 |
595 | INST3(pdep, "pdep" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit |
596 | INST3(pext, "pext" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract |
597 | INST3(bzhi, "bzhi" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position |
598 | INST3(mulx, "mulx" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags |
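// Note: pdep, pext and bzhi share opcode 0F38 F5 and differ only in the mandatory SIMD prefix
// (F2, F3 and none, respectively); mulx is the F2-prefixed 0F38 F6. As with the BMI1 group, the
// prefix selection is presumably handled where the VEX encoding is assembled.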
599 | |
600 | INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
601 | |
602 | INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
603 | |
604 | // Scalar instructions in SSE4.2 |
605 | INST3(crc32, "crc32" , IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_FLAGS_None) |
606 | |
607 | // BMI1 |
608 | INST3(tzcnt, "tzcnt" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_FLAGS_None) // Count the Number of Trailing Zero Bits |
609 | |
610 | // LZCNT |
611 | INST3(lzcnt, "lzcnt" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_None) |
612 | |
613 | // POPCNT |
614 | INST3(popcnt, "popcnt" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_None) |
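// Note: these scalar instructions are legacy-encoded with a mandatory prefix: crc32 is
// F2 0F 38 F0/F1, while tzcnt, lzcnt and popcnt are F3 0F BC, F3 0F BD and F3 0F B8
// respectively (SSEFLT here presumably denotes the F3-prefixed 0F escape).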
615 | |
616 | // id nm um mr mi flags |
617 | INST2(ret, "ret" , IUM_RD, 0x0000C3, 0x0000C2, INS_FLAGS_None) |
618 | INST2(loop, "loop" , IUM_RD, BAD_CODE, 0x0000E2, INS_FLAGS_None) |
619 | INST2(call, "call" , IUM_RD, 0x0010FF, 0x0000E8, INS_FLAGS_WritesFlags) |
620 | |
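// The rotate/shift entries below come in three encodings: the base form counts by CL (opcode D2,
// with the operand-size bit added elsewhere), the _1 form shifts by an implicit 1 (opcode D0), and
// the _N form takes an imm8 count (opcode C0). The second byte of the mr column carries the ModR/M
// reg extension: /0 rol, /1 ror, /2 rcl, /3 rcr, /4 shl, /5 shr, /7 sar.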
621 | INST2(rol, "rol" , IUM_RW, 0x0000D2, BAD_CODE, INS_FLAGS_WritesFlags) |
622 | INST2(rol_1, "rol" , IUM_RW, 0x0000D0, 0x0000D0, INS_FLAGS_WritesFlags) |
623 | INST2(rol_N, "rol" , IUM_RW, 0x0000C0, 0x0000C0, INS_FLAGS_WritesFlags) |
624 | INST2(ror, "ror" , IUM_RW, 0x0008D2, BAD_CODE, INS_FLAGS_WritesFlags) |
625 | INST2(ror_1, "ror" , IUM_RW, 0x0008D0, 0x0008D0, INS_FLAGS_WritesFlags) |
626 | INST2(ror_N, "ror" , IUM_RW, 0x0008C0, 0x0008C0, INS_FLAGS_WritesFlags) |
627 | |
628 | INST2(rcl, "rcl" , IUM_RW, 0x0010D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
629 | INST2(rcl_1, "rcl" , IUM_RW, 0x0010D0, 0x0010D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
630 | INST2(rcl_N, "rcl" , IUM_RW, 0x0010C0, 0x0010C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
631 | INST2(rcr, "rcr" , IUM_RW, 0x0018D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
632 | INST2(rcr_1, "rcr" , IUM_RW, 0x0018D0, 0x0018D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
633 | INST2(rcr_N, "rcr" , IUM_RW, 0x0018C0, 0x0018C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
634 | INST2(shl, "shl" , IUM_RW, 0x0020D2, BAD_CODE, INS_FLAGS_WritesFlags) |
635 | INST2(shl_1, "shl" , IUM_RW, 0x0020D0, 0x0020D0, INS_FLAGS_WritesFlags) |
636 | INST2(shl_N, "shl" , IUM_RW, 0x0020C0, 0x0020C0, INS_FLAGS_WritesFlags) |
637 | INST2(shr, "shr" , IUM_RW, 0x0028D2, BAD_CODE, INS_FLAGS_WritesFlags) |
638 | INST2(shr_1, "shr" , IUM_RW, 0x0028D0, 0x0028D0, INS_FLAGS_WritesFlags) |
639 | INST2(shr_N, "shr" , IUM_RW, 0x0028C0, 0x0028C0, INS_FLAGS_WritesFlags) |
640 | INST2(sar, "sar" , IUM_RW, 0x0038D2, BAD_CODE, INS_FLAGS_WritesFlags) |
641 | INST2(sar_1, "sar" , IUM_RW, 0x0038D0, 0x0038D0, INS_FLAGS_WritesFlags) |
642 | INST2(sar_N, "sar" , IUM_RW, 0x0038C0, 0x0038C0, INS_FLAGS_WritesFlags) |
643 | |
644 | |
645 | // id nm um mr flags |
646 | INST1(r_movsb, "rep movsb" , IUM_RD, 0x00A4F3, INS_FLAGS_None) |
647 | INST1(r_movsd, "rep movsd" , IUM_RD, 0x00A5F3, INS_FLAGS_None) |
648 | #if defined(_TARGET_AMD64_) |
649 | INST1(r_movsq, "rep movsq" , IUM_RD, 0xF3A548, INS_FLAGS_None) |
650 | #endif // defined(_TARGET_AMD64_) |
651 | INST1(movsb, "movsb" , IUM_RD, 0x0000A4, INS_FLAGS_None) |
652 | INST1(movsd, "movsd" , IUM_RD, 0x0000A5, INS_FLAGS_None) |
653 | #if defined(_TARGET_AMD64_) |
654 | INST1(movsq, "movsq" , IUM_RD, 0x00A548, INS_FLAGS_None) |
655 | #endif // defined(_TARGET_AMD64_) |
656 | |
657 | INST1(r_stosb, "rep stosb" , IUM_RD, 0x00AAF3, INS_FLAGS_None) |
658 | INST1(r_stosd, "rep stosd" , IUM_RD, 0x00ABF3, INS_FLAGS_None) |
659 | #if defined(_TARGET_AMD64_) |
660 | INST1(r_stosq, "rep stosq" , IUM_RD, 0xF3AB48, INS_FLAGS_None) |
661 | #endif // defined(_TARGET_AMD64_) |
662 | INST1(stosb, "stosb" , IUM_RD, 0x0000AA, INS_FLAGS_None) |
663 | INST1(stosd, "stosd" , IUM_RD, 0x0000AB, INS_FLAGS_None) |
664 | #if defined(_TARGET_AMD64_) |
665 | INST1(stosq, "stosq" , IUM_RD, 0x00AB48, INS_FLAGS_None) |
666 | #endif // defined(_TARGET_AMD64_) |
667 | |
668 | INST1(int3, "int3" , IUM_RD, 0x0000CC, INS_FLAGS_None) |
669 | INST1(nop, "nop" , IUM_RD, 0x000090, INS_FLAGS_None) |
670 | INST1(lock, "lock" , IUM_RD, 0x0000F0, INS_FLAGS_None) |
671 | INST1(leave, "leave" , IUM_RD, 0x0000C9, INS_FLAGS_None) |
672 | |
673 | |
674 | INST1(neg, "neg" , IUM_RW, 0x0018F6, INS_FLAGS_WritesFlags) |
675 | INST1(not, "not" , IUM_RW, 0x0010F6, INS_FLAGS_WritesFlags) |
676 | |
677 | INST1(cdq, "cdq" , IUM_RD, 0x000099, INS_FLAGS_WritesFlags) |
678 | INST1(idiv, "idiv" , IUM_RD, 0x0038F6, INS_FLAGS_WritesFlags) |
679 | INST1(imulEAX, "imul" , IUM_RD, 0x0028F6, INS_FLAGS_WritesFlags) // edx:eax = eax*op1 |
680 | INST1(div, "div" , IUM_RD, 0x0030F6, INS_FLAGS_WritesFlags) |
681 | INST1(mulEAX, "mul" , IUM_RD, 0x0020F6, INS_FLAGS_WritesFlags) |
682 | |
683 | INST1(sahf, "sahf" , IUM_RD, 0x00009E, INS_FLAGS_WritesFlags) |
684 | |
685 | INST1(xadd, "xadd" , IUM_RW, 0x0F00C0, INS_FLAGS_WritesFlags) |
686 | INST1(cmpxchg, "cmpxchg" , IUM_RW, 0x0F00B0, INS_FLAGS_WritesFlags) |
687 | |
688 | INST1(shld, "shld" , IUM_RW, 0x0F00A4, INS_FLAGS_WritesFlags) |
689 | INST1(shrd, "shrd" , IUM_RW, 0x0F00AC, INS_FLAGS_WritesFlags) |
690 | |
// For RyuJIT/x86, we follow the x86 calling convention, which requires
// us to return floating point values on the x87 FP stack, so we need
// these instructions regardless of whether we're using full stack fp
// (see the illustrative note after this block).
694 | #ifdef _TARGET_X86_ |
695 | INST1(fld, "fld" , IUM_WR, 0x0000D9, INS_FLAGS_x87Instr) |
696 | INST1(fstp, "fstp" , IUM_WR, 0x0018D9, INS_FLAGS_x87Instr) |
#endif // _TARGET_X86_
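// Illustrative note (not part of the table, hypothetical frame layout): a double return value
// computed in an XMM register is typically spilled to a stack temp and then loaded onto the x87
// stack before returning, e.g.
//
//   movsd qword ptr [esp+8], xmm0   ; spill the result
//   fld   qword ptr [esp+8]         ; return value must be in ST(0)
//   ...epilogue...
//   ret
//
// The exact sequence the JIT emits depends on the frame layout; this only sketches why fld/fstp
// are needed here.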
698 | |
699 | INST1(seto, "seto" , IUM_WR, 0x0F0090, INS_FLAGS_ReadsFlags) |
700 | INST1(setno, "setno" , IUM_WR, 0x0F0091, INS_FLAGS_ReadsFlags) |
701 | INST1(setb, "setb" , IUM_WR, 0x0F0092, INS_FLAGS_ReadsFlags) |
702 | INST1(setae, "setae" , IUM_WR, 0x0F0093, INS_FLAGS_ReadsFlags) |
703 | INST1(sete, "sete" , IUM_WR, 0x0F0094, INS_FLAGS_ReadsFlags) |
704 | INST1(setne, "setne" , IUM_WR, 0x0F0095, INS_FLAGS_ReadsFlags) |
705 | INST1(setbe, "setbe" , IUM_WR, 0x0F0096, INS_FLAGS_ReadsFlags) |
706 | INST1(seta, "seta" , IUM_WR, 0x0F0097, INS_FLAGS_ReadsFlags) |
707 | INST1(sets, "sets" , IUM_WR, 0x0F0098, INS_FLAGS_ReadsFlags) |
708 | INST1(setns, "setns" , IUM_WR, 0x0F0099, INS_FLAGS_ReadsFlags) |
709 | INST1(setpe, "setpe" , IUM_WR, 0x0F009A, INS_FLAGS_ReadsFlags) |
710 | INST1(setpo, "setpo" , IUM_WR, 0x0F009B, INS_FLAGS_ReadsFlags) |
711 | INST1(setl, "setl" , IUM_WR, 0x0F009C, INS_FLAGS_ReadsFlags) |
712 | INST1(setge, "setge" , IUM_WR, 0x0F009D, INS_FLAGS_ReadsFlags) |
713 | INST1(setle, "setle" , IUM_WR, 0x0F009E, INS_FLAGS_ReadsFlags) |
714 | INST1(setg, "setg" , IUM_WR, 0x0F009F, INS_FLAGS_ReadsFlags) |
715 | |
716 | #ifdef _TARGET_AMD64_ |
// A jump with rex prefix. This is used for register indirect
// tail calls; see the note after this block.
719 | INST1(rex_jmp, "rex.jmp" , IUM_RD, 0x0020FE, INS_FLAGS_None) |
720 | #endif |
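// Note: rex_jmp shares its base encoding with i_jmp below (jmp r/m, i.e. FF /4 once the size bit
// is applied); only the forced REX prefix distinguishes it, and that prefix is presumably added
// when the instruction is emitted.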
721 | |
722 | INST1(i_jmp, "jmp" , IUM_RD, 0x0020FE, INS_FLAGS_None) |
723 | |
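// The jXX entries below are the short (rel8) forms: EB for the unconditional jmp and 70-7F for the
// conditional jumps. The l_jXX entries that follow are the corresponding near (rel32) forms: E9 for
// jmp and the two-byte 0F 80-8F opcodes for the conditional jumps.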
724 | INST0(jmp, "jmp" , IUM_RD, 0x0000EB, INS_FLAGS_None) |
725 | INST0(jo, "jo" , IUM_RD, 0x000070, INS_FLAGS_ReadsFlags) |
726 | INST0(jno, "jno" , IUM_RD, 0x000071, INS_FLAGS_ReadsFlags) |
727 | INST0(jb, "jb" , IUM_RD, 0x000072, INS_FLAGS_ReadsFlags) |
728 | INST0(jae, "jae" , IUM_RD, 0x000073, INS_FLAGS_ReadsFlags) |
729 | INST0(je, "je" , IUM_RD, 0x000074, INS_FLAGS_ReadsFlags) |
730 | INST0(jne, "jne" , IUM_RD, 0x000075, INS_FLAGS_ReadsFlags) |
731 | INST0(jbe, "jbe" , IUM_RD, 0x000076, INS_FLAGS_ReadsFlags) |
732 | INST0(ja, "ja" , IUM_RD, 0x000077, INS_FLAGS_ReadsFlags) |
733 | INST0(js, "js" , IUM_RD, 0x000078, INS_FLAGS_ReadsFlags) |
734 | INST0(jns, "jns" , IUM_RD, 0x000079, INS_FLAGS_ReadsFlags) |
735 | INST0(jpe, "jpe" , IUM_RD, 0x00007A, INS_FLAGS_ReadsFlags) |
736 | INST0(jpo, "jpo" , IUM_RD, 0x00007B, INS_FLAGS_ReadsFlags) |
737 | INST0(jl, "jl" , IUM_RD, 0x00007C, INS_FLAGS_ReadsFlags) |
738 | INST0(jge, "jge" , IUM_RD, 0x00007D, INS_FLAGS_ReadsFlags) |
739 | INST0(jle, "jle" , IUM_RD, 0x00007E, INS_FLAGS_ReadsFlags) |
740 | INST0(jg, "jg" , IUM_RD, 0x00007F, INS_FLAGS_ReadsFlags) |
741 | |
742 | INST0(l_jmp, "jmp" , IUM_RD, 0x0000E9, INS_FLAGS_None) |
743 | INST0(l_jo, "jo" , IUM_RD, 0x00800F, INS_FLAGS_ReadsFlags) |
744 | INST0(l_jno, "jno" , IUM_RD, 0x00810F, INS_FLAGS_ReadsFlags) |
745 | INST0(l_jb, "jb" , IUM_RD, 0x00820F, INS_FLAGS_ReadsFlags) |
746 | INST0(l_jae, "jae" , IUM_RD, 0x00830F, INS_FLAGS_ReadsFlags) |
747 | INST0(l_je, "je" , IUM_RD, 0x00840F, INS_FLAGS_ReadsFlags) |
748 | INST0(l_jne, "jne" , IUM_RD, 0x00850F, INS_FLAGS_ReadsFlags) |
749 | INST0(l_jbe, "jbe" , IUM_RD, 0x00860F, INS_FLAGS_ReadsFlags) |
750 | INST0(l_ja, "ja" , IUM_RD, 0x00870F, INS_FLAGS_ReadsFlags) |
751 | INST0(l_js, "js" , IUM_RD, 0x00880F, INS_FLAGS_ReadsFlags) |
752 | INST0(l_jns, "jns" , IUM_RD, 0x00890F, INS_FLAGS_ReadsFlags) |
753 | INST0(l_jpe, "jpe" , IUM_RD, 0x008A0F, INS_FLAGS_ReadsFlags) |
754 | INST0(l_jpo, "jpo" , IUM_RD, 0x008B0F, INS_FLAGS_ReadsFlags) |
755 | INST0(l_jl, "jl" , IUM_RD, 0x008C0F, INS_FLAGS_ReadsFlags) |
756 | INST0(l_jge, "jge" , IUM_RD, 0x008D0F, INS_FLAGS_ReadsFlags) |
757 | INST0(l_jle, "jle" , IUM_RD, 0x008E0F, INS_FLAGS_ReadsFlags) |
758 | INST0(l_jg, "jg" , IUM_RD, 0x008F0F, INS_FLAGS_ReadsFlags) |
759 | |
760 | INST0(align, "align" , IUM_RD, BAD_CODE, INS_FLAGS_None) |
761 | |
762 | /*****************************************************************************/ |
763 | #undef INST0 |
764 | #undef INST1 |
765 | #undef INST2 |
766 | #undef INST3 |
767 | #undef INST4 |
768 | #undef INST5 |
769 | /*****************************************************************************/ |
770 | |
771 | // clang-format on |
772 | |