// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

//
// This file was previously known as instrs.h
//
/*****************************************************************************
 * x86 instructions for the JIT compiler
 *
 * id -- the enum name for the instruction
 * nm -- textual name (for assembly display)
 * um -- update mode, see IUM_xx enum (rd, wr, or rw)
 * mr -- base encoding for R/M[reg] addressing mode
 * mi -- base encoding for R/M,icon addressing mode
 * rm -- base encoding for reg,R/M addressing mode
 * a4 -- base encoding for eax,i32 addressing mode
 * rr -- base encoding for register addressing mode
 * flags -- flags, see INS_FLAGS_* enum
 *
 ******************************************************************************/
22
23// clang-format off
24#if !defined(_TARGET_XARCH_)
25 #error Unexpected target type
26#endif
27
28#ifndef INST1
29#error At least INST1 must be defined before including this file.
30#endif
31/*****************************************************************************/
32#ifndef INST0
33#define INST0(id, nm, um, mr, flags)
34#endif
35#ifndef INST2
36#define INST2(id, nm, um, mr, mi, flags)
37#endif
38#ifndef INST3
39#define INST3(id, nm, um, mr, mi, rm, flags)
40#endif
41#ifndef INST4
42#define INST4(id, nm, um, mr, mi, rm, a4, flags)
43#endif
44#ifndef INST5
45#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags)
46#endif
47
48/*****************************************************************************/
49/* The following is x86-specific */
50/*****************************************************************************/
51
52// id nm um mr mi rm a4 rr flags
53INST5(invalid, "INVALID", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
54
55INST5(push, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None)
56INST5(pop, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None)
57// Does not affect the stack tracking in the emitter
58INST5(push_hide, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None)
59INST5(pop_hide, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None)
60
61INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_FLAGS_WritesFlags)
62INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_FLAGS_WritesFlags)
63INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesFlags)
64INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesFlags)
65
66// Multi-byte opcodes without modrm are represented in mixed endian fashion.
67// See comment around quarter way through this file for more information.
68INST5(bswap, "bswap", IUM_RW, 0x0F00C8, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C80F, INS_FLAGS_None)
69
70// id nm um mr mi rm a4 flags
71INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesFlags)
72INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesFlags)
73INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
74INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
75INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesFlags)
76INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesFlags)
77INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesFlags)
78INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesFlags)
79INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesFlags)
80INST4(mov, "mov", IUM_WR, 0x000088, 0x0000C6, 0x00008A, 0x0000B0, INS_FLAGS_None)
81
82INST4(lea, "lea", IUM_WR, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE, INS_FLAGS_None)
83
84// id nm um mr mi rm flags
85
86// Note that emitter has only partial support for BT. It can only emit the reg,reg form
87// and the registers need to be reversed to get the correct encoding.
88INST3(bt, "bt", IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesFlags)
89
90INST3(movsx, "movsx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BE, INS_FLAGS_None)
91#ifdef _TARGET_AMD64_
92INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, 0x4800000063, INS_FLAGS_None)
93#endif
94INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_FLAGS_None)
95
96INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsFlags)
97INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsFlags)
98INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsFlags)
99INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsFlags)
100INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsFlags)
101INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsFlags)
102INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsFlags)
103INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsFlags)
104INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsFlags)
105INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsFlags)
106INST3(cmovpe, "cmovpe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsFlags)
107INST3(cmovpo, "cmovpo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsFlags)
108INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsFlags)
109INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsFlags)
110INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsFlags)
111INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsFlags)
112
113INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None)
114INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesFlags) // op1 *= op2
115
116// id nm um mr mi rm flags
117
118// Instead of encoding these as 3-operand instructions, we encode them
119// as 2-operand instructions with the target register being implicit
120// implicit_reg = op1*op2_icon
121#define INSTMUL INST3
122INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_FLAGS_WritesFlags)
123INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_FLAGS_WritesFlags)
124INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_FLAGS_WritesFlags)
125INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_FLAGS_WritesFlags)
126INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_WritesFlags)
127INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_FLAGS_WritesFlags)
128INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_FLAGS_WritesFlags)
129INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_FLAGS_WritesFlags)
130
131#ifdef _TARGET_AMD64_
132
133INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_FLAGS_WritesFlags)
134INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_FLAGS_WritesFlags)
135INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_FLAGS_WritesFlags)
136INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_FLAGS_WritesFlags)
137INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_FLAGS_WritesFlags)
138INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_FLAGS_WritesFlags)
139INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_FLAGS_WritesFlags)
140INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_FLAGS_WritesFlags)
141
142#endif // _TARGET_AMD64_
143
// the hex codes in this file represent the instruction encoding as follows:
// 0x0000ff00 - modrm byte position
// 0x000000ff - last byte of opcode (before modrm)
// 0x00ff0000 - first byte of opcode
// 0xff000000 - middle byte of opcode, if needed (after first, before last)
//
// So a 1-byte opcode is: and with modrm:
// 0x00000011 0x0000RM11
//
// So a 2-byte opcode is: and with modrm:
// 0x00002211 0x0011RM22
//
// So a 3-byte opcode is: and with modrm:
// 0x00113322 0x2211RM33
//
// So a 4-byte opcode would be something like this:
// 0x22114433

// Packing helpers for the layout above. Every argument is parenthesized so an
// argument that is itself an expression (e.g. `a | b`) is packed as one byte
// instead of being split apart by the precedence of `<<` and `|`.
//   PACK2(b1, b2)     : b1 -> bits 16-23, b2 -> bits 0-7
//   PACK3(b1, b2, b3) : b1 -> bits 16-23, b2 -> bits 24-31, b3 -> bits 0-7
#define PACK3(byte1,byte2,byte3) (((byte1) << 16) | ((byte2) << 24) | (byte3))
#define PACK2(byte1,byte2) (((byte1) << 16) | (byte2))
#define SSEFLT(c) PACK3(0xf3, 0x0f, c)
#define SSEDBL(c) PACK3(0xf2, 0x0f, c)
#define PCKDBL(c) PACK3(0x66, 0x0f, c)
#define PCKFLT(c) PACK2(0x0f,c)

// These macros encode extra byte that is implicit in the macro.
//   PACK4(b1, b2, b3, b4) : as PACK3, plus b4 -> bits 8-15
#define PACK4(byte1,byte2,byte3,byte4) (((byte1) << 16) | ((byte2) << 24) | (byte3) | ((byte4) << 8))
#define SSE38(c) PACK4(0x66, 0x0f, 0x38, c)
#define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c)

// VEX* encodes the implied leading opcode bytes in c1:
// 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a
// NOTE(review): VEX3INT and VEX3FLT currently expand identically; also
// 0xc5 << 24 does not fit in a signed 32-bit int -- confirm intended type.
#define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2)
#define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
179
180INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
181// These are the SSE instructions used on x86
182INST3(mov_i2xmm, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
183INST3(mov_xmm2i, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
184INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_FLAGS_None) // Move the MSB bits of all bytes in a xmm reg to an int reg
185INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_FLAGS_None) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
186INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None)
187INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None)
188INST3(movsdsse2, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_Flags_IsDstSrcSrcAVXInstruction)
189
190INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_Flags_IsDstDstSrcAVXInstruction)
191
192INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles
193
194INST3(cvttsd2si, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_FLAGS_None) // cvt with trunc scalar double to signed DWORDs
195
196INST3(movntdq, "movntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_FLAGS_None)
197INST3(movnti, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_FLAGS_None)
198INST3(movntpd, "movntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None)
199INST3(movntps, "movntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None)
200INST3(movdqu, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_FLAGS_None)
201INST3(movdqa, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_FLAGS_None)
202INST3(movlpd, "movlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_Flags_IsDstSrcSrcAVXInstruction)
203INST3(movlps, "movlps", IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstSrcSrcAVXInstruction)
204INST3(movhpd, "movhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_Flags_IsDstSrcSrcAVXInstruction)
205INST3(movhps, "movhps", IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstSrcSrcAVXInstruction)
206INST3(movss, "movss", IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), INS_Flags_IsDstSrcSrcAVXInstruction)
207INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_FLAGS_None)
208INST3(movaps, "movaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_FLAGS_None)
209INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_FLAGS_None)
210INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_FLAGS_None)
211INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstDstSrcAVXInstruction)
212INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstDstSrcAVXInstruction)
213INST3(movmskps, "movmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_FLAGS_None)
214INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_Flags_IsDstDstSrcAVXInstruction)
215INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_Flags_IsDstDstSrcAVXInstruction)
216INST3(maskmovdqu, "maskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), INS_FLAGS_None)
217
218INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_Flags_IsDstDstSrcAVXInstruction)
219INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_Flags_IsDstDstSrcAVXInstruction)
220
221INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_Flags_IsDstDstSrcAVXInstruction)
222
223INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
224INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
225INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_FLAGS_None)
226INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_FLAGS_None)
227INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_FLAGS_None)
228INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_FLAGS_None)
229INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
230
231// SSE 2 arith
232INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles
233INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles
234INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles
235INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles
236INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles
237INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single
238INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles
239INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles
240INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles
241INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles
242INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed doubles
243INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles
244INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles
245INST3(minss, "minss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single
246INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles
247INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double
248INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles
249INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles
250INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles
251INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles
252INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles
253INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single
254INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles
255INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double
256INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles
257INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles
258INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles
259INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_FLAGS_None) // Sqrt of packed singles
260INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single
261INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_FLAGS_None) // Sqrt of packed doubles
262INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double
263INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles
264INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles
265INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles
266INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles
267INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles
268INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats
269INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles
270INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats
271INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles
272INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles
273
274// SSE 2 approx arith
275INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_FLAGS_None) // Reciprocal of packed singles
276INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single
277INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_FLAGS_None) // Reciprocal Sqrt of packed singles
278INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single
279
280// SSE2 conversions
281INST3(cvtpi2ps, "cvtpi2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2A), INS_FLAGS_None) // cvt packed DWORDs to singles
282INST3(cvtsi2ss, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single
283INST3(cvtpi2pd, "cvtpi2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2A), INS_FLAGS_None) // cvt packed DWORDs to doubles
284INST3(cvtsi2sd, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double
285INST3(cvttps2pi, "cvttps2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2C), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs
286INST3(cvttss2si, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_FLAGS_None) // cvt with trunc scalar single to DWORD
287INST3(cvttpd2pi, "cvttpd2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2C), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs
288INST3(cvtps2pi, "cvtps2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2D), INS_FLAGS_None) // cvt packed singles to DWORDs
289INST3(cvtss2si, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_FLAGS_None) // cvt scalar single to DWORD
290INST3(cvtpd2pi, "cvtpd2pi", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2D), INS_FLAGS_None) // cvt packed doubles to DWORDs
291INST3(cvtsd2si, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_FLAGS_None) // cvt scalar double to DWORD
292INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_FLAGS_None) // cvt packed singles to doubles
293INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_FLAGS_None) // cvt packed doubles to singles
294INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles
295INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles
296INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_FLAGS_None) // cvt packed DWORDs to singles
297INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_FLAGS_None) // cvt packed singles to DWORDs
298INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs
299INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_FLAGS_None) // cvt packed doubles to DWORDs
300INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs
301INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_FLAGS_None) // cvt packed DWORDs to doubles
302
303// SSE2 comparison instructions
304INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_None) // ordered compare singles
305INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_None) // ordered compare doubles
306INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_None) // unordered compare singles
307INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_None) // unordered compare doubles
308
309// SSE2 packed single/double comparison operations.
310// Note that these instructions not only compare but also overwrite the first source.
311INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles
312INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles
313INST3(cmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles
314INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles
315
316//SSE2 packed integer operations
317INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers
318INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers
319INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers
320INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers
321INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results
322INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results
323INST3(paddusb, "paddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results
324INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results
325INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers
326INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers
327INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers
328INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers
329INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers
330INST3(psubq, "psubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers
331INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
332INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers
333INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers
334INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result
335INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
336INST3(pand, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs
337INST3(pandn, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs
338INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs
339INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs
340INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers
341INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
342INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
343INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
344INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
345
346// Note that the shift immediates share the same encoding between left and right-shift, and are distinguished by the Reg/Opcode,
347// which is handled in emitxarch.cpp.
348INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes
349INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes
350INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers
351INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers
352INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers
353INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers
354INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers
355INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers
356INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers
357INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers
358
359INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes
360INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes
361INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words
362INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words
363INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality
364INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than
365INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality
366INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than
367INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality
368INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than
369
// Packed shuffles and 16-bit element extract/insert (rm encoding only).
370INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_FLAGS_None) // Packed shuffle of 32-bit integers
371INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_FLAGS_None) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1
372INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_FLAGS_None) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1
373INST3(pextrw, "pextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_FLAGS_None) // Extract a 16-bit value into an r32, zero-extended to 32 bits
374INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index
375
// Unpack/interleave instructions (rm encoding only).
376INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave high-order bytes (unsigned widen ubyte to ushort, hi)
377INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave low-order bytes (unsigned widen ubyte to ushort, lo)
378INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave high-order quadwords
379INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave low-order quadwords
380INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave high-order words (unsigned widen ushort to uint, hi)
381INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave low-order words (unsigned widen ushort to uint, lo)
382INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave high packed double-precision floating-point values
383INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave low packed double-precision floating-point values
384
// Narrowing packs with saturation (rm encoding only).
385INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with signed saturation
386INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with signed saturation
387INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with unsigned saturation
388
389// id nm um mr mi rm flags
// Instructions encoded through the SSE38(...) / SSE3A(...) macros; only the rm encoding is populated.
390INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs
391INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs
392INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value
393INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality
394INST3(pcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than
395INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32-bit integers and store lower 32 bits of each result
396INST3(ptest, "ptest", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x17), INS_FLAGS_None) // Packed logical compare
397INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 32-bit integers
398INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_FLAGS_None) // Packed absolute value of bytes
399INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_FLAGS_None) // Packed absolute value of 16-bit integers
400INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_FLAGS_None) // Packed absolute value of 32-bit integers
401INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right
402INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes
403INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale
404INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes
405INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN (bytes)
406INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN (16-bit integers)
407INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN (32-bit integers)
408INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // Packed minimum of signed bytes
409INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_Flags_IsDstDstSrcAVXInstruction) // Packed minimum of 32-bit signed integers
410INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_Flags_IsDstDstSrcAVXInstruction) // Packed minimum of 16-bit unsigned integers
411INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_Flags_IsDstDstSrcAVXInstruction) // Packed minimum of 32-bit unsigned integers
412INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_Flags_IsDstDstSrcAVXInstruction) // Packed maximum of signed bytes
413INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_Flags_IsDstDstSrcAVXInstruction) // Packed maximum of 32-bit signed integers
414INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_Flags_IsDstDstSrcAVXInstruction) // Packed maximum of 16-bit unsigned integers
415INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_Flags_IsDstDstSrcAVXInstruction) // Packed maximum of 32-bit unsigned integers
// Packed sign-extending (pmovsx*) and zero-extending (pmovzx*) moves (rm encoding only).
416INST3(pmovsxbw, "pmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_FLAGS_None) // Packed sign extend byte to short
417INST3(pmovsxbd, "pmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_FLAGS_None) // Packed sign extend byte to int
418INST3(pmovsxbq, "pmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_FLAGS_None) // Packed sign extend byte to long
419INST3(pmovsxwd, "pmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), INS_FLAGS_None) // Packed sign extend short to int
420INST3(pmovsxwq, "pmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), INS_FLAGS_None) // Packed sign extend short to long
421INST3(pmovsxdq, "pmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), INS_FLAGS_None) // Packed sign extend int to long
422INST3(pmovzxbw, "pmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), INS_FLAGS_None) // Packed zero extend byte to short
423INST3(pmovzxbd, "pmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), INS_FLAGS_None) // Packed zero extend byte to int
424INST3(pmovzxbq, "pmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), INS_FLAGS_None) // Packed zero extend byte to long
425INST3(pmovzxwd, "pmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_FLAGS_None) // Packed zero extend short to int
426INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_FLAGS_None) // Packed zero extend short to long
427INST3(pmovzxdq, "pmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_FLAGS_None) // Packed zero extend int to long
// Pack, round, multiply, blend and horizontal add/subtract (rm encoding only).
428INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation
429INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_FLAGS_None) // Round packed single precision floating-point values
430INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values
431INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_FLAGS_None) // Round packed double precision floating-point values
432INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values
433INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32-bit signed integers and store 64-bit result
434INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values
435INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_FLAGS_None) // Variable Blend Packed Singles
436INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
437INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_FLAGS_None) // Variable Blend Packed Doubles
438INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words
439INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_FLAGS_None) // Variable Blend Packed Bytes
440INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers
441INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers
442INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers
443INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation
444INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation
// Loads, replicating moves, and element insert/extract.
// The pinsr* rows populate the rm column; the pextr*/extractps rows populate the mr column instead.
// pinsrd/pinsrq share SSE3A(0x22) and pextrd/pextrq share SSE3A(0x16) in this table;
// NOTE(review): presumably the 64-bit forms are selected by REX.W in the emitter -- confirm.
445INST3(lddqu, "lddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_FLAGS_None) // Load Unaligned integer
446INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_FLAGS_None) // Load Double Quadword Non-Temporal Aligned Hint
447INST3(movddup, "movddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_FLAGS_None) // Replicate Double FP Values
448INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_FLAGS_None) // Replicate even-indexed Single FP Values
449INST3(movshdup, "movshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_FLAGS_None) // Replicate odd-indexed Single FP Values
450INST3(phminposuw, "phminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_FLAGS_None) // Packed Horizontal Word Minimum
451INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference
452INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte
453INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword
454INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword
455INST3(pextrb, "pextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Byte
456INST3(pextrd, "pextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Dword
457INST3(pextrq, "pextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Qword
458INST3(pextrw_sse41, "pextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Word (SSE3A-encoded form; displays as "pextrw")
459INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Packed Floating-Point Values
460
461// PCLMULQDQ instructions
462INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords
463
464// AES instructions
465INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow
466INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow
467INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow
468INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow
469INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_FLAGS_None) // Perform the AES InvMixColumn Transformation
470INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_FLAGS_None) // AES Round Key Generation Assist
// Marker row with no encodings (all BAD_CODE). NOTE(review): presumably bounds the SSE instruction range for id comparisons -- confirm against users of this id.
471INST3(LAST_SSE_INSTRUCTION, "LAST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
472
// Marker row with no encodings (all BAD_CODE); paired with LAST_AVX_INSTRUCTION further down the table.
473INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
474// AVX only instructions
// The display names below omit the leading 'v' of the id. NOTE(review): presumably the disassembly printer adds it -- confirm.
475INST3(vbroadcastss, "broadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_FLAGS_None) // Broadcast float value read from memory to entire ymm register
476INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_FLAGS_None) // Broadcast double value read from memory to entire ymm register
477INST3(vpbroadcastb, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_FLAGS_None) // Broadcast int8 value from reg/memory to entire ymm register
478INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_FLAGS_None) // Broadcast int16 value from reg/memory to entire ymm register
479INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_FLAGS_None) // Broadcast int32 value from reg/memory to entire ymm register
480INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_FLAGS_None) // Broadcast int64 value from reg/memory to entire ymm register
481INST3(vextractf128, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed floating point values
482INST3(vextracti128, "extracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed integer values
483INST3(vinsertf128, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values
484INST3(vinserti128, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values
485INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_FLAGS_None) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
486INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register
487INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_FLAGS_None) // Permute 64-bit of input register
488INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs
489INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles
490INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles
491INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes
492INST3(vtestps, "testps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_FLAGS_None) // Packed Bit Test
493INST3(vtestpd, "testpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_FLAGS_None) // Packed Bit Test
// Variable shifts, permutes, 128-bit broadcasts and masked moves.
// Several d/q pairs below (vpsrlvd/q, vpsllvd/q, vpmaskmovd/q) share one base encoding in this table;
// NOTE(review): presumably the 64-bit element forms are selected by VEX.W in the emitter -- confirm.
// The (v)(p)maskmov* rows populate both the mr (store) and rm (load) columns.
494INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
495INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical
496INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic
497INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
498INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical
499INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
500INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_FLAGS_None) // Permute In-Lane of Pairs of Double-Precision Floating-Point Values
501INST3(vpermilpsvar, "permilpsvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values (variable control)
502INST3(vpermilpdvar, "permilpdvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Pairs of Double-Precision Floating-Point Values (variable control)
503INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values
504INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_FLAGS_None) // Permute Double-Precision Floating-Point Values
505INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements
506INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements
507INST3(vbroadcastf128, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_FLAGS_None) // Broadcast packed float values read from memory to entire ymm register
508INST3(vbroadcasti128, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_FLAGS_None) // Broadcast packed integer values read from memory to entire ymm register
509INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
510INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
511INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores
512INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores
// Gather instructions. Naming: (v)(p)gather<index size><element size>, where d = dword / q = qword indices.
// The dword- and qword-element forms of each index size share one base encoding in this table
// (0x90/0x91 integer, 0x92/0x93 floating point); NOTE(review): presumably selected by VEX.W in the emitter -- confirm.
513INST3(vpgatherdd, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword Indices
514INST3(vpgatherqd, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword Indices
515INST3(vpgatherdq, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Dword Indices
516INST3(vpgatherqq, "pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Qword Indices
517INST3(vgatherdps, "gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices
518INST3(vgatherqps, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices
519INST3(vgatherdpd, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices
520INST3(vgatherqpd, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices
521
// Marker row with no encodings (all BAD_CODE); paired with LAST_FMA_INSTRUCTION further down the table.
522INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
523// id nm um mr mi rm flags
// Each operation is described on its 132 row; the 213 and 231 rows that follow it are the other
// operand-order forms of the same operation and carry an empty trailing comment.
// The pd/ps (and sd/ss) rows of one operation share a base encoding in this table;
// NOTE(review): presumably double vs. single precision is selected by VEX.W in the emitter -- confirm.
524INST3(vfmadd132pd, "fmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values
525INST3(vfmadd213pd, "fmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) //
526INST3(vfmadd231pd, "fmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) //
527INST3(vfmadd132ps, "fmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values
528INST3(vfmadd213ps, "fmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) //
529INST3(vfmadd231ps, "fmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) //
530INST3(vfmadd132sd, "fmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values
531INST3(vfmadd213sd, "fmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) //
532INST3(vfmadd231sd, "fmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) //
533INST3(vfmadd132ss, "fmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values
534INST3(vfmadd213ss, "fmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) //
535INST3(vfmadd231ss, "fmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) //
536INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values
537INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) //
538INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) //
539INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values
540INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) //
541INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) //
542INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values
543INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) //
544INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) //
545INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values
546INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) //
547INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) //
548INST3(vfmsub132pd, "fmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values
549INST3(vfmsub213pd, "fmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) //
550INST3(vfmsub231pd, "fmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) //
551INST3(vfmsub132ps, "fmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values
552INST3(vfmsub213ps, "fmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) //
553INST3(vfmsub231ps, "fmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) //
554INST3(vfmsub132sd, "fmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values
555INST3(vfmsub213sd, "fmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) //
556INST3(vfmsub231sd, "fmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) //
557INST3(vfmsub132ss, "fmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values
558INST3(vfmsub213ss, "fmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) //
559INST3(vfmsub231ss, "fmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) //
// Fused negative multiply-add / multiply-subtract.
// The display names spell the mnemonic "fnmadd..." / "fnmsub..." so that they agree with the enum ids
// (vfnmadd* / vfnmsub*); they were previously misspelled "fmnadd..." / "fmnsub...".
// Each operation is described on its 132 row; the 213 and 231 rows that follow are the other
// operand-order forms of the same operation and carry an empty trailing comment.
560INST3(vfnmadd132pd, "fnmadd132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values
561INST3(vfnmadd213pd, "fnmadd213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) //
562INST3(vfnmadd231pd, "fnmadd231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) //
563INST3(vfnmadd132ps, "fnmadd132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values
564INST3(vfnmadd213ps, "fnmadd213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) //
565INST3(vfnmadd231ps, "fnmadd231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) //
566INST3(vfnmadd132sd, "fnmadd132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values
567INST3(vfnmadd213sd, "fnmadd213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) //
568INST3(vfnmadd231sd, "fnmadd231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) //
569INST3(vfnmadd132ss, "fnmadd132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values
570INST3(vfnmadd213ss, "fnmadd213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) //
571INST3(vfnmadd231ss, "fnmadd231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) //
572INST3(vfnmsub132pd, "fnmsub132pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values
573INST3(vfnmsub213pd, "fnmsub213pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) //
574INST3(vfnmsub231pd, "fnmsub231pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) //
575INST3(vfnmsub132ps, "fnmsub132ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values
576INST3(vfnmsub213ps, "fnmsub213ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) //
577INST3(vfnmsub231ps, "fnmsub231ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) //
578INST3(vfnmsub132sd, "fnmsub132sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values
579INST3(vfnmsub213sd, "fnmsub213sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) //
580INST3(vfnmsub231sd, "fnmsub231sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
581INST3(vfnmsub132ss, "fnmsub132ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values
582INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) //
583INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
// Marker row with no encodings (all BAD_CODE); closes the range opened by FIRST_FMA_INSTRUCTION.
584INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
585
586// BMI1
// FIRST_BMI_INSTRUCTION / LAST_BMI_INSTRUCTION are marker rows with no encodings (all BAD_CODE).
// blsi/blsmsk/blsr share SSE38(0xF3) and pdep/pext/bzhi share SSE38(0xF5) in this table;
// NOTE(review): presumably they are told apart by the ModRM reg field / prefix in the emitter -- confirm.
587INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
588INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
589INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
590INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
591INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
592INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract
593
594// BMI2
595INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
596INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
597INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position
598INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags
599
600INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
601
// Marker row with no encodings (all BAD_CODE); the FMA and BMI ranges above sit before it in the table.
602INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
603
604// Scalar instructions in SSE4.2
605INST3(crc32, "crc32", IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_FLAGS_None) // Accumulate CRC32 Value
606
607// BMI1
608INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_FLAGS_None) // Count the Number of Trailing Zero Bits
609
610// LZCNT
611INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_None) // Count the Number of Leading Zero Bits
612
613// POPCNT
614INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_None) // Return the Count of Number of Bits Set to 1
615
616// id nm um mr mi flags
// Control transfer. These INST2 rows carry only the mr and mi encodings.
617INST2(ret, "ret", IUM_RD, 0x0000C3, 0x0000C2, INS_FLAGS_None) // Return (mi: form taking an immediate)
618INST2(loop, "loop", IUM_RD, BAD_CODE, 0x0000E2, INS_FLAGS_None) // Loop (mi form only)
619INST2(call, "call", IUM_RD, 0x0010FF, 0x0000E8, INS_FLAGS_WritesFlags) // Call (mr: through R/M; mi: immediate target); marked as writing flags
620
// Rotates and shifts. Each operation has three ids that share one display name:
//   xxx   -- 0xD2-based encoding, mr only (x86 shift-by-CL opcode group)
//   xxx_1 -- 0xD0-based encoding (x86 shift-by-one opcode group)
//   xxx_N -- 0xC0-based encoding (x86 shift-by-imm8 opcode group)
// The second encoding byte (0x00, 0x08, 0x10, ...) differs per operation.
// All of them write flags.
621INST2(rol, "rol", IUM_RW, 0x0000D2, BAD_CODE, INS_FLAGS_WritesFlags)
622INST2(rol_1, "rol", IUM_RW, 0x0000D0, 0x0000D0, INS_FLAGS_WritesFlags)
623INST2(rol_N, "rol", IUM_RW, 0x0000C0, 0x0000C0, INS_FLAGS_WritesFlags)
624INST2(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, INS_FLAGS_WritesFlags)
625INST2(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, INS_FLAGS_WritesFlags)
626INST2(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, INS_FLAGS_WritesFlags)
627
// Rotates through carry: these additionally read flags.
628INST2(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
629INST2(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
630INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
631INST2(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
632INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
633INST2(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags)
// Shifts.
634INST2(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, INS_FLAGS_WritesFlags)
635INST2(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, INS_FLAGS_WritesFlags)
636INST2(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, INS_FLAGS_WritesFlags)
637INST2(shr, "shr", IUM_RW, 0x0028D2, BAD_CODE, INS_FLAGS_WritesFlags)
638INST2(shr_1, "shr", IUM_RW, 0x0028D0, 0x0028D0, INS_FLAGS_WritesFlags)
639INST2(shr_N, "shr", IUM_RW, 0x0028C0, 0x0028C0, INS_FLAGS_WritesFlags)
640INST2(sar, "sar", IUM_RW, 0x0038D2, BAD_CODE, INS_FLAGS_WritesFlags)
641INST2(sar_1, "sar", IUM_RW, 0x0038D0, 0x0038D0, INS_FLAGS_WritesFlags)
642INST2(sar_N, "sar", IUM_RW, 0x0038C0, 0x0038C0, INS_FLAGS_WritesFlags)
643
644
// Single-encoding rows: only the mr column is supplied.
645// id nm um mr flags
// String moves: the "rep" rows carry the 0xF3 prefix byte inside the encoding constant,
// followed by the unprefixed forms. The *q rows exist only when _TARGET_AMD64_ is defined
// and additionally embed 0x48 (the REX.W prefix byte) in the constant.
// NOTE(review): the byte order in which the emitter writes the 3-byte *q constants is not
// visible here -- check the emitter before changing them.
646INST1(r_movsb, "rep movsb", IUM_RD, 0x00A4F3, INS_FLAGS_None)
647INST1(r_movsd, "rep movsd", IUM_RD, 0x00A5F3, INS_FLAGS_None)
648#if defined(_TARGET_AMD64_)
649INST1(r_movsq, "rep movsq", IUM_RD, 0xF3A548, INS_FLAGS_None)
650#endif // defined(_TARGET_AMD64_)
651INST1(movsb, "movsb", IUM_RD, 0x0000A4, INS_FLAGS_None)
652INST1(movsd, "movsd", IUM_RD, 0x0000A5, INS_FLAGS_None)
653#if defined(_TARGET_AMD64_)
654INST1(movsq, "movsq", IUM_RD, 0x00A548, INS_FLAGS_None)
655#endif // defined(_TARGET_AMD64_)
656
// String stores: same row layout as the string moves above (rep forms first, *q forms
// only for _TARGET_AMD64_).
657INST1(r_stosb, "rep stosb", IUM_RD, 0x00AAF3, INS_FLAGS_None)
658INST1(r_stosd, "rep stosd", IUM_RD, 0x00ABF3, INS_FLAGS_None)
659#if defined(_TARGET_AMD64_)
660INST1(r_stosq, "rep stosq", IUM_RD, 0xF3AB48, INS_FLAGS_None)
661#endif // defined(_TARGET_AMD64_)
662INST1(stosb, "stosb", IUM_RD, 0x0000AA, INS_FLAGS_None)
663INST1(stosd, "stosd", IUM_RD, 0x0000AB, INS_FLAGS_None)
664#if defined(_TARGET_AMD64_)
665INST1(stosq, "stosq", IUM_RD, 0x00AB48, INS_FLAGS_None)
666#endif // defined(_TARGET_AMD64_)
667
// Miscellaneous single-byte instructions with no flag effects recorded.
668INST1(int3, "int3", IUM_RD, 0x0000CC, INS_FLAGS_None)
669INST1(nop, "nop", IUM_RD, 0x000090, INS_FLAGS_None)
670INST1(lock, "lock", IUM_RD, 0x0000F0, INS_FLAGS_None)
671INST1(leave, "leave", IUM_RD, 0x0000C9, INS_FLAGS_None)
672
673
// Single-operand arithmetic and read-modify-write rows; every row here is marked
// INS_FLAGS_WritesFlags.
// NOTE(review): the Intel SDM lists NOT and CDQ as leaving EFLAGS unchanged, so the
// WritesFlags marking on "not" and "cdq" looks conservative rather than exact -- confirm
// with the consumers of these flags before tightening it.
674INST1(neg, "neg", IUM_RW, 0x0018F6, INS_FLAGS_WritesFlags)
675INST1(not, "not", IUM_RW, 0x0010F6, INS_FLAGS_WritesFlags)
676
677INST1(cdq, "cdq", IUM_RD, 0x000099, INS_FLAGS_WritesFlags)
678INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_FLAGS_WritesFlags)
679INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_FLAGS_WritesFlags) // edx:eax = eax*op1
680INST1(div, "div", IUM_RD, 0x0030F6, INS_FLAGS_WritesFlags)
681INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_FLAGS_WritesFlags)
682
683INST1(sahf, "sahf", IUM_RD, 0x00009E, INS_FLAGS_WritesFlags)
684
// Encodings of the form 0x0F00xx carry the 0x0F two-byte-opcode escape in the top byte.
685INST1(xadd, "xadd", IUM_RW, 0x0F00C0, INS_FLAGS_WritesFlags)
686INST1(cmpxchg, "cmpxchg", IUM_RW, 0x0F00B0, INS_FLAGS_WritesFlags)
687
688INST1(shld, "shld", IUM_RW, 0x0F00A4, INS_FLAGS_WritesFlags)
689INST1(shrd, "shrd", IUM_RW, 0x0F00AC, INS_FLAGS_WritesFlags)
690
691// For RyuJIT/x86, we follow the x86 calling convention that requires
692// us to return floating point value on the x87 FP stack, so we need
693// these instructions regardless of whether we're using full stack fp.
// These two rows are the only ones here tagged INS_FLAGS_x87Instr, and they are compiled
// in only when _TARGET_X86_ is defined.
694#ifdef _TARGET_X86_
695INST1(fld, "fld", IUM_WR, 0x0000D9, INS_FLAGS_x87Instr)
696INST1(fstp, "fstp", IUM_WR, 0x0018D9, INS_FLAGS_x87Instr)
697#endif // _TARGET_X86_
698
// Conditional set (setcc) rows. Each one is IUM_WR and INS_FLAGS_ReadsFlags; the low
// opcode byte runs consecutively from 0x90 (seto) to 0x9F (setg) under the 0x0F escape.
// The condition order matches the jcc rows (jo..jg / l_jo..l_jg) elsewhere in this table.
699INST1(seto, "seto", IUM_WR, 0x0F0090, INS_FLAGS_ReadsFlags)
700INST1(setno, "setno", IUM_WR, 0x0F0091, INS_FLAGS_ReadsFlags)
701INST1(setb, "setb", IUM_WR, 0x0F0092, INS_FLAGS_ReadsFlags)
702INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsFlags)
703INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsFlags)
704INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsFlags)
705INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsFlags)
706INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsFlags)
707INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsFlags)
708INST1(setns, "setns", IUM_WR, 0x0F0099, INS_FLAGS_ReadsFlags)
709INST1(setpe, "setpe", IUM_WR, 0x0F009A, INS_FLAGS_ReadsFlags)
710INST1(setpo, "setpo", IUM_WR, 0x0F009B, INS_FLAGS_ReadsFlags)
711INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsFlags)
712INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsFlags)
713INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsFlags)
714INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsFlags)
715
716#ifdef _TARGET_AMD64_
717// A jump with rex prefix. This is used for register indirect
718// tail calls.
// NOTE(review): rex_jmp has the same encoding constant as i_jmp below, so the REX prefix
// itself is presumably added by the emitter based on the instruction id -- confirm.
719INST1(rex_jmp, "rex.jmp", IUM_RD, 0x0020FE, INS_FLAGS_None)
720#endif
721
// Indirect jump; available on all targets.
722INST1(i_jmp, "jmp", IUM_RD, 0x0020FE, INS_FLAGS_None)
723
// INST0 rows take (id, nm, um, mr, flags).
// Short-form jumps: unconditional jmp is 0xEB; the conditional jumps use the consecutive
// single-byte opcodes 0x70 (jo) through 0x7F (jg) and are marked as reading flags.
724INST0(jmp, "jmp", IUM_RD, 0x0000EB, INS_FLAGS_None)
725INST0(jo, "jo", IUM_RD, 0x000070, INS_FLAGS_ReadsFlags)
726INST0(jno, "jno", IUM_RD, 0x000071, INS_FLAGS_ReadsFlags)
727INST0(jb, "jb", IUM_RD, 0x000072, INS_FLAGS_ReadsFlags)
728INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsFlags)
729INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsFlags)
730INST0(jne, "jne", IUM_RD, 0x000075, INS_FLAGS_ReadsFlags)
731INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsFlags)
732INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsFlags)
733INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsFlags)
734INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsFlags)
735INST0(jpe, "jpe", IUM_RD, 0x00007A, INS_FLAGS_ReadsFlags)
736INST0(jpo, "jpo", IUM_RD, 0x00007B, INS_FLAGS_ReadsFlags)
737INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsFlags)
738INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsFlags)
739INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsFlags)
740INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsFlags)
741
// Long-form jumps (l_ prefix), listed in exactly the same order as the short forms above
// and printed with the same mnemonics. Unconditional is 0xE9; the conditional constants
// hold the 0x0F escape in the low byte and 0x80..0x8F in the next byte.
// NOTE(review): code elsewhere may convert between a short jump id and its long
// counterpart by a fixed enum offset -- keep the two lists parallel and confirm before
// inserting or reordering rows.
742INST0(l_jmp, "jmp", IUM_RD, 0x0000E9, INS_FLAGS_None)
743INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsFlags)
744INST0(l_jno, "jno", IUM_RD, 0x00810F, INS_FLAGS_ReadsFlags)
745INST0(l_jb, "jb", IUM_RD, 0x00820F, INS_FLAGS_ReadsFlags)
746INST0(l_jae, "jae", IUM_RD, 0x00830F, INS_FLAGS_ReadsFlags)
747INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsFlags)
748INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsFlags)
749INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsFlags)
750INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsFlags)
751INST0(l_js, "js", IUM_RD, 0x00880F, INS_FLAGS_ReadsFlags)
752INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsFlags)
753INST0(l_jpe, "jpe", IUM_RD, 0x008A0F, INS_FLAGS_ReadsFlags)
754INST0(l_jpo, "jpo", IUM_RD, 0x008B0F, INS_FLAGS_ReadsFlags)
755INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsFlags)
756INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsFlags)
757INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_ReadsFlags)
758INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsFlags)
759
// "align" has no machine encoding (BAD_CODE); presumably a pseudo-instruction that the
// emitter expands into padding -- confirm against the emitter.
760INST0(align, "align", IUM_RD, BAD_CODE, INS_FLAGS_None)
761
762/*****************************************************************************/
// Remove every INSTn macro (those supplied by the includer and the empty defaults
// defined at the top of this file) so the file can be included again with a different
// set of definitions.
763#undef INST0
764#undef INST1
765#undef INST2
766#undef INST3
767#undef INST4
768#undef INST5
769/*****************************************************************************/
770
771// clang-format on
772