| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | // |
| 6 | // This file was previously known as instrs.h |
| 7 | // |
| 8 | /***************************************************************************** |
| 9 | * x86 instructions for the JIT compiler |
| 10 | * |
| 11 | * id -- the enum name for the instruction |
| 12 | * nm -- textual name (for assembly display) |
| 13 | * um -- update mode, see IUM_xx enum (rd, wr, or rw) |
| 14 | * mr -- base encoding for R/M[reg] addressing mode |
| 15 | * mi -- base encoding for R/M,icon addressing mode |
| 16 | * rm -- base encoding for reg,R/M addressing mode |
| 17 | * a4 -- base encoding for eax,i32 addressing mode |
| 18 | * rr -- base encoding for register addressing mode |
| 19 | * flags -- flags, see INS_FLAGS_* enum |
| 20 | * |
| 21 | ******************************************************************************/ |
| 22 | |
| 23 | // clang-format off |
// Include-time checks: this table is rejected unless _TARGET_XARCH_ is defined, and the
// includer must define at least INST1 (no fallback definition for INST1 is provided below).
| 24 | #if !defined(_TARGET_XARCH_) |
| 25 | #error Unexpected target type |
| 26 | #endif |
| 27 | |
| 28 | #ifndef INST1 |
| 29 | #error At least INST1 must be defined before including this file. |
| 30 | #endif |
| 31 | /*****************************************************************************/ |
// Fallbacks: any INSTn form the includer left undefined is defined with an empty body,
// so rows written with that form expand to nothing.
// Each successive form takes one more encoding column: mr, then mi, rm, a4 and rr.
| 32 | #ifndef INST0 |
| 33 | #define INST0(id, nm, um, mr, flags) |
| 34 | #endif |
| 35 | #ifndef INST2 |
| 36 | #define INST2(id, nm, um, mr, mi, flags) |
| 37 | #endif |
| 38 | #ifndef INST3 |
| 39 | #define INST3(id, nm, um, mr, mi, rm, flags) |
| 40 | #endif |
| 41 | #ifndef INST4 |
| 42 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) |
| 43 | #endif |
| 44 | #ifndef INST5 |
| 45 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) |
| 46 | #endif |
| 47 | |
| 48 | /*****************************************************************************/ |
| 49 | /* The following is x86-specific */ |
| 50 | /*****************************************************************************/ |
| 51 | |
// INST5 rows: instructions described with all five base-encoding columns (mr, mi, rm, a4, rr).
// Column meanings are given in the legend comment at the top of this file; the row below names them in order.
// BAD_CODE fills the columns for which a row supplies no encoding (presumably "form not available" — confirm against the emitter).
| 52 | // id nm um mr mi rm a4 rr flags |
| 53 | INST5(invalid, "INVALID" , IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 54 | |
| 55 | INST5(push, "push" , IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None) |
| 56 | INST5(pop, "pop" , IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None) |
| 57 | // push_hide/pop_hide carry the same name and encodings as push/pop above, but do not affect the stack tracking in the emitter |
| 58 | INST5(push_hide, "push" , IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None) |
| 59 | INST5(pop_hide, "pop" , IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None) |
| 60 | |
// inc_l/dec_l differ from inc/dec only in the rr column: they use the modrm-based values
// 0x00C0FE/0x00C8FE instead of the single-byte 0x40/0x48 values.
// NOTE(review): presumably the long form exists for targets where 0x40-0x4F cannot be used as inc/dec — confirm.
| 61 | INST5(inc, "inc" , IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_FLAGS_WritesFlags) |
| 62 | INST5(inc_l, "inc" , IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_FLAGS_WritesFlags) |
| 63 | INST5(dec, "dec" , IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesFlags) |
| 64 | INST5(dec_l, "dec" , IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesFlags) |
| 65 | |
| 66 | // Multi-byte opcodes without modrm are represented in mixed endian fashion. |
| 67 | // See the encoding-layout comment (the one starting "the hex codes in this file") further down this file for more information. |
| 68 | INST5(bswap, "bswap" , IUM_RW, 0x0F00C8, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C80F, INS_FLAGS_None) |
| 69 | |
// INST4 rows: instructions described with four base-encoding columns (mr, mi, rm, a4).
// For add..cmp the mr, rm and a4 values of a row sit at +0, +2 and +4 from the same base
// (e.g. sub: 0x28, 0x2A, 0x2C), and the mi column is 0x80 with 0x00, 0x08, ... 0x38 in the modrm byte position.
| 70 | // id nm um mr mi rm a4 flags |
| 71 | INST4(add, "add" , IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesFlags) |
| 72 | INST4(or, "or" , IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesFlags) |
| 73 | INST4(adc, "adc" , IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 74 | INST4(sbb, "sbb" , IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 75 | INST4(and, "and" , IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesFlags) |
| 76 | INST4(sub, "sub" , IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesFlags) |
| 77 | INST4(xor, "xor" , IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesFlags) |
// cmp and test are the only IUM_RD rows in this group: they write flags but their update mode is read-only.
| 78 | INST4(cmp, "cmp" , IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesFlags) |
| 79 | INST4(test, "test" , IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesFlags) |
| 80 | INST4(mov, "mov" , IUM_WR, 0x000088, 0x0000C6, 0x00008A, 0x0000B0, INS_FLAGS_None) |
| 81 | |
// lea supplies only the rm (reg,R/M) encoding.
| 82 | INST4(lea, "lea" , IUM_WR, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE, INS_FLAGS_None) |
| 83 | |
// INST3 rows: instructions described with three base-encoding columns (mr, mi, rm).
| 84 | // id nm um mr mi rm flags |
| 85 | |
| 86 | // Note that emitter has only partial support for BT. It can only emit the reg,reg form |
| 87 | // and the registers need to be reversed to get the correct encoding. |
| 88 | INST3(bt, "bt" , IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesFlags) |
| 89 | |
| 90 | INST3(movsx, "movsx" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BE, INS_FLAGS_None) |
// movsxd exists only for _TARGET_AMD64_; its rm value is wider than 32 bits (0x48 above the usual layout).
// NOTE(review): the leading 0x48 is presumably a REX.W prefix — confirm against the emitter.
| 91 | #ifdef _TARGET_AMD64_ |
| 92 | INST3(movsxd, "movsxd" , IUM_WR, BAD_CODE, BAD_CODE, 0x4800000063, INS_FLAGS_None) |
| 93 | #endif |
| 94 | INST3(movzx, "movzx" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_FLAGS_None) |
| 95 | |
// Conditional moves: rm-only, consecutive values 0x0F0040..0x0F004F, all marked as reading flags.
| 96 | INST3(cmovo, "cmovo" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsFlags) |
| 97 | INST3(cmovno, "cmovno" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsFlags) |
| 98 | INST3(cmovb, "cmovb" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsFlags) |
| 99 | INST3(cmovae, "cmovae" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsFlags) |
| 100 | INST3(cmove, "cmove" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsFlags) |
| 101 | INST3(cmovne, "cmovne" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsFlags) |
| 102 | INST3(cmovbe, "cmovbe" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsFlags) |
| 103 | INST3(cmova, "cmova" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsFlags) |
| 104 | INST3(cmovs, "cmovs" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsFlags) |
| 105 | INST3(cmovns, "cmovns" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsFlags) |
| 106 | INST3(cmovpe, "cmovpe" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsFlags) |
| 107 | INST3(cmovpo, "cmovpo" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsFlags) |
| 108 | INST3(cmovl, "cmovl" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsFlags) |
| 109 | INST3(cmovge, "cmovge" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsFlags) |
| 110 | INST3(cmovle, "cmovle" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsFlags) |
| 111 | INST3(cmovg, "cmovg" , IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsFlags) |
| 112 | |
| 113 | INST3(xchg, "xchg" , IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None) |
// NOTE(review): imul's mr value (0x0F00AC) differs from its rm value (0x0F00AF); in the Intel opcode map
// 0F AC is SHRD, not IMUL — confirm that the mr form of imul is never emitted.
| 114 | INST3(imul, "imul" , IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesFlags) // op1 *= op2 |
| 115 | |
// imul_XX rows: one pseudo-instruction per implicit target register, each supplying only the mi column.
| 116 | // id nm um mr mi rm flags |
| 117 | |
| 118 | // Instead of encoding these as 3-operand instructions, we encode them |
| 119 | // as 2-operand instructions with the target register being implicit |
| 120 | // implicit_reg = op1*op2_icon |
// INSTMUL is a plain alias for INST3, so these rows take the same seven arguments.
| 121 | #define INSTMUL INST3 |
// The mi value steps by 0x0800 from one register to the next (AX=0x000068 ... DI=0x003868),
// i.e. the register index occupies bits 3-5 of the byte in the modrm position.
| 122 | INSTMUL(imul_AX, "imul" , IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 123 | INSTMUL(imul_CX, "imul" , IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 124 | INSTMUL(imul_DX, "imul" , IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 125 | INSTMUL(imul_BX, "imul" , IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_FLAGS_WritesFlags) |
// imul_SP keeps its slot in the sequence but has no encoding (the pattern value 0x002068 is not used here).
| 126 | INSTMUL(imul_SP, "imul" , IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_WritesFlags) |
| 127 | INSTMUL(imul_BP, "imul" , IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 128 | INSTMUL(imul_SI, "imul" , IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 129 | INSTMUL(imul_DI, "imul" , IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 130 | |
// AMD64-only rows for registers 8-15: same low 24 bits as the eight rows above (including 0x2068 for
// imul_12) with 0x44 added above the usual layout.
// NOTE(review): 0x44 is presumably the REX prefix selecting the extended register — confirm.
| 131 | #ifdef _TARGET_AMD64_ |
| 132 | |
| 133 | INSTMUL(imul_08, "imul" , IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 134 | INSTMUL(imul_09, "imul" , IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 135 | INSTMUL(imul_10, "imul" , IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 136 | INSTMUL(imul_11, "imul" , IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 137 | INSTMUL(imul_12, "imul" , IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 138 | INSTMUL(imul_13, "imul" , IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 139 | INSTMUL(imul_14, "imul" , IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_FLAGS_WritesFlags) |
| 140 | INSTMUL(imul_15, "imul" , IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_FLAGS_WritesFlags) |
| 141 | |
| 142 | #endif // _TARGET_AMD64_ |
| 143 | |
| 144 | // the hex codes in this file represent the instruction encoding as follows: |
| 145 | // 0x0000ff00 - modrm byte position |
| 146 | // 0x000000ff - last byte of opcode (before modrm) |
| 147 | // 0x00ff0000 - first byte of opcode |
| 148 | // 0xff000000 - middle byte of opcode, if needed (after first, before last) |
| 149 | // |
| 150 | // So a 1-byte opcode is: and with modrm: |
| 151 | // 0x00000011 0x0000RM11 |
| 152 | // |
| 153 | // So a 2-byte opcode is: and with modrm: |
| 154 | // 0x00002211 0x0011RM22 |
| 155 | // |
| 156 | // So a 3-byte opcode is: and with modrm: |
| 157 | // 0x00113322 0x2211RM33 |
| 158 | // |
| 159 | // So a 4-byte opcode would be something like this: |
| 160 | // 0x22114433 |
| 161 | |
// Helpers that pack prefix/opcode bytes into the encoding layout used by this table:
//   bits 16-23 : byte1 (first opcode/prefix byte)
//   bits 24-31 : byte2 (middle byte, PACK3/PACK4 only)
//   bits  0-7  : last opcode byte before the modrm byte (byte2 for PACK2, byte3 otherwise)
//   bits  8-15 : modrm byte position (PACK4 places byte4 here)
//
// Every parameter is parenthesized so that an argument which is itself an expression
// (e.g. `a | b` or `cond ? x : y`) is packed as a single value instead of being
// re-associated with the surrounding `<<` and `|` operators.
#define PACK3(byte1,byte2,byte3) (((byte1) << 16) | ((byte2) << 24) | (byte3))
#define PACK2(byte1,byte2) (((byte1) << 16) | (byte2))

// Prefix selectors: the final opcode byte c is combined with 0x0f and the given mandatory prefix.
#define SSEFLT(c) PACK3(0xf3, 0x0f, c)
#define SSEDBL(c) PACK3(0xf2, 0x0f, c)
#define PCKDBL(c) PACK3(0x66, 0x0f, c)
#define PCKFLT(c) PACK2(0x0f,c)

// These macros encode extra byte that is implicit in the macro:
// 0x38 / 0x3A go in the low byte and the caller's opcode byte c goes in the modrm position.
#define PACK4(byte1,byte2,byte3,byte4) (((byte1) << 16) | ((byte2) << 24) | (byte3) | ((byte4) << 8))
#define SSE38(c) PACK4(0x66, 0x0f, 0x38, c)
#define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c)

// VEX* encodes the implied leading opcode bytes in c1:
// 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a
// NOTE(review): VEX3INT and VEX3FLT expand to the same value; 0xc5 << 24 sets bit 31 of the
// packed result — confirm the consumer's integer type handles that as intended.
#define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2)
#define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
| 179 | |
// SSE data-movement, shuffle and unpack rows. The prefix macro picks the encoding family for the
// final opcode byte: PCKFLT (0x0f only), PCKDBL (0x66 0x0f), SSEFLT (0xf3 0x0f), SSEDBL (0xf2 0x0f).
// Rows with both mr and rm values have a store form (mr) and a load form (rm).
// FIRST_SSE_INSTRUCTION is a marker row with no encodings (presumably a range bound for the enum — confirm).
| 180 | INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 181 | // These are the SSE instructions used on x86 |
| 182 | INST3(mov_i2xmm, "movd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg |
| 183 | INST3(mov_xmm2i, "movd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg |
| 184 | INST3(pmovmskb, "pmovmskb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_FLAGS_None) // Move the MSB bits of all bytes in a xmm reg to an int reg |
| 185 | INST3(movmskpd, "movmskpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_FLAGS_None) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. |
| 186 | INST3(movd, "movd" , IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) |
| 187 | INST3(movq, "movq" , IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None) |
| 188 | INST3(movsdsse2, "movsd" , IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_Flags_IsDstSrcSrcAVXInstruction) |
| 189 | |
| 190 | INST3(punpckldq, "punpckldq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_Flags_IsDstDstSrcAVXInstruction) |
| 191 | |
| 192 | INST3(xorps, "xorps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles |
| 193 | |
| 194 | INST3(cvttsd2si, "cvttsd2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_FLAGS_None) // cvt with trunc scalar double to signed DWORDs |
| 195 | |
| 196 | INST3(movntdq, "movntdq" , IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 197 | INST3(movnti, "movnti" , IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 198 | INST3(movntpd, "movntpd" , IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 199 | INST3(movntps, "movntps" , IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 200 | INST3(movdqu, "movdqu" , IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_FLAGS_None) |
| 201 | INST3(movdqa, "movdqa" , IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_FLAGS_None) |
| 202 | INST3(movlpd, "movlpd" , IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), INS_Flags_IsDstSrcSrcAVXInstruction) |
| 203 | INST3(movlps, "movlps" , IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstSrcSrcAVXInstruction) |
| 204 | INST3(movhpd, "movhpd" , IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), INS_Flags_IsDstSrcSrcAVXInstruction) |
| 205 | INST3(movhps, "movhps" , IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstSrcSrcAVXInstruction) |
| 206 | INST3(movss, "movss" , IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), INS_Flags_IsDstSrcSrcAVXInstruction) |
| 207 | INST3(movapd, "movapd" , IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), INS_FLAGS_None) |
| 208 | INST3(movaps, "movaps" , IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), INS_FLAGS_None) |
| 209 | INST3(movupd, "movupd" , IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), INS_FLAGS_None) |
| 210 | INST3(movups, "movups" , IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), INS_FLAGS_None) |
// movhlps/movlhps reuse the rm opcode bytes of movlps (0x12) and movhps (0x16) respectively.
| 211 | INST3(movhlps, "movhlps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), INS_Flags_IsDstDstSrcAVXInstruction) |
| 212 | INST3(movlhps, "movlhps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), INS_Flags_IsDstDstSrcAVXInstruction) |
| 213 | INST3(movmskps, "movmskps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), INS_FLAGS_None) |
| 214 | INST3(unpckhps, "unpckhps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_Flags_IsDstDstSrcAVXInstruction) |
| 215 | INST3(unpcklps, "unpcklps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_Flags_IsDstDstSrcAVXInstruction) |
| 216 | INST3(maskmovdqu, "maskmovdqu" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), INS_FLAGS_None) |
| 217 | |
| 218 | INST3(shufps, "shufps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_Flags_IsDstDstSrcAVXInstruction) |
| 219 | INST3(shufpd, "shufpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_Flags_IsDstDstSrcAVXInstruction) |
| 220 | |
| 221 | INST3(punpckhdq, "punpckhdq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_Flags_IsDstDstSrcAVXInstruction) |
| 222 | |
// Fences and prefetch hints: mr-only rows written as literal values in the "2-byte opcode with modrm"
// layout (0x0011RM22): first byte 0x0F, last opcode byte 0xAE (fences) or 0x18 (prefetches), and the
// variant selected by the byte in the modrm position (E8/F0/F8 for fences, 00/08/10/18 for prefetches).
| 223 | INST3(lfence, "lfence" , IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 224 | INST3(mfence, "mfence" , IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 225 | INST3(prefetchnta, "prefetchnta" , IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 226 | INST3(prefetcht0, "prefetcht0" , IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 227 | INST3(prefetcht1, "prefetcht1" , IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 228 | INST3(prefetcht2, "prefetcht2" , IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 229 | INST3(sfence, "sfence" , IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 230 | |
// Packed/scalar floating-point arithmetic and bitwise-logic rows; every row supplies only the rm column.
// The four variants of one operation share the final opcode byte and differ only in the prefix macro:
// PCKFLT = packed single, SSEFLT = scalar single, PCKDBL = packed double, SSEDBL = scalar double.
// Exception: haddps/hsubps/addsubps are packed-single operations but use the SSEDBL prefix.
| 231 | // SSE 2 arith |
| 232 | INST3(addps, "addps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles |
| 233 | INST3(addss, "addss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles |
| 234 | INST3(addpd, "addpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles |
| 235 | INST3(addsd, "addsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles |
| 236 | INST3(mulps, "mulps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles |
| 237 | INST3(mulss, "mulss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single |
| 238 | INST3(mulpd, "mulpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles |
| 239 | INST3(mulsd, "mulsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles |
| 240 | INST3(subps, "subps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles |
| 241 | INST3(subss, "subss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles |
| 242 | INST3(subpd, "subpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed doubles |
| 243 | INST3(subsd, "subsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles |
| 244 | INST3(minps, "minps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles |
| 245 | INST3(minss, "minss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single |
| 246 | INST3(minpd, "minpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles |
| 247 | INST3(minsd, "minsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double |
| 248 | INST3(divps, "divps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles |
| 249 | INST3(divss, "divss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles |
| 250 | INST3(divpd, "divpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles |
| 251 | INST3(divsd, "divsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles |
| 252 | INST3(maxps, "maxps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles |
| 253 | INST3(maxss, "maxss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single |
| 254 | INST3(maxpd, "maxpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles |
| 255 | INST3(maxsd, "maxsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double |
| 256 | INST3(xorpd, "xorpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles |
| 257 | INST3(andps, "andps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles |
| 258 | INST3(andpd, "andpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles |
// The sqrt rows break the flag pattern above: packed forms carry INS_FLAGS_None, scalar forms IsDstSrcSrc.
| 259 | INST3(sqrtps, "sqrtps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_FLAGS_None) // Sqrt of packed singles |
| 260 | INST3(sqrtss, "sqrtss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single |
| 261 | INST3(sqrtpd, "sqrtpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_FLAGS_None) // Sqrt of packed doubles |
| 262 | INST3(sqrtsd, "sqrtsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double |
| 263 | INST3(andnps, "andnps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles |
| 264 | INST3(andnpd, "andnpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles |
| 265 | INST3(orps, "orps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles |
| 266 | INST3(orpd, "orpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles |
| 267 | INST3(haddpd, "haddpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles |
| 268 | INST3(haddps, "haddps" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats |
| 269 | INST3(hsubpd, "hsubpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles |
| 270 | INST3(hsubps, "hsubps" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats |
| 271 | INST3(addsubps, "addsubps" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles |
| 272 | INST3(addsubpd, "addsubpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles |
| 273 | |
// Approximate reciprocal / reciprocal-square-root rows (single precision only); rm column only.
// As with sqrt, the packed forms carry INS_FLAGS_None and the scalar forms IsDstSrcSrc.
| 274 | // SSE 2 approx arith |
| 275 | INST3(rcpps, "rcpps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), INS_FLAGS_None) // Reciprocal of packed singles |
| 276 | INST3(rcpss, "rcpss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single |
| 277 | INST3(rsqrtps, "rsqrtps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_FLAGS_None) // Reciprocal Sqrt of packed singles |
| 278 | INST3(rsqrtss, "rsqrtss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single |
| 279 | |
// Conversion rows between integer, single and double formats; rm column only.
// Rows sharing a final opcode byte (0x2A, 0x2C, 0x2D, 0x5A, 0x5B, 0xE6) are told apart purely by the
// prefix macro (PCKFLT / PCKDBL / SSEFLT / SSEDBL), so the macro choice is as significant as the byte.
| 280 | // SSE2 conversions |
| 281 | INST3(cvtpi2ps, "cvtpi2ps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2A), INS_FLAGS_None) // cvt packed DWORDs to singles |
| 282 | INST3(cvtsi2ss, "cvtsi2ss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single |
| 283 | INST3(cvtpi2pd, "cvtpi2pd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2A), INS_FLAGS_None) // cvt packed DWORDs to doubles |
| 284 | INST3(cvtsi2sd, "cvtsi2sd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double |
| 285 | INST3(cvttps2pi, "cvttps2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2C), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs |
| 286 | INST3(cvttss2si, "cvttss2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), INS_FLAGS_None) // cvt with trunc scalar single to DWORD |
| 287 | INST3(cvttpd2pi, "cvttpd2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2C), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs |
| 288 | INST3(cvtps2pi, "cvtps2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x2D), INS_FLAGS_None) // cvt packed singles to DWORDs |
| 289 | INST3(cvtss2si, "cvtss2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), INS_FLAGS_None) // cvt scalar single to DWORD |
| 290 | INST3(cvtpd2pi, "cvtpd2pi" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x2D), INS_FLAGS_None) // cvt packed doubles to DWORDs |
| 291 | INST3(cvtsd2si, "cvtsd2si" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_FLAGS_None) // cvt scalar double to DWORD |
| 292 | INST3(cvtps2pd, "cvtps2pd" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_FLAGS_None) // cvt packed singles to doubles |
| 293 | INST3(cvtpd2ps, "cvtpd2ps" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_FLAGS_None) // cvt packed doubles to singles |
| 294 | INST3(cvtss2sd, "cvtss2sd" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles |
| 295 | INST3(cvtsd2ss, "cvtsd2ss" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles |
| 296 | INST3(cvtdq2ps, "cvtdq2ps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_FLAGS_None) // cvt packed DWORDs to singles |
| 297 | INST3(cvtps2dq, "cvtps2dq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_FLAGS_None) // cvt packed singles to DWORDs |
| 298 | INST3(cvttps2dq, "cvttps2dq" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_FLAGS_None) // cvt with trunc packed singles to DWORDs |
| 299 | INST3(cvtpd2dq, "cvtpd2dq" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_FLAGS_None) // cvt packed doubles to DWORDs |
| 300 | INST3(cvttpd2dq, "cvttpd2dq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_FLAGS_None) // cvt with trunc packed doubles to DWORDs |
| 301 | INST3(cvtdq2pd, "cvtdq2pd" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_FLAGS_None) // cvt packed DWORDs to doubles |
| 302 | |
// Scalar compare rows (IUM_RD, rm column only): ordered compares use 0x2F, unordered use 0x2E.
// NOTE(review): these compares are listed with INS_FLAGS_None, whereas the integer cmp/test rows in this
// file carry INS_FLAGS_WritesFlags — confirm whether the flag is intentionally omitted here.
| 303 | // SSE2 comparison instructions |
| 304 | INST3(comiss, "comiss" , IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_None) // ordered compare singles |
| 305 | INST3(comisd, "comisd" , IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_None) // ordered compare doubles |
| 306 | INST3(ucomiss, "ucomiss" , IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_None) // unordered compare singles |
| 307 | INST3(ucomisd, "ucomisd" , IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_None) // unordered compare doubles |
| 308 | |
// All four rows share opcode byte 0xC2 and differ only in the prefix macro; unlike comiss/ucomiss above
// they are IUM_WR because the result replaces the first operand.
| 309 | // SSE2 packed single/double comparison operations. |
| 310 | // Note that these instructions not only compare but also overwrite the first source. |
| 311 | INST3(cmpps, "cmpps" , IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles |
| 312 | INST3(cmppd, "cmppd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles |
| 313 | INST3(cmpss, "cmpss" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles |
| 314 | INST3(cmpsd, "cmpsd" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles |
| 315 | |
// Packed-integer arithmetic and logic rows: all use the PCKDBL (0x66 0x0f) prefix, supply only the rm
// column, and are flagged IsDstDstSrc.
| 316 | //SSE2 packed integer operations |
| 317 | INST3(paddb, "paddb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers |
| 318 | INST3(paddw, "paddw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers |
| 319 | INST3(paddd, "paddd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers |
| 320 | INST3(paddq, "paddq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers |
| 321 | INST3(paddsb, "paddsb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results |
| 322 | INST3(paddsw, "paddsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results |
| 323 | INST3(paddusb, "paddusb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results |
| 324 | INST3(paddusw, "paddusw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results |
| 325 | INST3(pavgb, "pavgb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers |
| 326 | INST3(pavgw, "pavgw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers |
| 327 | INST3(psubb, "psubb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed byte (8-bit) integers |
| 328 | INST3(psubw, "psubw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers |
| 329 | INST3(psubd, "psubd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers |
| 330 | INST3(psubq, "psubq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers |
| 331 | INST3(pmaddwd, "pmaddwd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst |
| 332 | INST3(pmulhw, "pmulhw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers |
| 333 | INST3(pmulhuw, "pmulhuw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers |
| 334 | INST3(pmuludq, "pmuludq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result |
| 335 | INST3(pmullw, "pmullw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result |
| 336 | INST3(pand, "pand" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs |
| 337 | INST3(pandn, "pandn" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs |
| 338 | INST3(por, "por" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs |
| 339 | INST3(pxor, "pxor" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs |
| 340 | INST3(psadbw, "psadbw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers |
| 341 | INST3(psubsb, "psubsb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation |
| 342 | INST3(psubusb, "psubusb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation |
| 343 | INST3(psubsw, "psubsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation |
| 344 | INST3(psubusw, "psubusw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation |
| 345 | |
// Packed shift rows. The mi column is the shift-by-immediate form and the rm column the shift-by-xmm/mem
// form; psrldq/pslldq have only the immediate form. The immediate opcode depends only on element width
// (0x71 = 16-bit, 0x72 = 32-bit, 0x73 = 64-bit and whole-register byte shifts).
| 346 | // Note that the shift immediates share the same encoding between left and right-shift, and are distinguished by the Reg/Opcode, |
| 347 | // which is handled in emitxarch.cpp. |
| 348 | INST3(psrldq, "psrldq" , IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes |
| 349 | INST3(pslldq, "pslldq" , IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes |
| 350 | INST3(psllw, "psllw" , IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers |
| 351 | INST3(pslld, "pslld" , IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers |
| 352 | INST3(psllq, "psllq" , IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers |
| 353 | INST3(psrlw, "psrlw" , IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers |
| 354 | INST3(psrld, "psrld" , IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers |
| 355 | INST3(psrlq, "psrlq" , IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers |
| 356 | INST3(psraw, "psraw" , IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers |
| 357 | INST3(psrad, "psrad" , IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers |
| 358 | |
| 359 | INST3(pmaxub, "pmaxub" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes |
| 360 | INST3(pminub, "pminub" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes |
| 361 | INST3(pmaxsw, "pmaxsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words |
| 362 | INST3(pminsw, "pminsw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words |
| 363 | INST3(pcmpeqd, "pcmpeqd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality |
| 364 | INST3(pcmpgtd, "pcmpgtd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than |
| 365 | INST3(pcmpeqw, "pcmpeqw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality |
| 366 | INST3(pcmpgtw, "pcmpgtw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than |
| 367 | INST3(pcmpeqb, "pcmpeqb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality |
| 368 | INST3(pcmpgtb, "pcmpgtb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than |
| 369 | |
| 370 | INST3(pshufd, "pshufd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_FLAGS_None) // Packed shuffle of 32-bit integers |
| 371 | INST3(pshufhw, "pshufhw" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_FLAGS_None) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. |
| 372 | INST3(pshuflw, "pshuflw" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_FLAGS_None) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. |
| 373 | INST3(pextrw, "pextrw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), INS_FLAGS_None) // Extract 16-bit value into a r32 with zero extended to 32-bits |
| 374 | INST3(pinsrw, "pinsrw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index |
| 375 | |
| 376 | INST3(punpckhbw, "punpckhbw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) |
| 377 | INST3(punpcklbw, "punpcklbw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) |
| 378 | INST3(punpckhqdq, "punpckhqdq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi) |
| 379 | INST3(punpcklqdq, "punpcklqdq" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo) |
| 380 | INST3(punpckhwd, "punpckhwd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) |
| 381 | INST3(punpcklwd, "punpcklwd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) |
| 382 | INST3(unpckhpd, "unpckhpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave the high packed double-precision floating-point values |
| 383 | INST3(unpcklpd, "unpcklpd" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave the low packed double-precision floating-point values |
| 384 | |
| 385 | INST3(packssdw, "packssdw" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation |
| 386 | INST3(packsswb, "packsswb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation |
| 387 | INST3(packuswb, "packuswb" , IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation |
| 388 | |
| 389 | // id nm um mr mi rm flags |
| 390 | INST3(dpps, "dpps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs |
| 391 | INST3(dppd, "dppd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs |
| 392 | INST3(insertps, "insertps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value |
| 393 | INST3(pcmpeqq, "pcmpeqq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality |
| 394 | INST3(pcmpgtq, "pcmpgtq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than |
| 395 | INST3(pmulld, "pmulld" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32-bit integers and store lower 32 bits of each result |
| 396 | INST3(ptest, "ptest" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x17), INS_FLAGS_None) // Packed logical compare |
| 397 | INST3(phaddd, "phaddd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add |
| 398 | INST3(pabsb, "pabsb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_FLAGS_None) // Packed absolute value of bytes |
| 399 | INST3(pabsw, "pabsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_FLAGS_None) // Packed absolute value of 16-bit integers |
| 400 | INST3(pabsd, "pabsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_FLAGS_None) // Packed absolute value of 32-bit integers |
| 401 | INST3(palignr, "palignr" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right |
| 402 | INST3(pmaddubsw, "pmaddubsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes |
| 403 | INST3(pmulhrsw, "pmulhrsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale |
| 404 | INST3(pshufb, "pshufb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes |
| 405 | INST3(psignb, "psignb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN |
| 406 | INST3(psignw, "psignw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN |
| 407 | INST3(psignd, "psignd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN |
| 408 | INST3(pminsb, "pminsb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes |
| 409 | INST3(pminsd, "pminsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers |
| 410 | INST3(pminuw, "pminuw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers |
| 411 | INST3(pminud, "pminud" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers |
| 412 | INST3(pmaxsb, "pmaxsb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes |
| 413 | INST3(pmaxsd, "pmaxsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers |
| 414 | INST3(pmaxuw, "pmaxuw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers |
| 415 | INST3(pmaxud, "pmaxud" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers |
| 416 | INST3(pmovsxbw, "pmovsxbw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), INS_FLAGS_None) // Packed sign extend byte to short |
| 417 | INST3(pmovsxbd, "pmovsxbd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), INS_FLAGS_None) // Packed sign extend byte to int |
| 418 | INST3(pmovsxbq, "pmovsxbq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), INS_FLAGS_None) // Packed sign extend byte to long |
| 419 | INST3(pmovsxwd, "pmovsxwd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), INS_FLAGS_None) // Packed sign extend short to int |
| 420 | INST3(pmovsxwq, "pmovsxwq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), INS_FLAGS_None) // Packed sign extend short to long |
| 421 | INST3(pmovsxdq, "pmovsxdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), INS_FLAGS_None) // Packed sign extend int to long |
| 422 | INST3(pmovzxbw, "pmovzxbw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), INS_FLAGS_None) // Packed zero extend byte to short |
| 423 | INST3(pmovzxbd, "pmovzxbd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), INS_FLAGS_None) // Packed zero extend byte to int |
| 424 | INST3(pmovzxbq, "pmovzxbq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), INS_FLAGS_None) // Packed zero extend byte to long |
| 425 | INST3(pmovzxwd, "pmovzxwd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), INS_FLAGS_None) // Packed zero extend short to int |
| 426 | INST3(pmovzxwq, "pmovzxwq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), INS_FLAGS_None) // Packed zero extend short to long |
| 427 | INST3(pmovzxdq, "pmovzxdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), INS_FLAGS_None) // Packed zero extend int to long |
| 428 | INST3(packusdw, "packusdw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation |
| 429 | INST3(roundps, "roundps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_FLAGS_None) // Round packed single precision floating-point values |
| 430 | INST3(roundss, "roundss" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values |
| 431 | INST3(roundpd, "roundpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_FLAGS_None) // Round packed double precision floating-point values |
| 432 | INST3(roundsd, "roundsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values |
| 433 | INST3(pmuldq, "pmuldq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result |
| 434 | INST3(blendps, "blendps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values |
| 435 | INST3(blendvps, "blendvps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_FLAGS_None) // Variable Blend Packed Singles |
| 436 | INST3(blendpd, "blendpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values |
| 437 | INST3(blendvpd, "blendvpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_FLAGS_None) // Variable Blend Packed Doubles |
| 438 | INST3(pblendw, "pblendw" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words |
| 439 | INST3(pblendvb, "pblendvb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_FLAGS_None) // Variable Blend Packed Bytes |
| 440 | INST3(phaddw, "phaddw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers |
| 441 | INST3(phsubw, "phsubw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers |
| 442 | INST3(phsubd, "phsubd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers |
| 443 | INST3(phaddsw, "phaddsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation |
| 444 | INST3(phsubsw, "phsubsw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation |
| 445 | INST3(lddqu, "lddqu" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), INS_FLAGS_None) // Load Unaligned integer |
| 446 | INST3(movntdqa, "movntdqa" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_FLAGS_None) // Load Double Quadword Non-Temporal Aligned Hint |
| 447 | INST3(movddup, "movddup" , IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), INS_FLAGS_None) // Replicate Double FP Values |
| 448 | INST3(movsldup, "movsldup" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), INS_FLAGS_None) // Replicate even-indexed Single FP Values |
| 449 | INST3(movshdup, "movshdup" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), INS_FLAGS_None) // Replicate odd-indexed Single FP Values |
| 450 | INST3(phminposuw, "phminposuw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), INS_FLAGS_None) // Packed Horizontal Word Minimum |
| 451 | INST3(mpsadbw, "mpsadbw" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference |
| 452 | INST3(pinsrb, "pinsrb" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte |
| 453 | INST3(pinsrd, "pinsrd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword |
| 454 | INST3(pinsrq, "pinsrq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword |
| 455 | INST3(pextrb, "pextrb" , IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Byte |
| 456 | INST3(pextrd, "pextrd" , IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Dword |
| 457 | INST3(pextrq, "pextrq" , IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Qword |
| 458 | INST3(pextrw_sse41, "pextrw" , IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Word |
| 459 | INST3(extractps, "extractps" , IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract Packed Floating-Point Values |
| 460 | |
| 461 | //PCLMULQDQ instructions |
| 462 | INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords |
| 463 | |
| 464 | //AES instructions |
| 465 | INST3(aesdec, "aesdec" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow |
| 466 | INST3(aesdeclast, "aesdeclast" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow |
| 467 | INST3(aesenc, "aesenc" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow |
| 468 | INST3(aesenclast, "aesenclast" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow |
| 469 | INST3(aesimc, "aesimc" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_FLAGS_None) // Perform the AES InvMixColumn Transformation |
| 470 | INST3(aeskeygenassist, "aeskeygenassist" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_FLAGS_None) // AES Round Key Generation Assist |
| 471 | INST3(LAST_SSE_INSTRUCTION, "LAST_SSE_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 472 | |
| 473 | INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 474 | // AVX only instructions |
| 475 | INST3(vbroadcastss, "broadcastss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_FLAGS_None) // Broadcast float value read from memory to entire ymm register |
| 476 | INST3(vbroadcastsd, "broadcastsd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_FLAGS_None) // Broadcast double value read from memory to entire ymm register |
| 477 | INST3(vpbroadcastb, "pbroadcastb" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_FLAGS_None) // Broadcast int8 value from reg/memory to entire ymm register |
| 478 | INST3(vpbroadcastw, "pbroadcastw" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_FLAGS_None) // Broadcast int16 value from reg/memory to entire ymm register |
| 479 | INST3(vpbroadcastd, "pbroadcastd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_FLAGS_None) // Broadcast int32 value from reg/memory to entire ymm register |
| 480 | INST3(vpbroadcastq, "pbroadcastq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_FLAGS_None) // Broadcast int64 value from reg/memory to entire ymm register |
| 481 | INST3(vextractf128, "extractf128" , IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed floating point values |
| 482 | INST3(vextracti128, "extracti128" , IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, INS_FLAGS_None) // Extract 128-bit packed integer values |
| 483 | INST3(vinsertf128, "insertf128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values |
| 484 | INST3(vinserti128, "inserti128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values |
| 485 | INST3(vzeroupper, "zeroupper" , IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_FLAGS_None) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) |
| 486 | INST3(vperm2i128, "perm2i128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register |
| 487 | INST3(vpermq, "permq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_FLAGS_None) // Permute 64-bit of input register |
| 488 | INST3(vpblendd, "pblendd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs |
| 489 | INST3(vblendvps, "blendvps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles |
| 490 | INST3(vblendvpd, "blendvpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles |
| 491 | INST3(vpblendvb, "pblendvb" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes |
| 492 | INST3(vtestps, "testps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_FLAGS_None) // Packed Bit Test |
| 493 | INST3(vtestpd, "testpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_FLAGS_None) // Packed Bit Test |
| 494 | INST3(vpsrlvd, "psrlvd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical |
| 495 | INST3(vpsrlvq, "psrlvq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical |
| 496 | INST3(vpsravd, "psravd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic |
| 497 | INST3(vpsllvd, "psllvd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical |
| 498 | INST3(vpsllvq, "psllvq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical |
| 499 | INST3(vpermilps, "permilps" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_FLAGS_None) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values |
| 500 | INST3(vpermilpd, "permilpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_FLAGS_None) // Permute In-Lane of Pairs of Double-Precision Floating-Point Values |
| 501 | INST3(vpermilpsvar, "permilpsvar" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values |
| 502 | INST3(vpermilpdvar, "permilpdvar" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Pairs of Double-Precision Floating-Point Values |
| 503 | INST3(vperm2f128, "perm2f128" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values |
| 504 | INST3(vpermpd, "permpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_FLAGS_None) // Permute Double-Precision Floating-Point Values |
| 505 | INST3(vpermd, "permd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements |
| 506 | INST3(vpermps, "permps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements |
| 507 | INST3(vbroadcastf128, "broadcastf128" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_FLAGS_None) // Broadcast packed float values read from memory to entire ymm register |
| 508 | INST3(vbroadcasti128, "broadcasti128" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_FLAGS_None) // Broadcast packed integer values read from memory to entire ymm register |
| 509 | INST3(vmaskmovps, "maskmovps" , IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores |
| 510 | INST3(vmaskmovpd, "maskmovpd" , IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores |
| 511 | INST3(vpmaskmovd, "pmaskmovd" , IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores |
| 512 | INST3(vpmaskmovq, "pmaskmovq" , IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores |
| 513 | INST3(vpgatherdd, "pgatherdd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword Indices |
| 514 | INST3(vpgatherqd, "pgatherqd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword Indices |
| 515 | INST3(vpgatherdq, "pgatherdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Dword Indices |
| 516 | INST3(vpgatherqq, "pgatherqq" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Qword Indices |
| 517 | INST3(vgatherdps, "gatherdps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices |
| 518 | INST3(vgatherqps, "gatherqps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices |
| 519 | INST3(vgatherdpd, "gatherdpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices |
| 520 | INST3(vgatherqpd, "gatherqpd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices |
| 521 | |
| 522 | INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 523 | // id nm um mr mi rm flags |
| 524 | INST3(vfmadd132pd, "fmadd132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values |
| 525 | INST3(vfmadd213pd, "fmadd213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values (213 operand order) |
| 526 | INST3(vfmadd231pd, "fmadd231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values (231 operand order) |
| 527 | INST3(vfmadd132ps, "fmadd132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x98), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values |
| 528 | INST3(vfmadd213ps, "fmadd213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values (213 operand order) |
| 529 | INST3(vfmadd231ps, "fmadd231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values (231 operand order) |
| 530 | INST3(vfmadd132sd, "fmadd132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values |
| 531 | INST3(vfmadd213sd, "fmadd213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values (213 operand order) |
| 532 | INST3(vfmadd231sd, "fmadd231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values (231 operand order) |
| 533 | INST3(vfmadd132ss, "fmadd132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x99), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values |
| 534 | INST3(vfmadd213ss, "fmadd213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values (213 operand order) |
| 535 | INST3(vfmadd231ss, "fmadd231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values (231 operand order) |
| 536 | INST3(vfmaddsub132pd, "fmaddsub132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values |
| 537 | INST3(vfmaddsub213pd, "fmaddsub213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values (213 operand order) |
| 538 | INST3(vfmaddsub231pd, "fmaddsub231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values (231 operand order) |
| 539 | INST3(vfmaddsub132ps, "fmaddsub132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x96), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values |
| 540 | INST3(vfmaddsub213ps, "fmaddsub213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values (213 operand order) |
| 541 | INST3(vfmaddsub231ps, "fmaddsub231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values (231 operand order) |
| 542 | INST3(vfmsubadd132pd, "fmsubadd132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values |
| 543 | INST3(vfmsubadd213pd, "fmsubadd213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values (213 operand order) |
| 544 | INST3(vfmsubadd231pd, "fmsubadd231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values (231 operand order) |
| 545 | INST3(vfmsubadd132ps, "fmsubadd132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x97), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values |
| 546 | INST3(vfmsubadd213ps, "fmsubadd213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values (213 operand order) |
| 547 | INST3(vfmsubadd231ps, "fmsubadd231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values (231 operand order) |
| 548 | INST3(vfmsub132pd, "fmsub132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values |
| 549 | INST3(vfmsub213pd, "fmsub213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values (213 operand order) |
| 550 | INST3(vfmsub231pd, "fmsub231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values (231 operand order) |
| 551 | INST3(vfmsub132ps, "fmsub132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values |
| 552 | INST3(vfmsub213ps, "fmsub213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values (213 operand order) |
| 553 | INST3(vfmsub231ps, "fmsub231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values (231 operand order) |
| 554 | INST3(vfmsub132sd, "fmsub132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values |
| 555 | INST3(vfmsub213sd, "fmsub213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values (213 operand order) |
| 556 | INST3(vfmsub231sd, "fmsub231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values (231 operand order) |
| 557 | INST3(vfmsub132ss, "fmsub132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values |
| 558 | INST3(vfmsub213ss, "fmsub213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values (213 operand order) |
| 559 | INST3(vfmsub231ss, "fmsub231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values (231 operand order) |
| 560 | INST3(vfnmadd132pd, "fnmadd132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values |
| 561 | INST3(vfnmadd213pd, "fnmadd213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values (213 operand order) |
| 562 | INST3(vfnmadd231pd, "fnmadd231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values (231 operand order) |
| 563 | INST3(vfnmadd132ps, "fnmadd132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values |
| 564 | INST3(vfnmadd213ps, "fnmadd213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values (213 operand order) |
| 565 | INST3(vfnmadd231ps, "fnmadd231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values (231 operand order) |
| 566 | INST3(vfnmadd132sd, "fnmadd132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values |
| 567 | INST3(vfnmadd213sd, "fnmadd213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values (213 operand order) |
| 568 | INST3(vfnmadd231sd, "fnmadd231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values (231 operand order) |
| 569 | INST3(vfnmadd132ss, "fnmadd132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values |
| 570 | INST3(vfnmadd213ss, "fnmadd213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values (213 operand order) |
| 571 | INST3(vfnmadd231ss, "fnmadd231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values (231 operand order) |
| 572 | INST3(vfnmsub132pd, "fnmsub132pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values |
| 573 | INST3(vfnmsub213pd, "fnmsub213pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values (213 operand order) |
| 574 | INST3(vfnmsub231pd, "fnmsub231pd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values (231 operand order) |
| 575 | INST3(vfnmsub132ps, "fnmsub132ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values |
| 576 | INST3(vfnmsub213ps, "fnmsub213ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values (213 operand order) |
| 577 | INST3(vfnmsub231ps, "fnmsub231ps" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values (231 operand order) |
| 578 | INST3(vfnmsub132sd, "fnmsub132sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values |
| 579 | INST3(vfnmsub213sd, "fnmsub213sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values (213 operand order) |
| 580 | INST3(vfnmsub231sd, "fnmsub231sd" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values (231 operand order) |
| 581 | INST3(vfnmsub132ss, "fnmsub132ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values |
| 582 | INST3(vfnmsub213ss, "fnmsub213ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values (213 operand order) |
| 583 | INST3(vfnmsub231ss, "fnmsub231ss" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values (231 operand order) |
| 584 | INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 585 | |
| 586 | // BMI1 -- blsi, blsmsk and blsr all carry opcode 0xF3 below; per the Intel SDM they differ only in the ModRM reg field (/3, /2, /1), which this table does not record |
| 587 | INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 588 | INST3(andn, "andn" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT |
| 589 | INST3(blsi, "blsi" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit |
| 590 | INST3(blsmsk, "blsmsk" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit |
| 591 | INST3(blsr, "blsr" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit |
| 592 | INST3(bextr, "bextr" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract |
| 593 | |
| 594 | // BMI2 -- pdep, pext and bzhi all carry opcode 0xF5 below; per the Intel SDM they differ in the mandatory prefix (F2, F3, none), which this table does not record |
| 595 | INST3(pdep, "pdep" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit |
| 596 | INST3(pext, "pext" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract |
| 597 | INST3(bzhi, "bzhi" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position |
| 598 | INST3(mulx, "mulx" , IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags |
| 599 | |
| 600 | INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 601 | |
| 602 | INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION" , IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) |
| 603 | |
| 604 | // Scalar instructions in SSE4.2 (crc32: Accumulate CRC32 Value) |
| 605 | INST3(crc32, "crc32" , IUM_WR, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0), INS_FLAGS_None) |
| 606 | |
| 607 | // BMI1 -- tzcnt is listed after LAST_BMI_INSTRUCTION and uses an SSEFLT encoding rather than SSE38 |
| 608 | INST3(tzcnt, "tzcnt" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_FLAGS_None) // Count the Number of Trailing Zero Bits |
| 609 | |
| 610 | // LZCNT -- Count the Number of Leading Zero Bits |
| 611 | INST3(lzcnt, "lzcnt" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_None) |
| 612 | |
| 613 | // POPCNT -- Return the Count of Number of Bits Set to 1 |
| 614 | INST3(popcnt, "popcnt" , IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_None) |
| 615 | |
| 616 | // INST2 entries -- columns: id nm um mr mi flags (mr = R/M[reg] encoding, mi = R/M,icon encoding; see the file header) |
| 617 | INST2(ret, "ret" , IUM_RD, 0x0000C3, 0x0000C2, INS_FLAGS_None) |
| 618 | INST2(loop, "loop" , IUM_RD, BAD_CODE, 0x0000E2, INS_FLAGS_None) |
| 619 | INST2(call, "call" , IUM_RD, 0x0010FF, 0x0000E8, INS_FLAGS_WritesFlags) |
| 620 | /* Shifts and rotates: the plain id has no mi form (BAD_CODE), while the _1 and _N ids carry one. Per the Intel SDM, opcode D2 takes the count in CL, D0 shifts by one and C0 takes an 8-bit immediate. The second byte (0x00, 0x08, ... 0x38) matches the SDM opcode-extension digit (/0 ... /7) shifted into ModRM bits 5:3. */ |
| 621 | INST2(rol, "rol" , IUM_RW, 0x0000D2, BAD_CODE, INS_FLAGS_WritesFlags) |
| 622 | INST2(rol_1, "rol" , IUM_RW, 0x0000D0, 0x0000D0, INS_FLAGS_WritesFlags) |
| 623 | INST2(rol_N, "rol" , IUM_RW, 0x0000C0, 0x0000C0, INS_FLAGS_WritesFlags) |
| 624 | INST2(ror, "ror" , IUM_RW, 0x0008D2, BAD_CODE, INS_FLAGS_WritesFlags) |
| 625 | INST2(ror_1, "ror" , IUM_RW, 0x0008D0, 0x0008D0, INS_FLAGS_WritesFlags) |
| 626 | INST2(ror_N, "ror" , IUM_RW, 0x0008C0, 0x0008C0, INS_FLAGS_WritesFlags) |
| 627 | |
| 628 | INST2(rcl, "rcl" , IUM_RW, 0x0010D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 629 | INST2(rcl_1, "rcl" , IUM_RW, 0x0010D0, 0x0010D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 630 | INST2(rcl_N, "rcl" , IUM_RW, 0x0010C0, 0x0010C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 631 | INST2(rcr, "rcr" , IUM_RW, 0x0018D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 632 | INST2(rcr_1, "rcr" , IUM_RW, 0x0018D0, 0x0018D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 633 | INST2(rcr_N, "rcr" , IUM_RW, 0x0018C0, 0x0018C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) |
| 634 | INST2(shl, "shl" , IUM_RW, 0x0020D2, BAD_CODE, INS_FLAGS_WritesFlags) |
| 635 | INST2(shl_1, "shl" , IUM_RW, 0x0020D0, 0x0020D0, INS_FLAGS_WritesFlags) |
| 636 | INST2(shl_N, "shl" , IUM_RW, 0x0020C0, 0x0020C0, INS_FLAGS_WritesFlags) |
| 637 | INST2(shr, "shr" , IUM_RW, 0x0028D2, BAD_CODE, INS_FLAGS_WritesFlags) |
| 638 | INST2(shr_1, "shr" , IUM_RW, 0x0028D0, 0x0028D0, INS_FLAGS_WritesFlags) |
| 639 | INST2(shr_N, "shr" , IUM_RW, 0x0028C0, 0x0028C0, INS_FLAGS_WritesFlags) |
| 640 | INST2(sar, "sar" , IUM_RW, 0x0038D2, BAD_CODE, INS_FLAGS_WritesFlags) |
| 641 | INST2(sar_1, "sar" , IUM_RW, 0x0038D0, 0x0038D0, INS_FLAGS_WritesFlags) |
| 642 | INST2(sar_N, "sar" , IUM_RW, 0x0038C0, 0x0038C0, INS_FLAGS_WritesFlags) |
| 643 | |
| 644 | |
| 645 | // INST1 entries -- columns: id nm um mr flags (mr = R/M[reg] encoding; see the file header) |
| 646 | INST1(r_movsb, "rep movsb" , IUM_RD, 0x00A4F3, INS_FLAGS_None) |
| 647 | INST1(r_movsd, "rep movsd" , IUM_RD, 0x00A5F3, INS_FLAGS_None) |
| 648 | #if defined(_TARGET_AMD64_) |
| 649 | INST1(r_movsq, "rep movsq" , IUM_RD, 0xF3A548, INS_FLAGS_None) |
| 650 | #endif // defined(_TARGET_AMD64_) |
| 651 | INST1(movsb, "movsb" , IUM_RD, 0x0000A4, INS_FLAGS_None) |
| 652 | INST1(movsd, "movsd" , IUM_RD, 0x0000A5, INS_FLAGS_None) |
| 653 | #if defined(_TARGET_AMD64_) |
| 654 | INST1(movsq, "movsq" , IUM_RD, 0x00A548, INS_FLAGS_None) |
| 655 | #endif // defined(_TARGET_AMD64_) |
| 656 | |
| 657 | INST1(r_stosb, "rep stosb" , IUM_RD, 0x00AAF3, INS_FLAGS_None) |
| 658 | INST1(r_stosd, "rep stosd" , IUM_RD, 0x00ABF3, INS_FLAGS_None) |
| 659 | #if defined(_TARGET_AMD64_) |
| 660 | INST1(r_stosq, "rep stosq" , IUM_RD, 0xF3AB48, INS_FLAGS_None) |
| 661 | #endif // defined(_TARGET_AMD64_) |
| 662 | INST1(stosb, "stosb" , IUM_RD, 0x0000AA, INS_FLAGS_None) |
| 663 | INST1(stosd, "stosd" , IUM_RD, 0x0000AB, INS_FLAGS_None) |
| 664 | #if defined(_TARGET_AMD64_) |
| 665 | INST1(stosq, "stosq" , IUM_RD, 0x00AB48, INS_FLAGS_None) |
| 666 | #endif // defined(_TARGET_AMD64_) |
| 667 | |
| 668 | INST1(int3, "int3" , IUM_RD, 0x0000CC, INS_FLAGS_None) |
| 669 | INST1(nop, "nop" , IUM_RD, 0x000090, INS_FLAGS_None) |
| 670 | INST1(lock, "lock" , IUM_RD, 0x0000F0, INS_FLAGS_None) |
| 671 | INST1(leave, "leave" , IUM_RD, 0x0000C9, INS_FLAGS_None) |
| 672 | |
| 673 | /* The entries below whose low byte is F6 (neg, not, idiv, imul, div, mul) share one opcode; their second byte matches the Intel SDM opcode-extension digit (/3, /2, /7, /5, /6, /4) shifted into ModRM bits 5:3. */ |
| 674 | INST1(neg, "neg" , IUM_RW, 0x0018F6, INS_FLAGS_WritesFlags) |
| 675 | INST1(not, "not" , IUM_RW, 0x0010F6, INS_FLAGS_WritesFlags) |
| 676 | /* NOTE(review): per the Intel SDM, NOT and CDQ leave EFLAGS unchanged, yet both are tagged INS_FLAGS_WritesFlags here -- possibly a deliberate over-approximation; confirm before changing. */ |
| 677 | INST1(cdq, "cdq" , IUM_RD, 0x000099, INS_FLAGS_WritesFlags) |
| 678 | INST1(idiv, "idiv" , IUM_RD, 0x0038F6, INS_FLAGS_WritesFlags) |
| 679 | INST1(imulEAX, "imul" , IUM_RD, 0x0028F6, INS_FLAGS_WritesFlags) // one-operand signed multiply: edx:eax = eax*op1 |
| 680 | INST1(div, "div" , IUM_RD, 0x0030F6, INS_FLAGS_WritesFlags) |
| 681 | INST1(mulEAX, "mul" , IUM_RD, 0x0020F6, INS_FLAGS_WritesFlags) |
| 682 | |
| 683 | INST1(sahf, "sahf" , IUM_RD, 0x00009E, INS_FLAGS_WritesFlags) |
| 684 | |
| 685 | INST1(xadd, "xadd" , IUM_RW, 0x0F00C0, INS_FLAGS_WritesFlags) |
| 686 | INST1(cmpxchg, "cmpxchg" , IUM_RW, 0x0F00B0, INS_FLAGS_WritesFlags) |
| 687 | |
| 688 | INST1(shld, "shld" , IUM_RW, 0x0F00A4, INS_FLAGS_WritesFlags) |
| 689 | INST1(shrd, "shrd" , IUM_RW, 0x0F00AC, INS_FLAGS_WritesFlags) |
| 690 | |
| 691 | // For RyuJIT/x86, we follow the x86 calling convention that requires |
| 692 | // us to return floating point values on the x87 FP stack, so we need |
| 693 | // these instructions regardless of whether we're using full stack fp. |
| 694 | #ifdef _TARGET_X86_ |
| 695 | INST1(fld, "fld" , IUM_WR, 0x0000D9, INS_FLAGS_x87Instr) |
| 696 | INST1(fstp, "fstp" , IUM_WR, 0x0018D9, INS_FLAGS_x87Instr) |
| 697 | #endif // _TARGET_X86_ |
| 698 | /* setcc: every mr value has the form 0x0F009x; the low nibble x runs 0..F in the same condition order as the jcc entries in this file (0x00007x and 0x008x0F). All entries read the flags and write their operand (IUM_WR). */ |
| 699 | INST1(seto, "seto" , IUM_WR, 0x0F0090, INS_FLAGS_ReadsFlags) |
| 700 | INST1(setno, "setno" , IUM_WR, 0x0F0091, INS_FLAGS_ReadsFlags) |
| 701 | INST1(setb, "setb" , IUM_WR, 0x0F0092, INS_FLAGS_ReadsFlags) |
| 702 | INST1(setae, "setae" , IUM_WR, 0x0F0093, INS_FLAGS_ReadsFlags) |
| 703 | INST1(sete, "sete" , IUM_WR, 0x0F0094, INS_FLAGS_ReadsFlags) |
| 704 | INST1(setne, "setne" , IUM_WR, 0x0F0095, INS_FLAGS_ReadsFlags) |
| 705 | INST1(setbe, "setbe" , IUM_WR, 0x0F0096, INS_FLAGS_ReadsFlags) |
| 706 | INST1(seta, "seta" , IUM_WR, 0x0F0097, INS_FLAGS_ReadsFlags) |
| 707 | INST1(sets, "sets" , IUM_WR, 0x0F0098, INS_FLAGS_ReadsFlags) |
| 708 | INST1(setns, "setns" , IUM_WR, 0x0F0099, INS_FLAGS_ReadsFlags) |
| 709 | INST1(setpe, "setpe" , IUM_WR, 0x0F009A, INS_FLAGS_ReadsFlags) |
| 710 | INST1(setpo, "setpo" , IUM_WR, 0x0F009B, INS_FLAGS_ReadsFlags) |
| 711 | INST1(setl, "setl" , IUM_WR, 0x0F009C, INS_FLAGS_ReadsFlags) |
| 712 | INST1(setge, "setge" , IUM_WR, 0x0F009D, INS_FLAGS_ReadsFlags) |
| 713 | INST1(setle, "setle" , IUM_WR, 0x0F009E, INS_FLAGS_ReadsFlags) |
| 714 | INST1(setg, "setg" , IUM_WR, 0x0F009F, INS_FLAGS_ReadsFlags) |
| 715 | |
| 716 | #ifdef _TARGET_AMD64_ |
| 717 | // A jump with a REX prefix. This is used for register-indirect |
| 718 | // tail calls. It has the same base encoding (0x0020FE) as i_jmp below. |
| 719 | INST1(rex_jmp, "rex.jmp" , IUM_RD, 0x0020FE, INS_FLAGS_None) |
| 720 | #endif |
| 721 | /* i_jmp: presumably the indirect (R/M operand) jump, as opposed to the relative jmp/l_jmp INST0 entries -- confirm against the emitter. */ |
| 722 | INST1(i_jmp, "jmp" , IUM_RD, 0x0020FE, INS_FLAGS_None) |
| 723 | /* INST0 entries (id, nm, um, mr, flags). Short forms first: per the Intel SDM, EB is JMP rel8 and 70+cc is Jcc rel8; the low nibble is the condition code. */ |
| 724 | INST0(jmp, "jmp" , IUM_RD, 0x0000EB, INS_FLAGS_None) |
| 725 | INST0(jo, "jo" , IUM_RD, 0x000070, INS_FLAGS_ReadsFlags) |
| 726 | INST0(jno, "jno" , IUM_RD, 0x000071, INS_FLAGS_ReadsFlags) |
| 727 | INST0(jb, "jb" , IUM_RD, 0x000072, INS_FLAGS_ReadsFlags) |
| 728 | INST0(jae, "jae" , IUM_RD, 0x000073, INS_FLAGS_ReadsFlags) |
| 729 | INST0(je, "je" , IUM_RD, 0x000074, INS_FLAGS_ReadsFlags) |
| 730 | INST0(jne, "jne" , IUM_RD, 0x000075, INS_FLAGS_ReadsFlags) |
| 731 | INST0(jbe, "jbe" , IUM_RD, 0x000076, INS_FLAGS_ReadsFlags) |
| 732 | INST0(ja, "ja" , IUM_RD, 0x000077, INS_FLAGS_ReadsFlags) |
| 733 | INST0(js, "js" , IUM_RD, 0x000078, INS_FLAGS_ReadsFlags) |
| 734 | INST0(jns, "jns" , IUM_RD, 0x000079, INS_FLAGS_ReadsFlags) |
| 735 | INST0(jpe, "jpe" , IUM_RD, 0x00007A, INS_FLAGS_ReadsFlags) |
| 736 | INST0(jpo, "jpo" , IUM_RD, 0x00007B, INS_FLAGS_ReadsFlags) |
| 737 | INST0(jl, "jl" , IUM_RD, 0x00007C, INS_FLAGS_ReadsFlags) |
| 738 | INST0(jge, "jge" , IUM_RD, 0x00007D, INS_FLAGS_ReadsFlags) |
| 739 | INST0(jle, "jle" , IUM_RD, 0x00007E, INS_FLAGS_ReadsFlags) |
| 740 | INST0(jg, "jg" , IUM_RD, 0x00007F, INS_FLAGS_ReadsFlags) |
| 741 | /* Long (l_) forms, same order and display names as the short forms above: l_jmp is E9 (JMP rel32 per the Intel SDM); the conditional values 0x008x0F presumably emit as 0F 8x (Jcc rel32) -- confirm the byte order against the emitter. */ |
| 742 | INST0(l_jmp, "jmp" , IUM_RD, 0x0000E9, INS_FLAGS_None) |
| 743 | INST0(l_jo, "jo" , IUM_RD, 0x00800F, INS_FLAGS_ReadsFlags) |
| 744 | INST0(l_jno, "jno" , IUM_RD, 0x00810F, INS_FLAGS_ReadsFlags) |
| 745 | INST0(l_jb, "jb" , IUM_RD, 0x00820F, INS_FLAGS_ReadsFlags) |
| 746 | INST0(l_jae, "jae" , IUM_RD, 0x00830F, INS_FLAGS_ReadsFlags) |
| 747 | INST0(l_je, "je" , IUM_RD, 0x00840F, INS_FLAGS_ReadsFlags) |
| 748 | INST0(l_jne, "jne" , IUM_RD, 0x00850F, INS_FLAGS_ReadsFlags) |
| 749 | INST0(l_jbe, "jbe" , IUM_RD, 0x00860F, INS_FLAGS_ReadsFlags) |
| 750 | INST0(l_ja, "ja" , IUM_RD, 0x00870F, INS_FLAGS_ReadsFlags) |
| 751 | INST0(l_js, "js" , IUM_RD, 0x00880F, INS_FLAGS_ReadsFlags) |
| 752 | INST0(l_jns, "jns" , IUM_RD, 0x00890F, INS_FLAGS_ReadsFlags) |
| 753 | INST0(l_jpe, "jpe" , IUM_RD, 0x008A0F, INS_FLAGS_ReadsFlags) |
| 754 | INST0(l_jpo, "jpo" , IUM_RD, 0x008B0F, INS_FLAGS_ReadsFlags) |
| 755 | INST0(l_jl, "jl" , IUM_RD, 0x008C0F, INS_FLAGS_ReadsFlags) |
| 756 | INST0(l_jge, "jge" , IUM_RD, 0x008D0F, INS_FLAGS_ReadsFlags) |
| 757 | INST0(l_jle, "jle" , IUM_RD, 0x008E0F, INS_FLAGS_ReadsFlags) |
| 758 | INST0(l_jg, "jg" , IUM_RD, 0x008F0F, INS_FLAGS_ReadsFlags) |
| 759 | /* align has no machine encoding of its own in this table (BAD_CODE). */ |
| 760 | INST0(align, "align" , IUM_RD, BAD_CODE, INS_FLAGS_None) |
| 761 | |
| 762 | /* Undefine every INSTn macro, so each inclusion of this file must supply its own definitions (at least INST1). */ |
| 763 | #undef INST0 |
| 764 | #undef INST1 |
| 765 | #undef INST2 |
| 766 | #undef INST3 |
| 767 | #undef INST4 |
| 768 | #undef INST5 |
| 769 | /*****************************************************************************/ |
| 770 | |
| 771 | // clang-format on |
| 772 | |