1/*******************************************************************************
2* Copyright 2016-2019 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/*******************************************************************************
18* Copyright (c) 2007 MITSUNARI Shigeo
19* All rights reserved.
20*
21* Redistribution and use in source and binary forms, with or without
22* modification, are permitted provided that the following conditions are met:
23*
24* Redistributions of source code must retain the above copyright notice, this
25* list of conditions and the following disclaimer.
26* Redistributions in binary form must reproduce the above copyright notice,
27* this list of conditions and the following disclaimer in the documentation
28* and/or other materials provided with the distribution.
29* Neither the name of the copyright owner nor the names of its contributors may
30* be used to endorse or promote products derived from this software without
31* specific prior written permission.
32*
33* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
43* THE POSSIBILITY OF SUCH DAMAGE.
44*******************************************************************************/
45
46#pragma once
47#ifndef XBYAK_XBYAK_H_
48#define XBYAK_XBYAK_H_
49/*!
50 @file xbyak.h
51 @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
52 @author herumi
53 @url https://github.com/herumi/xbyak
54 @note modified new BSD license
55 http://opensource.org/licenses/BSD-3-Clause
56*/
57#ifndef XBYAK_NO_OP_NAMES
58 #if not +0 // trick to detect whether 'not' is operator or not
59 #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()."
60 #endif
61#endif
62
63#include <stdio.h> // for debug print
64#include <assert.h>
65#include <list>
66#include <string>
67#include <algorithm>
68#ifndef NDEBUG
69#include <iostream>
70#endif
71
72// #define XBYAK_DISABLE_AVX512
73
74//#define XBYAK_USE_MMAP_ALLOCATOR
75#if !defined(__GNUC__) || defined(__MINGW32__)
76 #undef XBYAK_USE_MMAP_ALLOCATOR
77#endif
78
79#ifdef __GNUC__
80 #define XBYAK_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
81#else
82 #define XBYAK_GNUC_PREREQ(major, minor) 0
83#endif
84
85// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
86#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
87 ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
88 #include <unordered_set>
89 #define XBYAK_STD_UNORDERED_SET std::unordered_set
90 #include <unordered_map>
91 #define XBYAK_STD_UNORDERED_MAP std::unordered_map
92 #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
93
94/*
95 Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using
96 libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
97*/
98#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
99 #include <tr1/unordered_set>
100 #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
101 #include <tr1/unordered_map>
102 #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
103 #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
104
105#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
106 #include <unordered_set>
107 #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
108 #include <unordered_map>
109 #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
110 #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
111
112#else
113 #include <set>
114 #define XBYAK_STD_UNORDERED_SET std::set
115 #include <map>
116 #define XBYAK_STD_UNORDERED_MAP std::map
117 #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
118#endif
119#ifdef _WIN32
120 #include <winsock2.h>
121 #include <windows.h>
122 #include <malloc.h>
123#elif defined(__GNUC__)
124 #include <unistd.h>
125 #include <sys/mman.h>
126 #include <stdlib.h>
127#endif
128#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
129 #include <stdint.h>
130#endif
131
132#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
133 #define XBYAK64_WIN
134#elif defined(__x86_64__)
135 #define XBYAK64_GCC
136#endif
137#if !defined(XBYAK64) && !defined(XBYAK32)
138 #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN)
139 #define XBYAK64
140 #else
141 #define XBYAK32
142 #endif
143#endif
144
145#if (__cplusplus >= 201103) || (_MSC_VER >= 1800)
146 #define XBYAK_VARIADIC_TEMPLATE
147#endif
148
149#ifdef _MSC_VER
150 #pragma warning(push)
151 #pragma warning(disable : 4514) /* remove inline function */
152 #pragma warning(disable : 4786) /* identifier is too long */
153 #pragma warning(disable : 4503) /* name is too long */
	#pragma warning(disable : 4127) /* constant expression */
155#endif
156
157namespace Xbyak {
158
enum {
	DEFAULT_MAX_CODE_SIZE = 4096, // default/minimum code buffer size; also the lower bound used when growing in AUTO_GROW mode
	VERSION = 0x5760 /* 0xABCD = A.BC(D) */
};
163
// fixed-width integer aliases; MSVC lacked <stdint.h> historically, hence the fork
#ifndef MIE_INTEGER_TYPE_DEFINED
#define MIE_INTEGER_TYPE_DEFINED
#ifdef _MSC_VER
	typedef unsigned __int64 uint64;
	typedef __int64 sint64;
#else
	typedef uint64_t uint64;
	typedef int64_t sint64;
#endif
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;
#endif
177
178#ifndef MIE_ALIGN
179 #ifdef _MSC_VER
180 #define MIE_ALIGN(x) __declspec(align(x))
181 #else
182 #define MIE_ALIGN(x) __attribute__((aligned(x)))
183 #endif
184#endif
185#ifndef MIE_PACK // for shufps
186 #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
187#endif
188
// error codes thrown via Xbyak::Error.
// NOTE: the order must stay in sync with errTbl in Error::what().
enum {
	ERR_NONE = 0,
	ERR_BAD_ADDRESSING,
	ERR_CODE_IS_TOO_BIG,
	ERR_BAD_SCALE,
	ERR_ESP_CANT_BE_INDEX,
	ERR_BAD_COMBINATION,
	ERR_BAD_SIZE_OF_REGISTER,
	ERR_IMM_IS_TOO_BIG,
	ERR_BAD_ALIGN,
	ERR_LABEL_IS_REDEFINED,
	ERR_LABEL_IS_TOO_FAR,
	ERR_LABEL_IS_NOT_FOUND,
	ERR_CODE_ISNOT_COPYABLE,
	ERR_BAD_PARAMETER,
	ERR_CANT_PROTECT,
	ERR_CANT_USE_64BIT_DISP,
	ERR_OFFSET_IS_TOO_BIG,
	ERR_MEM_SIZE_IS_NOT_SPECIFIED,
	ERR_BAD_MEM_SIZE,
	ERR_BAD_ST_COMBINATION,
	ERR_OVER_LOCAL_LABEL, // not used
	ERR_UNDER_LOCAL_LABEL,
	ERR_CANT_ALLOC,
	ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW,
	ERR_BAD_PROTECT_MODE,
	ERR_BAD_PNUM,
	ERR_BAD_TNUM,
	ERR_BAD_VSIB_ADDRESSING,
	ERR_CANT_CONVERT,
	ERR_LABEL_ISNOT_SET_BY_L,
	ERR_LABEL_IS_ALREADY_SET_BY_L,
	ERR_BAD_LABEL_STR,
	ERR_MUNMAP,
	ERR_OPMASK_IS_ALREADY_SET,
	ERR_ROUNDING_IS_ALREADY_SET,
	ERR_K0_IS_INVALID,
	ERR_EVEX_IS_INVALID,
	ERR_SAE_IS_INVALID,
	ERR_ER_IS_INVALID,
	ERR_INVALID_BROADCAST,
	ERR_INVALID_OPMASK_WITH_MEMORY,
	ERR_INVALID_ZERO,
	ERR_INVALID_RIP_IN_AUTO_GROW,
	ERR_INVALID_MIB_ADDRESS,
	ERR_INTERNAL,
	ERR_X2APIC_IS_NOT_SUPPORTED // last valid code; keep any range checks in sync
};
237
238class Error : public std::exception {
239 int err_;
240public:
241 explicit Error(int err) : err_(err)
242 {
243 if (err_ < 0 || err_ > ERR_INTERNAL) {
244 fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_);
245 //exit(1);
246 }
247 }
248 operator int() const { return err_; }
249 const char *what() const throw()
250 {
251 static const char *errTbl[] = {
252 "none",
253 "bad addressing",
254 "code is too big",
255 "bad scale",
256 "esp can't be index",
257 "bad combination",
258 "bad size of register",
259 "imm is too big",
260 "bad align",
261 "label is redefined",
262 "label is too far",
263 "label is not found",
264 "code is not copyable",
265 "bad parameter",
266 "can't protect",
267 "can't use 64bit disp(use (void*))",
268 "offset is too big",
269 "MEM size is not specified",
270 "bad mem size",
271 "bad st combination",
272 "over local label",
273 "under local label",
274 "can't alloc",
275 "T_SHORT is not supported in AutoGrow",
276 "bad protect mode",
277 "bad pNum",
278 "bad tNum",
279 "bad vsib addressing",
280 "can't convert",
281 "label is not set by L()",
282 "label is already set by L()",
283 "bad label string",
284 "err munmap",
285 "opmask is already set",
286 "rounding is already set",
287 "k0 is invalid",
288 "evex is invalid",
289 "sae(suppress all exceptions) is invalid",
290 "er(embedded rounding) is invalid",
291 "invalid broadcast",
292 "invalid opmask with memory",
293 "invalid zero",
294 "invalid rip in AutoGrow",
295 "invalid mib address",
296 "internal error",
297 "x2APIC is not supported"
298 };
299 assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
300 return errTbl[err_];
301 }
302};
303
// convenience helper: map an Error (or an int error code via Error's
// converting constructor) to its human readable message
inline const char *ConvertErrorToString(const Error& err)
{
	return err.what();
}
308
/*
	allocate size bytes whose address is a multiple of alignment.
	Returns 0 on failure. Release with AlignedFree().
*/
inline void *AlignedMalloc(size_t size, size_t alignment)
{
#ifdef __MINGW32__
	return __mingw_aligned_malloc(size, alignment);
#elif defined(_WIN32)
	return _aligned_malloc(size, alignment);
#else
	void *result = 0;
	// posix_memalign returns 0 on success and leaves result set
	return (posix_memalign(&result, alignment, size) == 0) ? result : 0;
#endif
}
321
// release memory obtained from AlignedMalloc (each branch matches the
// allocator used on that platform); null is accepted by all three backends
inline void AlignedFree(void *p)
{
#ifdef __MINGW32__
	__mingw_aligned_free(p);
#elif defined(_MSC_VER)
	_aligned_free(p);
#else
	free(p);
#endif
}
332
333template<class To, class From>
334inline const To CastTo(From p) throw()
335{
336 return (const To)(size_t)(p);
337}
namespace inner {

// page granularity used for code-buffer alignment and mmap rounding
static const size_t ALIGN_PAGE_SIZE = 4096;

// true if x, read as a signed 32-bit value, fits in a signed 8-bit displacement
inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
// true if x, read as a signed 64-bit value, fits in a signed 32-bit value
inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; }

// on 64-bit builds throw ERR_OFFSET_IS_TOO_BIG unless x fits in int32;
// on 32-bit builds just truncate (every value fits)
inline uint32 VerifyInInt32(uint64 x)
{
#ifdef XBYAK64
	if (!IsInInt32(x)) throw Error(ERR_OFFSET_IS_TOO_BIG);
#endif
	return static_cast<uint32>(x);
}

// how a stored label address must be patched when code is finalized
enum LabelMode {
	LasIs, // as is
	Labs, // absolute
	LaddTop // (addr + top) for mov(reg, label) with AutoGrow
};

} // inner
360
361/*
362 custom allocator
363*/
struct Allocator {
	// allocate page-aligned storage for generated code; override to customize
	virtual uint8 *alloc(size_t size) { return reinterpret_cast<uint8*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
	// release storage returned by alloc()
	virtual void free(uint8 *p) { AlignedFree(p); }
	virtual ~Allocator() {}
	/* override to return false if you call protect() manually */
	virtual bool useProtect() const { return true; }
};
371
372#ifdef XBYAK_USE_MMAP_ALLOCATOR
class MmapAllocator : Allocator {
	typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
	SizeList sizeList_; // maps allocation address -> rounded-up size (munmap needs the length back)
public:
	// mmap an anonymous read/write region rounded up to a whole page;
	// throws ERR_CANT_ALLOC on failure
	uint8 *alloc(size_t size)
	{
		const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
		size = (size + alignedSizeM1) & ~alignedSizeM1;
#ifdef MAP_ANONYMOUS
		const int mode = MAP_PRIVATE | MAP_ANONYMOUS;
#elif defined(MAP_ANON)
		const int mode = MAP_PRIVATE | MAP_ANON;
#else
		#error "not supported"
#endif
		void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0);
		if (p == MAP_FAILED) throw Error(ERR_CANT_ALLOC);
		assert(p);
		sizeList_[(uintptr_t)p] = size;
		return (uint8*)p;
	}
	// munmap a region previously returned by alloc(); p == 0 is a no-op.
	// throws ERR_BAD_PARAMETER for unknown pointers, ERR_MUNMAP if munmap fails
	void free(uint8 *p)
	{
		if (p == 0) return;
		SizeList::iterator i = sizeList_.find((uintptr_t)p);
		if (i == sizeList_.end()) throw Error(ERR_BAD_PARAMETER);
		if (munmap((void*)i->first, i->second) < 0) throw Error(ERR_MUNMAP);
		sizeList_.erase(i);
	}
};
403#endif
404
405class Address;
406class Reg;
407
/*
	base class of every assembler operand (register, memory, opmask, ...).
	The state is packed into bitfields: register index, kind mask, width in
	bits, plus the EVEX decorations (zeroing, opmask index, rounding mode).
*/
class Operand {
	static const uint8 EXT8BIT = 0x20; // flag bit OR-ed into idx_ for spl/bpl/sil/dil
	unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
	unsigned int kind_:9;
	unsigned int bit_:10;
protected:
	unsigned int zero_:1;     // EVEX zeroing-masking flag
	unsigned int mask_:3;     // EVEX opmask register index (0 = none)
	unsigned int rounding_:3; // EVEX rounding/SAE selector (0 = none)
	void setIdx(int idx) { idx_ = idx; }
public:
	// operand categories; values are single bits so they can be OR-ed in is()
	enum Kind {
		NONE = 0,
		MEM = 1 << 0,
		REG = 1 << 1,
		MMX = 1 << 2,
		FPU = 1 << 3,
		XMM = 1 << 4,
		YMM = 1 << 5,
		ZMM = 1 << 6,
		OPMASK = 1 << 7,
		BNDREG = 1 << 8
	};
	// hardware register indices for each width class
	enum Code {
#ifdef XBYAK64
		RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
		R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
		R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
		R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
		SPL = 4, BPL, SIL, DIL,
#endif
		EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
		AX = 0, CX, DX, BX, SP, BP, SI, DI,
		AL = 0, CL, DL, BL, AH, CH, DH, BH
	};
	Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
	Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
		: idx_(static_cast<uint8>(idx | (ext8bit ? EXT8BIT : 0)))
		, kind_(kind)
		, bit_(bit)
		, zero_(0), mask_(0), rounding_(0)
	{
		assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
	}
	Kind getKind() const { return static_cast<Kind>(kind_); }
	// register index with the EXT8BIT flag stripped
	int getIdx() const { return idx_ & (EXT8BIT - 1); }
	bool isNone() const { return kind_ == 0; }
	bool isMMX() const { return is(MMX); }
	bool isXMM() const { return is(XMM); }
	bool isYMM() const { return is(YMM); }
	bool isZMM() const { return is(ZMM); }
	bool isXMEM() const { return is(XMM | MEM); }
	bool isYMEM() const { return is(YMM | MEM); }
	bool isZMEM() const { return is(ZMM | MEM); }
	bool isOPMASK() const { return is(OPMASK); }
	bool isBNDREG() const { return is(BNDREG); }
	bool isREG(int bit = 0) const { return is(REG, bit); }
	bool isMEM(int bit = 0) const { return is(MEM, bit); }
	bool isFPU() const { return is(FPU); }
	bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
	bool isExtIdx() const { return (getIdx() & 8) != 0; }   // index 8..15 (REX-extended)
	bool isExtIdx2() const { return (getIdx() & 16) != 0; } // index 16..31 (EVEX-only)
	// operand forces EVEX encoding (zmm, reg16-31, opmask or rounding attached)
	bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
	bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
	bool hasZero() const { return zero_; }
	int getOpmaskIdx() const { return mask_; }
	int getRounding() const { return rounding_; }
	// change among XMM/YMM/ZMM only; other kinds are ignored
	void setKind(Kind kind)
	{
		if ((kind & (XMM|YMM|ZMM)) == 0) return;
		kind_ = kind;
		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
	}
	void setBit(int bit) { bit_ = bit; }
	// attach opmask register k1..k7 (k0 rejected unless ignore_idx0)
	void setOpmaskIdx(int idx, bool ignore_idx0 = false)
	{
		if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
		if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET);
		mask_ = idx;
	}
	void setRounding(int idx)
	{
		if (rounding_) throw Error(ERR_ROUNDING_IS_ALREADY_SET);
		rounding_ = idx;
	}
	void setZero() { zero_ = true; }
	// ah, ch, dh, bh?
	bool isHigh8bit() const
	{
		if (!isBit(8)) return false;
		if (isExt8bit()) return false;
		const int idx = getIdx();
		return AH <= idx && idx <= BH;
	}
	// any bit is acceptable if bit == 0
	bool is(int kind, uint32 bit = 0) const
	{
		return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16)
	}
	bool isBit(uint32 bit) const { return (bit_ & bit) != 0; }
	uint32 getBit() const { return bit_; }
	// printable register name (e.g. "eax", "zmm31"); throws ERR_INTERNAL for
	// kinds without a name table (e.g. memory operands)
	const char *toString() const
	{
		const int idx = getIdx();
		if (kind_ == REG) {
			if (isExt8bit()) {
				static const char *tbl[4] = { "spl", "bpl", "sil", "dil" };
				return tbl[idx - 4];
			}
			static const char *tbl[4][16] = {
				{ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
				{ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
				{ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
				{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
			};
			return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx];
		} else if (isOPMASK()) {
			static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" };
			return tbl[idx];
		} else if (isZMM()) {
			static const char *tbl[32] = {
				"zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
				"zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31"
			};
			return tbl[idx];
		} else if (isYMM()) {
			static const char *tbl[32] = {
				"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
				"ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31"
			};
			return tbl[idx];
		} else if (isXMM()) {
			static const char *tbl[32] = {
				"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
				"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31"
			};
			return tbl[idx];
		} else if (isMMX()) {
			static const char *tbl[8] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
			return tbl[idx];
		} else if (isFPU()) {
			static const char *tbl[8] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" };
			return tbl[idx];
		} else if (isBNDREG()) {
			static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" };
			return tbl[idx];
		}
		throw Error(ERR_INTERNAL);
	}
	// exact state comparison including EVEX decorations
	bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; }
	bool operator==(const Operand& rhs) const;
	bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
	const Address& getAddress() const;
	const Reg& getReg() const;
};
563
564class Label;
565
566struct Reg8;
567struct Reg16;
568struct Reg32;
569#ifdef XBYAK64
570struct Reg64;
571#endif
// general purpose register; adds REX prefix computation on top of Operand
class Reg : public Operand {
public:
	Reg() { }
	Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
	// same register index/kind with a different width
	Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); }
	// individual REX bit contributions (W=8, R=4, X=2, B=1)
	uint8 getRexW() const { return isREG(64) ? 8 : 0; }
	uint8 getRexR() const { return isExtIdx() ? 4 : 0; }
	uint8 getRexX() const { return isExtIdx() ? 2 : 0; }
	uint8 getRexB() const { return isExtIdx() ? 1 : 0; }
	// full REX byte for this register combined with base; a bare 0x40 is
	// forced when spl/bpl/sil/dil are involved, 0 means no prefix needed
	uint8 getRex(const Reg& base = Reg()) const
	{
		uint8 rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
		if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40;
		return rex;
	}
	Reg8 cvt8() const;
	Reg16 cvt16() const;
	Reg32 cvt32() const;
#ifdef XBYAK64
	Reg64 cvt64() const;
#endif
};
594
// downcast Operand -> Reg; only valid when the operand is not a memory operand
inline const Reg& Operand::getReg() const
{
	assert(!isMEM());
	return static_cast<const Reg&>(*this);
}

// 8-bit GPR; ext8bit marks spl/bpl/sil/dil (requires a REX prefix)
struct Reg8 : public Reg {
	explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
};

// 16-bit GPR
struct Reg16 : public Reg {
	explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { }
};

// MMX register; also the common base for Xmm/Ymm/Zmm below
struct Mmx : public Reg {
	explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
};
612
// EVEX rounding/SAE selector; attached to a register via operator| and
// stored through Operand::setRounding()
struct EvexModifierRounding {
	enum {
		T_RN_SAE = 1,
		T_RD_SAE = 2,
		T_RU_SAE = 3,
		T_RZ_SAE = 4,
		T_SAE = 5
	};
	explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
	int rounding;
};
// tag type: "reg | modifier" requests EVEX zeroing-masking via setZero()
struct EvexModifierZero{EvexModifierZero() {}};

// 128-bit SSE register (base for Ymm/Zmm as well)
struct Xmm : public Mmx {
	explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
	Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { }
	Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; }
	Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; }
	Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; }
};

// 256-bit AVX register
struct Ymm : public Xmm {
	explicit Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { }
	Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; }
};

// 512-bit AVX-512 register
struct Zmm : public Ymm {
	explicit Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { }
	Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
};

// AVX-512 mask register k0..k7
struct Opmask : public Reg {
	explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
};

// MPX bounds register bnd0..bnd3
struct BoundsReg : public Reg {
	explicit BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {}
};

// attach EVEX decorations: "x | k1" (opmask), "x | zero tag", "x | rounding"
template<class T>T operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; }
template<class T>T operator|(const T& x, const EvexModifierZero&) { T r(x); r.setZero(); return r; }
template<class T>T operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; }

// x87 FPU stack register st0..st7
struct Fpu : public Reg {
	explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { }
};

// GPR usable as address component (32-bit, or 64-bit on XBYAK64)
struct Reg32e : public Reg {
	explicit Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
};
struct Reg32 : public Reg32e {
	explicit Reg32(int idx = 0) : Reg32e(idx, 32) {}
};
666#ifdef XBYAK64
struct Reg64 : public Reg32e {
	explicit Reg64(int idx = 0) : Reg32e(idx, 64) {}
};
// rip-relative addressing expression: rip + disp, optionally anchored to a
// Label or an absolute address (at most one of the two)
struct RegRip {
	sint64 disp_;
	const Label* label_;
	bool isAddr_;
	explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
	friend const RegRip operator+(const RegRip& r, int disp) {
		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator-(const RegRip& r, int disp) {
		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator+(const RegRip& r, sint64 disp) {
		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator-(const RegRip& r, sint64 disp) {
		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
	}
	// attach a label; rejected if a label or address is already present
	friend const RegRip operator+(const RegRip& r, const Label& label) {
		if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
		return RegRip(r.disp_, &label);
	}
	// attach an absolute address; same exclusivity rule as for labels
	friend const RegRip operator+(const RegRip& r, const void *addr) {
		if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
		return RegRip(r.disp_ + (sint64)addr, 0, true);
	}
};
696#endif
697
// narrow to the 8-bit form. On 32-bit builds esp/ebp/esi/edi (idx 4..7)
// have no 8-bit form; on 64-bit builds idx 4..7 become spl/bpl/sil/dil
// (ext8bit flag set, which forces a REX prefix)
inline Reg8 Reg::cvt8() const
{
	const int idx = getIdx();
	if (isBit(8)) return Reg8(idx, isExt8bit());
#ifdef XBYAK32
	if (idx >= 4) throw Error(ERR_CANT_CONVERT);
#endif
	return Reg8(idx, 4 <= idx && idx < 8);
}

// widen to 16 bit; ah/ch/dh/bh (8-bit idx 4..7 without ext8bit) cannot be
// widened because they alias the upper half of another register
inline Reg16 Reg::cvt16() const
{
	const int idx = getIdx();
	if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
	return Reg16(idx);
}

// widen to 32 bit; same ah/ch/dh/bh restriction as cvt16
inline Reg32 Reg::cvt32() const
{
	const int idx = getIdx();
	if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
	return Reg32(idx);
}

#ifdef XBYAK64
// widen to 64 bit; same ah/ch/dh/bh restriction as cvt16
inline Reg64 Reg::cvt64() const
{
	const int idx = getIdx();
	if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
	return Reg64(idx);
}
#endif
730
731#ifndef XBYAK_DISABLE_SEGMENT
732// not derived from Reg
class Segment {
	int idx_; // one of the es..gs enumerators below
public:
	enum {
		es, cs, ss, ds, fs, gs
	};
	// idx must identify one of the six segment registers
	explicit Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); }
	int getIdx() const { return idx_; }
	// printable register name ("es".."gs")
	const char *toString() const
	{
		static const char names[6][3] = { "es", "cs", "ss", "ds", "fs", "gs" };
		return names[idx_];
	}
};
749#endif
750
// addressing expression [base_ + index_ * scale_ + disp_]
class RegExp {
public:
#ifdef XBYAK64
	enum { i32e = 32 | 64 };
#else
	enum { i32e = 32 };
#endif
	// displacement-only expression
	RegExp(size_t disp = 0) : scale_(0), disp_(disp) { }
	// single register with optional scale; xmm/ymm/zmm (VSIB) or scale != 1
	// always become the index, a plain GPR with scale 1 becomes the base
	RegExp(const Reg& r, int scale = 1)
		: scale_(scale)
		, disp_(0)
	{
		if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
		if (scale == 0) return;
		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE);
		if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
			index_ = r;
		} else {
			base_ = r;
		}
	}
	bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
	RegExp optimize() const
	{
		RegExp exp = *this;
		// [reg * 2] => [reg + reg]
		if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) {
			exp.base_ = index_;
			exp.scale_ = 1;
		}
		return exp;
	}
	bool operator==(const RegExp& rhs) const
	{
		return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_;
	}
	const Reg& getBase() const { return base_; }
	const Reg& getIndex() const { return index_; }
	int getScale() const { return scale_; }
	size_t getDisp() const { return disp_; }
	// reject combinations x86 addressing cannot encode
	void verify() const
	{
		if (base_.getBit() >= 128) throw Error(ERR_BAD_SIZE_OF_REGISTER);
		if (index_.getBit() && index_.getBit() <= 64) {
			if (index_.getIdx() == Operand::ESP) throw Error(ERR_ESP_CANT_BE_INDEX);
			if (base_.getBit() && base_.getBit() != index_.getBit()) throw Error(ERR_BAD_SIZE_OF_REGISTER);
		}
	}
	friend RegExp operator+(const RegExp& a, const RegExp& b);
	friend RegExp operator-(const RegExp& e, size_t disp);
	// REX prefix bits contributed by extended base/index registers
	uint8 getRex() const
	{
		uint8 rex = index_.getRexX() | base_.getRexB();
		return rex ? uint8(rex | 0x40) : 0;
	}
private:
	/*
		[base_ + index_ * scale_ + disp_]
		base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm
	*/
	Reg base_;
	Reg index_;
	int scale_;
	size_t disp_;
};
816
// merge two addressing expressions; throws ERR_BAD_ADDRESSING when the
// result would need two index registers
inline RegExp operator+(const RegExp& a, const RegExp& b)
{
	if (a.index_.getBit() && b.index_.getBit()) throw Error(ERR_BAD_ADDRESSING);
	RegExp ret = a;
	if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }
	if (b.base_.getBit()) {
		if (ret.base_.getBit()) {
			if (ret.index_.getBit()) throw Error(ERR_BAD_ADDRESSING);
			// base + base => base + index * 1
			ret.index_ = b.base_;
			// [reg + esp] => [esp + reg]
			if (ret.index_.getIdx() == Operand::ESP) std::swap(ret.base_, ret.index_);
			ret.scale_ = 1;
		} else {
			ret.base_ = b.base_;
		}
	}
	ret.disp_ += b.disp_;
	return ret;
}
// [reg * scale] form
inline RegExp operator*(const Reg& r, int scale)
{
	return RegExp(r, scale);
}
// subtract a displacement from an expression
inline RegExp operator-(const RegExp& e, size_t disp)
{
	RegExp ret = e;
	ret.disp_ -= disp;
	return ret;
}
847
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc):
//   userPtr == AutoGrow          : buffer is reallocated on demand (AUTO_GROW)
//   userPtr == DontSetProtectRWE : allocate but skip the RWE protect() call
void *const AutoGrow = (void*)1; //-V566
void *const DontSetProtectRWE = (void*)2; //-V566
851
852class CodeArray {
853 enum Type {
854 USER_BUF = 1, // use userPtr(non alignment, non protect)
855 ALLOC_BUF, // use new(alignment, protect)
856 AUTO_GROW // automatically move and grow memory if necessary
857 };
858 CodeArray(const CodeArray& rhs);
859 void operator=(const CodeArray&);
860 bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; }
861 struct AddrInfo {
862 size_t codeOffset; // position to write
863 size_t jmpAddr; // value to write
864 int jmpSize; // size of jmpAddr
865 inner::LabelMode mode;
866 AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode)
867 : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {}
868 uint64 getVal(const uint8 *top) const
869 {
870 uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top);
871 if (jmpSize == 4) disp = inner::VerifyInInt32(disp);
872 return disp;
873 }
874 };
875 typedef std::list<AddrInfo> AddrInfoList;
876 AddrInfoList addrInfoList_;
877 const Type type_;
878#ifdef XBYAK_USE_MMAP_ALLOCATOR
879 MmapAllocator defaultAllocator_;
880#else
881 Allocator defaultAllocator_;
882#endif
883 Allocator *alloc_;
884protected:
885 size_t maxSize_;
886 uint8 *top_;
887 size_t size_;
888 bool isCalledCalcJmpAddress_;
889
890 bool useProtect() const { return alloc_->useProtect(); }
891 /*
892 allocate new memory and copy old data to the new area
893 */
894 void growMemory()
895 {
896 const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
897 uint8 *newTop = alloc_->alloc(newSize);
898 if (newTop == 0) throw Error(ERR_CANT_ALLOC);
899 for (size_t i = 0; i < size_; i++) newTop[i] = top_[i];
900 alloc_->free(top_);
901 top_ = newTop;
902 maxSize_ = newSize;
903 }
904 /*
905 calc jmp address for AutoGrow mode
906 */
907 void calcJmpAddress()
908 {
909 if (isCalledCalcJmpAddress_) return;
910 for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) {
911 uint64 disp = i->getVal(top_);
912 rewrite(i->codeOffset, disp, i->jmpSize);
913 }
914 isCalledCalcJmpAddress_ = true;
915 }
916public:
917 enum ProtectMode {
918 PROTECT_RW = 0, // read/write
919 PROTECT_RWE = 1, // read/write/exec
920 PROTECT_RE = 2 // read/exec
921 };
922 explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
923 : type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
924 , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
925 , maxSize_(maxSize)
926 , top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
927 , size_(0)
928 , isCalledCalcJmpAddress_(false)
929 {
930 if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
931 if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
932 alloc_->free(top_);
933 throw Error(ERR_CANT_PROTECT);
934 }
935 }
936 virtual ~CodeArray()
937 {
938 if (isAllocType()) {
939 if (useProtect()) setProtectModeRW(false);
940 alloc_->free(top_);
941 }
942 }
943 bool setProtectMode(ProtectMode mode, bool throwException = true)
944 {
945 bool isOK = protect(top_, maxSize_, mode);
946 if (isOK) return true;
947 if (throwException) throw Error(ERR_CANT_PROTECT);
948 return false;
949 }
950 bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
951 bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
	// Rewind the write cursor and discard all pending jump fixups so the
	// buffer can be refilled from scratch.
	void resetSize()
	{
		size_ = 0;
		addrInfoList_.clear();
		isCalledCalcJmpAddress_ = false;
	}
958 void db(int code)
959 {
960 if (size_ >= maxSize_) {
961 if (type_ == AUTO_GROW) {
962 growMemory();
963 } else {
964 throw Error(ERR_CODE_IS_TOO_BIG);
965 }
966 }
967 top_[size_++] = static_cast<uint8>(code);
968 }
969 void db(const uint8 *code, size_t codeSize)
970 {
971 for (size_t i = 0; i < codeSize; i++) db(code[i]);
972 }
973 void db(uint64 code, size_t codeSize)
974 {
975 if (codeSize > 8) throw Error(ERR_BAD_PARAMETER);
976 for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8>(code >> (i * 8)));
977 }
	void dw(uint32 code) { db(code, 2); } // emit 16-bit little-endian value
	void dd(uint32 code) { db(code, 4); } // emit 32-bit little-endian value
	void dq(uint64 code) { db(code, 8); } // emit 64-bit little-endian value
	// Start of the code buffer; the templated form reinterprets it as a
	// callable function pointer type F.
	const uint8 *getCode() const { return top_; }
	template<class F>
	const F getCode() const { return reinterpret_cast<F>(top_); }
	// Current write position (one past the last emitted byte).
	const uint8 *getCurr() const { return &top_[size_]; }
	template<class F>
	const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
	// Number of bytes emitted so far.
	size_t getSize() const { return size_; }
	// Move the write cursor to an absolute offset; must not exceed capacity.
	void setSize(size_t size)
	{
		if (size > maxSize_) throw Error(ERR_OFFSET_IS_TOO_BIG);
		size_ = size;
	}
	// Print a hex dump of the emitted code to stdout: up to 4 rows of 16
	// bytes each (i.e. at most the first 64 bytes).
	void dump() const
	{
		const uint8 *p = getCode();
		size_t bufSize = getSize();
		size_t remain = bufSize;
		for (int i = 0; i < 4; i++) {
			size_t disp = 16;
			if (remain < 16) {
				disp = remain; // last, partial row
			}
			for (size_t j = 0; j < 16; j++) {
				if (j < disp) {
					printf("%02X", p[i * 16 + j]);
				}
			}
			putchar('\n');
			remain -= disp;
			if (remain == 0) {
				break;
			}
		}
	}
1015 /*
1016 @param offset [in] offset from top
1017 @param disp [in] offset from the next of jmp
1018 @param size [in] write size(1, 2, 4, 8)
1019 */
1020 void rewrite(size_t offset, uint64 disp, size_t size)
1021 {
1022 assert(offset < maxSize_);
1023 if (size != 1 && size != 2 && size != 4 && size != 8) throw Error(ERR_BAD_PARAMETER);
1024 uint8 *const data = top_ + offset;
1025 for (size_t i = 0; i < size; i++) {
1026 data[i] = static_cast<uint8>(disp >> (i * 8));
1027 }
1028 }
	// Record a pending fixup (resolved later by calcJmpAddress in AutoGrow mode).
	void save(size_t offset, size_t val, int size, inner::LabelMode mode)
	{
		addrInfoList_.push_back(AddrInfo(offset, val, size, mode));
	}
	bool isAutoGrow() const { return type_ == AUTO_GROW; } // growable buffer?
	bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; } // fixups already applied?
1035 /**
1036 change exec permission of memory
1037 @param addr [in] buffer address
1038 @param size [in] buffer size
1039 @param protectMode [in] mode(RW/RWE/RE)
1040 @return true(success), false(failure)
1041 */
1042 static inline bool protect(const void *addr, size_t size, int protectMode)
1043 {
1044#if defined(_WIN32)
1045 const DWORD c_rw = PAGE_READWRITE;
1046 const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
1047 const DWORD c_re = PAGE_EXECUTE_READ;
1048 DWORD mode;
1049#else
1050 const int c_rw = PROT_READ | PROT_WRITE;
1051 const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
1052 const int c_re = PROT_READ | PROT_EXEC;
1053 int mode;
1054#endif
1055 switch (protectMode) {
1056 case PROTECT_RW: mode = c_rw; break;
1057 case PROTECT_RWE: mode = c_rwe; break;
1058 case PROTECT_RE: mode = c_re; break;
1059 default:
1060 return false;
1061 }
1062#if defined(_WIN32)
1063 DWORD oldProtect;
1064 return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
1065#elif defined(__GNUC__)
1066 size_t pageSize = sysconf(_SC_PAGESIZE);
1067 size_t iaddr = reinterpret_cast<size_t>(addr);
1068 size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
1069#ifndef NDEBUG
1070 if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
1071#endif
1072 return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
1073#else
1074 return true;
1075#endif
1076 }
1077 /**
1078 get aligned memory pointer
1079 @param addr [in] address
1080 @param alignedSize [in] power of two
1081 @return aligned addr by alingedSize
1082 */
1083 static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16)
1084 {
1085 return reinterpret_cast<uint8*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
1086 }
1087};
1088
// Memory operand: wraps a register expression (base + index*scale + disp),
// or an absolute/rip-relative address, plus EVEX broadcast info.
class Address : public Operand {
public:
	enum Mode {
		M_ModRM,     // ordinary [base + index*scale + disp]
		M_64bitDisp, // 64-bit absolute displacement (moffset)
		M_rip,       // [rip + disp] with label/relative disp
		M_ripAddr    // [rip + absolute address] (converted to relative at emit)
	};
	Address(uint32 sizeBit, bool broadcast, const RegExp& e)
		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
	{
		e_.verify();
	}
#ifdef XBYAK64
	explicit Address(size_t disp)
		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
	Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
#endif
	RegExp getRegExp(bool optimize = true) const
	{
		return optimize ? e_.optimize() : e_;
	}
	Mode getMode() const { return mode_; }
	// true if base or index is a 32-bit register (needs 0x67 prefix in 64-bit mode)
	bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
	bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
	size_t getDisp() const { return e_.getDisp(); }
	uint8 getRex() const
	{
		if (mode_ != M_ModRM) return 0;
		return getRegExp().getRex();
	}
	bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
	bool isBroadcast() const { return broadcast_; }
	const Label* getLabel() const { return label_; }
	bool operator==(const Address& rhs) const
	{
		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
	}
	bool operator!=(const Address& rhs) const { return !operator==(rhs); }
	bool isVsib() const { return e_.isVsib(); }
private:
	RegExp e_;
	const Label* label_; // non-null only for [rip + Label]
	Mode mode_;
	bool broadcast_;     // EVEX {1toN} memory broadcast
};
1136
// Downcast an Operand known to be a memory operand to Address.
// Caller must ensure isMEM(); checked only by assert in debug builds.
inline const Address& Operand::getAddress() const
{
	assert(isMEM());
	return static_cast<const Address&>(*this);
}
1142
1143inline bool Operand::operator==(const Operand& rhs) const
1144{
1145 if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress();
1146 return isEqualIfNotInherited(rhs);
1147}
1148
// Factory for Address objects with a fixed operand width and broadcast flag;
// instances are the ptr/byte/word/... objects indexed with operator[].
class AddressFrame {
	void operator=(const AddressFrame&);  // non-assignable
	AddressFrame(const AddressFrame&);    // non-copyable
public:
	const uint32 bit_;       // operand size in bits (0 = unspecified)
	const bool broadcast_;   // EVEX broadcast frame?
	explicit AddressFrame(uint32 bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
	Address operator[](const RegExp& e) const
	{
		return Address(bit_, broadcast_, e);
	}
	Address operator[](const void *disp) const
	{
		return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
	}
#ifdef XBYAK64
	Address operator[](uint64 disp) const { return Address(disp); } // 64-bit moffset
	Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
#endif
};
1169
// Bookkeeping record for a jump whose target label is not yet defined.
struct JmpLabel {
	size_t endOfJmp; /* offset from top to the end address of jmp */
	int jmpSize;     // displacement field width in bytes (1 or 4)
	inner::LabelMode mode;
	size_t disp; // disp for [rip + disp]
	explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0)
		: endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp)
	{
	}
};
1180
1181class LabelManager;
1182
// Jump-target handle. A Label is lazily assigned an id by its LabelManager;
// the manager pointer/id are maintained through copy/assign/destroy so the
// manager can detach labels when it resets (see resetLabelPtrList).
class Label {
	mutable LabelManager *mgr; // owning manager, 0 if unattached
	mutable int id;            // 0 until first used
	friend class LabelManager;
public:
	Label() : mgr(0), id(0) {}
	Label(const Label& rhs);
	Label& operator=(const Label& rhs);
	~Label();
	// Detach without notifying the manager (called by the manager itself).
	void clear() { mgr = 0; id = 0; }
	int getId() const { return id; }
	const uint8 *getAddress() const;

	// backward compatibility
	// Make a unique string label ".%08x" from a number (old string-label API).
	static inline std::string toStr(int num)
	{
		char buf[16];
#if defined(_MSC_VER) && (_MSC_VER < 1900)
		_snprintf_s
#else
		snprintf
#endif
		(buf, sizeof(buf), ".%08x", num);
		return buf;
	}
};
1209
// Tracks label definitions and unresolved references for a CodeArray.
// Two namespaces are kept: string labels ("Slabel", with @@/@b/@f and
// '.'-prefixed local labels) and Label-object labels ("Clabel", keyed by id).
// Defining a label back-patches every jump that referenced it earlier.
class LabelManager {
	// for string label
	struct SlabelVal {
		size_t offset; // code offset of the definition
		SlabelVal(size_t offset) : offset(offset) {}
	};
	typedef XBYAK_STD_UNORDERED_MAP<std::string, SlabelVal> SlabelDefList;
	typedef XBYAK_STD_UNORDERED_MULTIMAP<std::string, const JmpLabel> SlabelUndefList;
	struct SlabelState {
		SlabelDefList defList;
		SlabelUndefList undefList;
	};
	typedef std::list<SlabelState> StateList;
	// for Label class
	struct ClabelVal {
		ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {}
		size_t offset;
		int refCount; // number of Label objects sharing this id
	};
	typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
	typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
	typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;

	CodeArray *base_;
	// global : stateList_.front(), local : stateList_.back()
	StateList stateList_;
	mutable int labelId_; // next id handed out by getId()
	ClabelDefList clabelDefList_;
	ClabelUndefList clabelUndefList_;
	LabelPtrList labelPtrList_; // all live Label objects attached to this manager

	// Assign an id to the label on first use (id is mutable).
	int getId(const Label& label) const
	{
		if (label.id == 0) label.id = labelId_++;
		return label.id;
	}
	// Register a definition at addrOffset and resolve every pending jump to it.
	template<class DefList, class UndefList, class T>
	void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset)
	{
		// add label
		typename DefList::value_type item(labelId, addrOffset);
		std::pair<typename DefList::iterator, bool> ret = defList.insert(item);
		if (!ret.second) throw Error(ERR_LABEL_IS_REDEFINED);
		// search undefined label
		for (;;) {
			typename UndefList::iterator itr = undefList.find(labelId);
			if (itr == undefList.end()) break;
			const JmpLabel *jmp = &itr->second;
			const size_t offset = jmp->endOfJmp - jmp->jmpSize; // start of disp field
			size_t disp;
			if (jmp->mode == inner::LaddTop) {
				disp = addrOffset;
			} else if (jmp->mode == inner::Labs) {
				disp = size_t(base_->getCurr());
			} else {
				// relative: target - end-of-jmp (+ extra disp for [rip+Label])
				disp = addrOffset - jmp->endOfJmp + jmp->disp;
#ifdef XBYAK64
				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) throw Error(ERR_OFFSET_IS_TOO_BIG);
#endif
				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw Error(ERR_LABEL_IS_TOO_FAR);
			}
			if (base_->isAutoGrow()) {
				base_->save(offset, disp, jmp->jmpSize, jmp->mode); // patch later
			} else {
				base_->rewrite(offset, disp, jmp->jmpSize); // patch now
			}
			undefList.erase(itr);
		}
	}
	// Look up a definition; store its code offset in *offset if found.
	template<class DefList, class T>
	bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const
	{
		typename DefList::const_iterator i = defList.find(label);
		if (i == defList.end()) return false;
		*offset = i->second.offset;
		return true;
	}
	friend class Label;
	void incRefCount(int id, Label *label)
	{
		clabelDefList_[id].refCount++;
		labelPtrList_.insert(label);
	}
	void decRefCount(int id, Label *label)
	{
		labelPtrList_.erase(label);
		ClabelDefList::iterator i = clabelDefList_.find(id);
		if (i == clabelDefList_.end()) return;
		if (i->second.refCount == 1) {
			clabelDefList_.erase(id);
		} else {
			--i->second.refCount;
		}
	}
	template<class T>
	bool hasUndefinedLabel_inner(const T& list) const
	{
#ifndef NDEBUG
		for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) {
			std::cerr << "undefined label:" << i->first << std::endl;
		}
#endif
		return !list.empty();
	}
	// detach all labels linked to LabelManager
	void resetLabelPtrList()
	{
		for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
			(*i)->clear();
		}
		labelPtrList_.clear();
	}
public:
	LabelManager()
	{
		reset();
	}
	~LabelManager()
	{
		resetLabelPtrList();
	}
	void reset()
	{
		base_ = 0;
		labelId_ = 1;
		stateList_.clear();
		// two initial states: the global scope plus one implicit local scope
		stateList_.push_back(SlabelState());
		stateList_.push_back(SlabelState());
		clabelDefList_.clear();
		clabelUndefList_.clear();
		resetLabelPtrList();
	}
	// Push a new local-label scope (see inLocalLabel in the public API).
	void enterLocal()
	{
		stateList_.push_back(SlabelState());
	}
	// Pop the current local-label scope; all its labels must be resolved.
	void leaveLocal()
	{
		if (stateList_.size() <= 2) throw Error(ERR_UNDER_LOCAL_LABEL);
		if (hasUndefinedLabel_inner(stateList_.back().undefList)) throw Error(ERR_LABEL_IS_NOT_FOUND);
		stateList_.pop_back();
	}
	void set(CodeArray *base) { base_ = base; }
	// Define a string label at the current code position.
	// "@@" alternates between the pseudo-labels "@b" (backward) and "@f" (forward).
	void defineSlabel(std::string label)
	{
		if (label == "@b" || label == "@f") throw Error(ERR_BAD_LABEL_STR);
		if (label == "@@") {
			SlabelDefList& defList = stateList_.front().defList;
			SlabelDefList::iterator i = defList.find("@f");
			if (i != defList.end()) {
				defList.erase(i);
				label = "@b";
			} else {
				i = defList.find("@b");
				if (i != defList.end()) {
					defList.erase(i);
				}
				label = "@f";
			}
		}
		// '.'-prefixed labels live in the innermost local scope
		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		define_inner(st.defList, st.undefList, label, base_->getSize());
	}
	// Define a Label object at the current code position and attach it here.
	void defineClabel(Label& label)
	{
		define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
		label.mgr = this;
		labelPtrList_.insert(&label);
	}
	// Make dst refer to the same code offset as the already-defined src.
	void assign(Label& dst, const Label& src)
	{
		ClabelDefList::const_iterator i = clabelDefList_.find(src.id);
		if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
		define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
		dst.mgr = this;
		labelPtrList_.insert(&dst);
	}
	bool getOffset(size_t *offset, std::string& label) const
	{
		const SlabelDefList& defList = stateList_.front().defList;
		// translate the @b/@f pseudo-labels to whichever "@@" is current
		if (label == "@b") {
			if (defList.find("@f") != defList.end()) {
				label = "@f";
			} else if (defList.find("@b") == defList.end()) {
				throw Error(ERR_LABEL_IS_NOT_FOUND);
			}
		} else if (label == "@f") {
			if (defList.find("@f") != defList.end()) {
				label = "@b";
			}
		}
		const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		return getOffset_inner(st.defList, offset, label);
	}
	bool getOffset(size_t *offset, const Label& label) const
	{
		return getOffset_inner(clabelDefList_, offset, getId(label));
	}
	// Remember a jump whose string-label target is not defined yet.
	void addUndefinedLabel(const std::string& label, const JmpLabel& jmp)
	{
		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		st.undefList.insert(SlabelUndefList::value_type(label, jmp));
	}
	// Remember a jump whose Label-object target is not defined yet.
	void addUndefinedLabel(const Label& label, const JmpLabel& jmp)
	{
		clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp));
	}
	bool hasUndefSlabel() const
	{
		for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) {
			if (hasUndefinedLabel_inner(i->undefList)) return true;
		}
		return false;
	}
	bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); }
	const uint8 *getCode() const { return base_->getCode(); }
	// Addresses are final unless the buffer is AutoGrow and not yet fixed up.
	bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); }
};
1428
// Copy constructor: share rhs's id and register with its manager so the
// per-id reference count stays accurate.
inline Label::Label(const Label& rhs)
{
	id = rhs.id;
	mgr = rhs.mgr;
	if (mgr) mgr->incRefCount(id, this);
}
// Assignment: only allowed while this label is still unset (id == 0);
// reassigning a label already placed by L() is an error.
inline Label& Label::operator=(const Label& rhs)
{
	if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
	id = rhs.id;
	mgr = rhs.mgr;
	if (mgr) mgr->incRefCount(id, this);
	return *this;
}
// Drop this object's reference on its id if it is attached to a manager.
inline Label::~Label()
{
	if (id && mgr) mgr->decRefCount(id, this);
}
// Final address of the label, or 0 if the label is unattached, undefined,
// or the code buffer's addresses are not final yet (AutoGrow before ready()).
inline const uint8* Label::getAddress() const
{
	if (mgr == 0 || !mgr->isReady()) return 0;
	size_t offset;
	if (!mgr->getOffset(&offset, *this)) return 0;
	return mgr->getCode() + offset;
}
1454
1455class CodeGenerator : public CodeArray {
1456public:
	// Requested jump encoding size.
	enum LabelType {
		T_SHORT, // always rel8 (throws if out of range)
		T_NEAR,  // always rel32
		T_AUTO // T_SHORT if possible
	};
1462private:
	CodeGenerator operator=(const CodeGenerator&); // don't call
#ifdef XBYAK64
	enum { i32e = 32 | 64, BIT = 64 }; // i32e : GP register widths accepted as "32-bit or native"
	// placeholder address patched after the real value becomes known
	static const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788;
	typedef Reg64 NativeReg; // pointer-sized general-purpose register
#else
	enum { i32e = 32, BIT = 32 };
	static const size_t dummyAddr = 0x12345678;
	typedef Reg32 NativeReg;
#endif
1473 // (XMM, XMM|MEM)
1474 static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2)
1475 {
1476 return op1.isXMM() && (op2.isXMM() || op2.isMEM());
1477 }
1478 // (MMX, MMX|MEM) or (XMM, XMM|MEM)
1479 static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2)
1480 {
1481 return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2);
1482 }
1483 // (XMM, MMX|MEM)
1484 static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2)
1485 {
1486 return op1.isXMM() && (op2.isMMX() || op2.isMEM());
1487 }
1488 // (MMX, XMM|MEM)
1489 static inline bool isMMX_XMMorMEM(const Operand& op1, const Operand& op2)
1490 {
1491 return op1.isMMX() && (op2.isXMM() || op2.isMEM());
1492 }
1493 // (XMM, REG32|MEM)
1494 static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2)
1495 {
1496 return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM());
1497 }
1498 // (REG32, XMM|MEM)
1499 static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2)
1500 {
1501 return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
1502 }
1503 // (REG32, REG32|MEM)
1504 static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2)
1505 {
1506 return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
1507 }
	// Emit legacy prefixes (0x67 address-size, 0x66 operand-size) and the REX
	// byte required by the operand pair, if any.
	void rex(const Operand& op1, const Operand& op2 = Operand())
	{
		uint8 rex = 0;
		const Operand *p1 = &op1, *p2 = &op2;
		// normalize so that a memory operand, if present, is p2
		if (p1->isMEM()) std::swap(p1, p2);
		if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); // mem,mem is invalid
		if (p2->isMEM()) {
			const Address& addr = p2->getAddress();
			if (BIT == 64 && addr.is32bit()) db(0x67); // 32-bit addressing in 64-bit mode
			rex = addr.getRex() | p1->getReg().getRex();
		} else {
			// ModRM(reg, base);
			rex = op2.getReg().getRex(op1.getReg());
		}
		// except movsx(16bit, 32/64bit)
		if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
		if (rex) db(rex);
	}
	// Encoding attribute flags passed to vex()/evex(); a single int combines
	// the tuple size (low 3 bits), mandatory prefix, opcode map, vector
	// length, W/EW bits and EVEX-only features.
	enum AVXtype {
		// low 3 bit
		T_N1 = 1,
		T_N2 = 2,
		T_N4 = 3,
		T_N8 = 4,
		T_N16 = 5,
		T_N32 = 6,
		T_NX_MASK = 7,
		//
		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
		T_DUP = 1 << 4, // N = (8, 32, 64)
		T_66 = 1 << 5,
		T_F3 = 1 << 6,
		T_F2 = 1 << 7,
		T_0F = 1 << 8,
		T_0F38 = 1 << 9,
		T_0F3A = 1 << 10,
		T_L0 = 1 << 11,
		T_L1 = 1 << 12,
		T_W0 = 1 << 13,
		T_W1 = 1 << 14,
		T_EW0 = 1 << 15,
		T_EW1 = 1 << 16,
		T_YMM = 1 << 17, // support YMM, ZMM
		T_EVEX = 1 << 18,
		T_ER_X = 1 << 19, // xmm{er}
		T_ER_Y = 1 << 20, // ymm{er}
		T_ER_Z = 1 << 21, // zmm{er}
		T_SAE_X = 1 << 22, // xmm{sae}
		T_SAE_Y = 1 << 23, // ymm{sae}
		T_SAE_Z = 1 << 24, // zmm{sae}
		T_MUST_EVEX = 1 << 25, // contains T_EVEX
		T_B32 = 1 << 26, // m32bcst
		T_B64 = 1 << 27, // m64bcst
		T_M_K = 1 << 28, // mem{k}
		T_VSIB = 1 << 29,
		T_MEM_EVEX = 1 << 30, // use evex if mem
		T_XXX
	};
	// Emit a VEX prefix (2-byte C5 form when possible, else 3-byte C4 form)
	// followed by the opcode byte. `v` is the optional second source (vvvv).
	void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
	{
		int w = (type & T_W1) ? 1 : 0;
		bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
		bool r = reg.isExtIdx();
		bool b = base.isExtIdx();
		int idx = v ? v->getIdx() : 0;
		// VEX can only address registers 0-15
		if ((idx | reg.getIdx() | base.getIdx()) >= 16) throw Error(ERR_BAD_COMBINATION);
		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
		uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
		// 2-byte form only encodes R, vvvv, L, pp and implies the 0F map
		if (!b && !x && !w && (type & T_0F)) {
			db(0xC5); db((r ? 0 : 0x80) | vvvv);
		} else {
			uint32 mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
			db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
		}
		db(code);
	}
1584 void verifySAE(const Reg& r, int type) const
1585 {
1586 if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
1587 throw Error(ERR_SAE_IS_INVALID);
1588 }
1589 void verifyER(const Reg& r, int type) const
1590 {
1591 if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
1592 throw Error(ERR_ER_IS_INVALID);
1593 }
1594 // (a, b, c) contains non zero two or three values then err
1595 int verifyDuplicate(int a, int b, int c, int err)
1596 {
1597 int v = a | b | c;
1598 if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
1599 return v;
1600 }
	// Emit a 4-byte EVEX prefix plus the opcode byte. Returns disp8N, the
	// compressed-displacement scale the caller must pass to opAddr/setSIB.
	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
	{
		if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
		int w = (type & T_EW1) ? 1 : 0;
		uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;

		int idx = v ? v->getIdx() : 0;
		uint32 vvvv = ~idx;

		// EVEX stores register-extension bits inverted
		bool R = !reg.isExtIdx();
		bool X = x ? false : !base.isExtIdx2();
		bool B = !base.isExtIdx();
		bool Rp = !reg.isExtIdx2();
		int LL;
		int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
		int disp8N = 1;
		if (rounding) {
			// {sae}/{er} reuse the LL field; b must be set
			if (rounding == EvexModifierRounding::T_SAE) {
				verifySAE(base, type); LL = 0;
			} else {
				verifyER(base, type); LL = rounding - 1;
			}
			b = true;
		} else {
			// derive vector length from the widest operand
			if (v) VL = (std::max)(VL, v->getBit());
			VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
			LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
			// compressed disp8 scale per tuple type (Disp8*N)
			if (b) {
				disp8N = (type & T_B32) ? 4 : 8;
			} else if (type & T_DUP) {
				disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
			} else {
				if ((type & (T_NX_MASK | T_N_VL)) == 0) {
					type |= T_N16 | T_N_VL; // default
				}
				int low = type & T_NX_MASK;
				if (low > 0) {
					disp8N = 1 << (low - 1);
					if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1);
				}
			}
		}
		bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
		bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
		db(0x62);
		db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
		db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
		db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
		db(code);
		return disp8N;
	}
1654 void setModRM(int mod, int r1, int r2)
1655 {
1656 db(static_cast<uint8>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
1657 }
	// Emit the ModRM (+ optional SIB) bytes and displacement for a memory
	// operand. disp8N != 0 selects EVEX compressed-disp8 scaling; disp8N == 1
	// forces disp32 when the displacement is not an exact multiple.
	void setSIB(const RegExp& e, int reg, int disp8N = 0)
	{
		size_t disp64 = e.getDisp();
#ifdef XBYAK64
		// displacement must fit in (sign-extended) 32 bits
		size_t high = disp64 >> 32;
		if (high != 0 && high != 0xFFFFFFFF) throw Error(ERR_OFFSET_IS_TOO_BIG);
#endif
		uint32 disp = static_cast<uint32>(disp64);
		const Reg& base = e.getBase();
		const Reg& index = e.getIndex();
		const int baseIdx = base.getIdx();
		const int baseBit = base.getBit();
		const int indexBit = index.getBit();
		enum {
			mod00 = 0, mod01 = 1, mod10 = 2
		};
		int mod = mod10; // disp32
		// no base, or zero disp with a base that is not (r)bp/r13 -> no displacement
		if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) {
			mod = mod00;
		} else {
			if (disp8N == 0) {
				if (inner::IsInDisp8(disp)) {
					mod = mod01;
				}
			} else {
				// disp must be casted to signed
				uint32 t = static_cast<uint32>(static_cast<int>(disp) / disp8N);
				if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) {
					disp = t; // store the scaled disp8
					mod = mod01;
				}
			}
		}
		const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP;
		/* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */
		// SIB is required with an index register, or base (r)sp/r12
		bool hasSIB = indexBit || (baseIdx & 7) == Operand::ESP;
#ifdef XBYAK64
		if (!baseBit && !indexBit) hasSIB = true; // absolute [disp32] in 64-bit mode
#endif
		if (hasSIB) {
			setModRM(mod, reg, Operand::ESP);
			/* SIB = [2:3:3] = [SS:index:base(=rm)] */
			const int idx = indexBit ? (index.getIdx() & 7) : Operand::ESP;
			const int scale = e.getScale();
			const int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0;
			setModRM(SS, idx, newBaseIdx);
		} else {
			setModRM(mod, reg, newBaseIdx);
		}
		if (mod == mod01) {
			db(disp);
		} else if (mod == mod10 || (mod == mod00 && !baseBit)) {
			dd(disp);
		}
	}
	LabelManager labelMgr_; // label bookkeeping for this generator
	// true if x (viewed as a sign-extended 32-bit value) fits in signed 16 bits
	bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
	// Register-register form: prefixes + up to 3 opcode bytes + ModRM(mod=3).
	// Bit0 of code0 selects the non-8-bit-operand form.
	void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE)
	{
		rex(reg2, reg1);
		db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
		setModRM(3, reg1.getIdx(), reg2.getIdx());
	}
	// Register-memory form; immSize is the size of a trailing immediate, so
	// rip-relative displacements can account for it.
	void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
	{
		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
		rex(addr, reg);
		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
		opAddr(addr, reg.getIdx(), immSize);
	}
	// MIB addressing (bndldx/bndstx style): unoptimized RegExp, explicit REX,
	// no operand-size adjustment of the opcode.
	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
	{
		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
		if (addr.getMode() != Address::M_ModRM) throw Error(ERR_INVALID_MIB_ADDRESS);
		if (BIT == 64 && addr.is32bit()) db(0x67);
		const RegExp& regExp = addr.getRegExp(false); // keep base/index as written
		uint8 rex = regExp.getRex();
		if (rex) db(rex);
		db(code0); db(code1);
		setSIB(regExp, reg.getIdx());
	}
	// Emit a jump to an already-known target. disp is relative to the start
	// of the jump; the encoded rel8/rel32 is measured from the end, hence the
	// shortJmpSize/longJmpSize corrections.
	void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
	{
		const int shortJmpSize = 2;
		const int longHeaderSize = longPref ? 2 : 1;
		const int longJmpSize = longHeaderSize + 4;
		if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
			db(shortCode); db(disp - shortJmpSize);
		} else {
			if (type == T_SHORT) throw Error(ERR_LABEL_IS_TOO_FAR);
			if (longPref) db(longPref);
			db(longCode); dd(disp - longJmpSize);
		}
	}
	// Emit a jump to a (string or object) label. If the label is already
	// defined, encode directly; otherwise emit a placeholder displacement and
	// register the site for back-patching.
	template<class T>
	void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
	{
		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label)) { /* label exists */
			makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
		} else {
			int jmpSize = 0;
			if (type == T_NEAR) {
				jmpSize = 4;
				if (longPref) db(longPref);
				db(longCode); dd(0); // rel32 placeholder
			} else {
				jmpSize = 1;
				db(shortCode); db(0); // rel8 placeholder
			}
			JmpLabel jmp(size_, jmpSize, inner::LasIs);
			labelMgr_.addUndefinedLabel(label, jmp);
		}
	}
	// Emit a jump to an absolute address. In AutoGrow mode the buffer can
	// move, so only T_NEAR is supported and the displacement is deferred via
	// save(); otherwise encode relative to the current position now.
	void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
	{
		if (isAutoGrow()) {
			if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
			if (size_ + 16 >= maxSize_) growMemory();
			if (longPref) db(longPref);
			db(longCode);
			dd(0); // patched in calcJmpAddress
			save(size_ - 4, size_t(addr) - size_, 4, inner::Labs);
		} else {
			makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8*>(addr) - getCurr()), type, shortCode, longCode, longPref);
		}

	}
	// reg is reg field of ModRM
	// immSize is the size for immediate value
	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
	// Emit ModRM/SIB/displacement for a memory operand; rip-relative modes use
	// ModRM mod=00, rm=101 plus a rel32.
	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
	{
		if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
		if (addr.getMode() == Address::M_ModRM) {
			setSIB(addr.getRegExp(), reg, disp8N);
		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
			setModRM(0, reg, 5);
			if (addr.getLabel()) { // [rip + Label]
				putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize);
			} else {
				size_t disp = addr.getDisp();
				if (addr.getMode() == Address::M_ripAddr) {
					if (isAutoGrow()) throw Error(ERR_INVALID_RIP_IN_AUTO_GROW);
					// convert absolute address to rip-relative (rip points past disp and imm)
					disp -= (size_t)getCurr() + 4 + immSize;
				}
				dd(inner::VerifyInInt32(disp));
			}
		}
	}
	/* preCode is for SSSE3/SSE4 */
	// Generic SSE-family emitter: optional prefix, 0x0F (opt. preCode) opcode,
	// reg/mem form chosen by op, optional trailing imm8. isValid (if non-null)
	// validates the operand combination first.
	void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
	{
		if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION);
		if (pref != NONE) db(pref);
		if (op.isMEM()) {
			opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
		} else {
			opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
		}
		if (imm8 != NONE) db(imm8);
	}
	// MMX/SSE shift-by-immediate form: ext goes in the ModRM reg field.
	void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
	{
		if (mmx.isXMM()) db(0x66); // XMM variant needs the 0x66 prefix
		opModR(Reg32(ext), mmx, 0x0F, code);
		db(imm8);
	}
	// MMX/SSE two-operand instruction; the prefix applies only to the XMM form.
	void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
	{
		opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
	}
1831 void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
1832 {
1833 if (pref != NONE) db(pref);
1834 if (op1.isXMM() && op2.isMEM()) {
1835 opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
1836 } else if (op1.isMEM() && op2.isXMM()) {
1837 opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
1838 } else {
1839 throw Error(ERR_BAD_COMBINATION);
1840 }
1841 }
	// Extract-style ops (pextrw etc.): a GP-register destination with MMX2
	// support uses the dedicated 0F C5 encoding, otherwise the generic
	// 0F 3A <code> form with imm8.
	void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
	{
		if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
			if (mmx.isXMM()) db(0x66);
			opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
		} else {
			opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A);
		}
	}
	// Single-operand reg/mem form with an opcode extension in the ModRM reg
	// field. disableRex treats a 64-bit operand as 32-bit (no REX.W).
	void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0)
	{
		int opBit = op.getBit();
		if (disableRex && opBit == 64) opBit = 32;
		if (op.isREG(bit)) {
			opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
		} else if (op.isMEM()) {
			opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
		} else {
			throw Error(ERR_BAD_COMBINATION);
		}
	}
	// Shift/rotate by immediate: imm==1 uses the dedicated 0xD0/0xD1 form
	// (no immediate byte), otherwise 0xC0/0xC1 followed by imm8.
	void opShift(const Operand& op, int imm, int ext)
	{
		verifyMemHasSize(op);
		opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0);
		if (imm != 1) db(imm);
	}
	// Shift/rotate by CL (only the CL register is a valid count operand).
	void opShift(const Operand& op, const Reg8& _cl, int ext)
	{
		if (_cl.getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION);
		opR_ModM(op, 0, ext, 0xD2);
	}
	// Generic ModRM dispatcher. The caller supplies validity conditions:
	// condR picks the register-register form, condM the register-memory form
	// (checked in that order); neither holding is a bad combination.
	void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
	{
		if (condR) {
			opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
		} else if (condM) {
			opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
		} else {
			throw Error(ERR_BAD_COMBINATION);
		}
	}
	// shld/shrd: `code` is the imm8 variant; `code | 1` is the CL variant
	// (selected when _cl is non-null, which must then actually be CL).
	// The final immSize argument reserves room for the trailing imm8.
	void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *_cl = 0)
	{
		if (_cl && _cl->getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION);
		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
		if (!_cl) db(imm);
	}
	// (REG, REG|MEM), (MEM, REG)
	// Two-direction ALU form: (reg, mem) uses `code | 2` (the direction bit),
	// every other valid pairing goes through opModRM with the base `code`.
	void opRM_RM(const Operand& op1, const Operand& op2, int code)
	{
		if (op1.isREG() && op2.isMEM()) {
			opModM(op2.getAddress(), op1.getReg(), code | 2);
		} else {
			opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
		}
	}
	// (REG|MEM, IMM)
	// ALU op with immediate, choosing the shortest legal encoding:
	// - accumulator short form (`code | 4`/`code | 5`) for al/ax/eax/rax,
	// - 0x83-family (sign-extended imm8) when the immediate fits 8 bits,
	// - 0x80/0x81-family otherwise.
	void opRM_I(const Operand& op, uint32 imm, int code, int ext)
	{
		verifyMemHasSize(op);
		uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
		if (op.isBit(8)) immBit = 8;
		if (op.getBit() < immBit) throw Error(ERR_IMM_IS_TOO_BIG);
		if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
		if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
			rex(op);
			db(code | 4 | (immBit == 8 ? 0 : 1));
		} else {
			int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0; // +2 selects the sign-extended imm8 (0x83) form
			opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8);
		}
		db(imm, immBit / 8);
	}
	// inc/dec. In 32-bit mode the one-byte `code | reg` short form (40+r/48+r)
	// is used for non-8-bit registers; otherwise the 0xFE-family /ext ModRM
	// encoding.
	void opIncDec(const Operand& op, int code, int ext)
	{
		verifyMemHasSize(op);
#ifndef XBYAK64
		// short form only exists outside 64-bit mode (40-4F are REX there)
		if (op.isREG() && !op.isBit(8)) {
			rex(op); db(code | op.getIdx());
			return;
		}
#endif
		code = 0xFE; // ModRM fallback opcode family
		if (op.isREG()) {
			opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code);
		} else {
			opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
		}
	}
	// push/pop. Only 16-bit and native-width (BIT) operands are legal.
	// Registers use the one-byte `alt | reg` form (50+r/58+r) with a bare
	// 0x41 REX.B for r8-r15; memory operands use `code` with /ext.
	void opPushPop(const Operand& op, int code, int ext, int alt)
	{
		int bit = op.getBit();
		if (bit == 16 || bit == BIT) {
			if (bit == 16) db(0x66); // operand-size prefix
			if (op.isREG()) {
				if (op.getReg().getIdx() >= 8) db(0x41); // REX.B for extended regs
				db(alt | (op.getIdx() & 7));
				return;
			}
			if (op.isMEM()) {
				opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
				return;
			}
		}
		throw Error(ERR_BAD_COMBINATION);
	}
1949 void verifyMemHasSize(const Operand& op) const
1950 {
1951 if (op.isMEM() && op.getBit() == 0) throw Error(ERR_MEM_SIZE_IS_NOT_SPECIFIED);
1952 }
1953 /*
1954 mov(r, imm) = db(imm, mov_imm(r, imm))
1955 */
1956 int mov_imm(const Reg& reg, size_t imm)
1957 {
1958 int bit = reg.getBit();
1959 const int idx = reg.getIdx();
1960 int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
1961 if (bit == 64 && (imm & ~size_t(0xffffffffu)) == 0) {
1962 rex(Reg32(idx));
1963 bit = 32;
1964 } else {
1965 rex(reg);
1966 if (bit == 64 && inner::IsInInt32(imm)) {
1967 db(0xC7);
1968 code = 0xC0;
1969 bit = 32;
1970 }
1971 }
1972 db(code | (idx & 7));
1973 return bit / 8;
1974 }
	// Write the address of `label` at the current position: a 4-byte relative
	// displacement when `relative`, otherwise an absolute pointer-sized value.
	// Undefined labels get a zero placeholder and are backpatched when the
	// label is later defined.
	template<class T>
	void putL_inner(T& label, bool relative = false, size_t disp = 0)
	{
		const int jmpSize = relative ? 4 : (int)sizeof(size_t);
		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory();
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label)) { // label already defined
			if (relative) {
				db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize);
			} else if (isAutoGrow()) {
				// buffer may still move: record a fixup resolved in calcJmpAddress()
				db(uint64(0), jmpSize);
				save(size_ - jmpSize, offset, jmpSize, inner::LaddTop);
			} else {
				db(size_t(top_) + offset, jmpSize);
			}
			return;
		}
		// not defined yet: reserve space and register for backpatching
		db(uint64(0), jmpSize);
		JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp);
		labelMgr_.addUndefinedLabel(label, jmp);
	}
	// movsx/movzx. `code | 1` selects the 16-bit-source form; a 32-bit source
	// is rejected (that is movsxd, a different opcode). The destination must
	// be wider than the source.
	void opMovxx(const Reg& reg, const Operand& op, uint8 code)
	{
		if (op.isBit(32)) throw Error(ERR_BAD_COMBINATION);
		int w = op.isBit(16);
#ifdef XBYAK64
		if (op.isHigh8bit()) throw Error(ERR_BAD_COMBINATION); // ah/ch/dh/bh not encodable with REX
#endif
		bool cond = reg.isREG() && (reg.getBit() > op.getBit());
		opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
	}
	// x87 instruction with a memory operand. The opcode byte is chosen by the
	// operand size (a zero entry means that size is unsupported); m64ext,
	// when non-zero, overrides the /ext field for the 64-bit form.
	void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext)
	{
		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
		uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
		if (!code) throw Error(ERR_BAD_MEM_SIZE);
		if (m64ext && addr.isBit(64)) ext = m64ext;

		rex(addr, st0);
		db(code);
		opAddr(addr, ext);
	}
2017 // use code1 if reg1 == st0
2018 // use code2 if reg1 != st0 && reg2 == st0
2019 void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2)
2020 {
2021 uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
2022 if (!code) throw Error(ERR_BAD_ST_COMBINATION);
2023 db(uint8(code >> 8));
2024 db(uint8(code | (reg1.getIdx() | reg2.getIdx())));
2025 }
2026 void opFpu(const Fpu& reg, uint8 code1, uint8 code2)
2027 {
2028 db(code1); db(code2 | reg.getIdx());
2029 }
	// Emit a VEX- or EVEX-encoded instruction. `p1` (may be null) is the
	// extra operand encoded in vvvv. EVEX is selected when the type flags or
	// any operand demand it (T_MUST_EVEX, extended registers, opmask,
	// broadcast); otherwise the shorter VEX prefix is used.
	void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE)
	{
		if (op2.isMEM()) {
			const Address& addr = op2.getAddress();
			const RegExp& regExp = addr.getRegExp();
			const Reg& base = regExp.getBase();
			const Reg& index = regExp.getIndex();
			if (BIT == 64 && addr.is32bit()) db(0x67); // address-size override
			int disp8N = 0;
			bool x = index.isExtIdx();
			if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
				int aaa = addr.getOpmaskIdx();
				if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
				bool b = false;
				if (addr.isBroadcast()) {
					if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
					b = true;
				}
				int VL = regExp.isVsib() ? index.getBit() : 0;
				// disp8N is the EVEX compressed-disp8 scale used by opAddr
				disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
			} else {
				vex(r, base, p1, type, code, x);
			}
			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
		} else {
			const Reg& base = op2.getReg();
			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
				evex(r, base, p1, type, code);
			} else {
				vex(r, base, p1, type, code);
			}
			setModRM(3, r.getIdx(), base.getIdx()); // mod=3: register-direct
		}
		if (imm8 != NONE) db(imm8);
	}
	// (r, r, r/m) if isR_R_RM
	// (r, r/m, r)
	// VEX-encoded GPR instruction (BMI-style). All register operands must
	// share the destination's width; W0/W1 is derived from that width.
	void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM, int imm8 = NONE)
	{
		const Operand *p1 = &op1;
		const Operand *p2 = &op2;
		if (!isR_R_RM) std::swap(p1, p2);
		const unsigned int bit = r.getBit();
		if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw Error(ERR_BAD_COMBINATION);
		type |= (bit == 64) ? T_W1 : T_W0;
		opVex(r, p1, *p2, type, code, imm8);
	}
	// AVX three-operand form (x1, x2, op). A two-operand call (op2 is none)
	// is canonicalized to (x1, x1, op1). x1 and x2 must have the same width,
	// and YMM/ZMM widths additionally require T_YMM in `type`.
	void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE)
	{
		const Xmm *x2 = static_cast<const Xmm*>(&op1);
		const Operand *op = &op2;
		if (op2.isNone()) { // (x1, op1) -> (x1, x1, op1)
			x2 = &x1;
			op = &op1;
		}
		// (x1, x2, op)
		if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION);
		opVex(x1, x2, *op, type, code0, imm8);
	}
2089 void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
2090 {
2091 if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw Error(ERR_BAD_COMBINATION);
2092 opVex(k, &x2, op3, type, code0, imm8);
2093 }
2094 // (x, x/m), (y, x/m256), (z, y/m)
2095 void checkCvt1(const Operand& x, const Operand& op) const
2096 {
2097 if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw Error(ERR_BAD_COMBINATION);
2098 }
2099 // (x, x/m), (x, y/m256), (y, z/m)
2100 void checkCvt2(const Xmm& x, const Operand& op) const
2101 {
2102 if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) throw Error(ERR_BAD_COMBINATION);
2103 }
	// cvt* whose encoded destination width depends on the source: promote x
	// to the kind implied by op before handing off to opVex.
	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
	{
		checkCvt2(x, op);
		Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
	}
	// cvt* between xmm and a 32/64-bit GPR or memory. type64/type32 select
	// the REX.W variant by operand size; a GPR operand is re-wrapped as an
	// Xmm with the same index so opVex encodes it uniformly.
	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8 code)
	{
		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) throw Error(ERR_BAD_SIZE_OF_REGISTER);
		Xmm x(op.getIdx());
		const Operand *p = op.isREG() ? &x : &op;
		opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
	}
2117 const Xmm& cvtIdx0(const Operand& x) const
2118 {
2119 return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
2120 }
	// support (x, x/m, imm), (y, y/m, imm)
	// Two-operand AVX form: the unused vvvv slot is filled with the
	// width-matched register 0.
	void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, int imm8 = NONE)
	{
		opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
	}
	// QQQ:need to refactor
	// 0F-family op with a mandatory prefix byte `pref`; the 16-bit form
	// additionally gets a 0x66 operand-size prefix.
	void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1)
	{
		if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION);
		if (is16bit) db(0x66);
		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
	}
	// AVX2 gather. Validates the (dst, vsib index, mask) register-width
	// pattern selected by `mode`, then encodes with YMM-widened dst/mask
	// when the index register is YMM.
	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode)
	{
		const RegExp& regExp = addr.getRegExp();
		if (!regExp.isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING);
		const int y_vx_y = 0;
		const int y_vy_y = 1;
//		const int x_vy_x = 2;
		const bool isAddrYMM = regExp.getIndex().getBit() == 256;
		// all-XMM is always valid; anything wider must match `mode` exactly
		if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) {
			bool isOK = false;
			if (mode == y_vx_y) {
				isOK = x1.isYMM() && !isAddrYMM && x2.isYMM();
			} else if (mode == y_vy_y) {
				isOK = x1.isYMM() && isAddrYMM && x2.isYMM();
			} else { // x_vy_x
				isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM();
			}
			if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
		}
		opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
	}
	// Allowed (dst, vsib-index) width pairs for AVX-512 gather/scatter
	// checking (see checkGather2): e.g. xx_yx_zy permits (x,x), (y,x), (z,y).
	enum {
		xx_yy_zz = 0,
		xx_yx_zy = 1,
		xx_xy_yz = 2
	};
2161 void checkGather2(const Xmm& x1, const Reg& x2, int mode) const
2162 {
2163 if (x1.isXMM() && x2.isXMM()) return;
2164 switch (mode) {
2165 case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return;
2166 break;
2167 case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return;
2168 break;
2169 case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
2170 break;
2171 }
2172 throw Error(ERR_BAD_VSIB_ADDRESSING);
2173 }
	// AVX-512 gather/scatter: the zeroing modifier is rejected, and the vsib
	// index width must match `mode` (see checkGather2).
	void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode)
	{
		if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
		checkGather2(x, addr.getRegExp().getIndex(), mode);
		opVex(x, 0, addr, type, code);
	}
2180 /*
2181 xx_xy_yz ; mode = true
2182 xx_xy_xz ; mode = false
2183 */
2184 void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, bool mode)
2185 {
2186 if (mode) {
2187 if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) throw Error(ERR_BAD_COMBINATION);
2188 } else {
2189 if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION);
2190 }
2191 opVex(x, 0, op, type, code);
2192 }
2193 void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8 code, Operand::Kind kind)
2194 {
2195 if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
2196 if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
2197 opVex(x, 0, addr, type, code);
2198 }
2199public:
2200 unsigned int getVersion() const { return VERSION; }
2201 using CodeArray::db;
2202 const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
2203 const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
2204 const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
2205 const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
2206 const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
2207 const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
2208 const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
2209 const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
2210 const Reg16 ax, cx, dx, bx, sp, bp, si, di;
2211 const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
2212 const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM
2213 const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
2214 const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
2215 const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
2216 const BoundsReg bnd0, bnd1, bnd2, bnd3;
2217 const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
2218 const EvexModifierZero T_z; // {z}
#ifdef XBYAK64
	const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
	const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
	const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w;
	const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
	const Reg8 spl, bpl, sil, dil; // low-byte registers reachable only with a REX prefix
	const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
	const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23;
	const Xmm xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31;
	const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
	const Ymm ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23;
	const Ymm ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31;
	const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15;
	const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23;
	const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31;
	const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
	const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23;
	const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31;
	const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15;
	const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23;
	const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31;
	const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15;
	const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23;
	const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31;
	const RegRip rip; // RIP-relative addressing token
#endif
#ifndef XBYAK_DISABLE_SEGMENT
	const Segment es, cs, ss, ds, fs, gs;
#endif
	// Define `label` at the current position (string or Label object form).
	void L(const std::string& label) { labelMgr_.defineSlabel(label); }
	void L(Label& label) { labelMgr_.defineClabel(label); }
	// Define and return an anonymous Label at the current position.
	Label L() { Label label; L(label); return label; }
	// Enter/leave a local label scope.
	void inLocalLabel() { labelMgr_.enterLocal(); }
	void outLocalLabel() { labelMgr_.leaveLocal(); }
	/*
		assign src to dst
		require
		dst : does not used by L()
		src : used by L()
	*/
	void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); }
	/*
		put address of label to buffer
		@note the put size is 4(32-bit), 8(64-bit)
	*/
	void putL(std::string label) { putL_inner(label); }
	void putL(const Label& label) { putL_inner(label); }
2266
	// jmp r/m uses FF /4; label/address forms pick the short (EB) or
	// near (E9) encoding according to `type`.
	void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
	void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
	void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
2272
	// call r/m uses FF /2; label/address forms always use the near E8 form
	// (there is no short call).
	void call(const Operand& op) { opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); }
	// call(string label), not const std::string&
	void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	void call(const char *label) { call(std::string(label)); }
	void call(const Label& label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	// call(function pointer)
#ifdef XBYAK_VARIADIC_TEMPLATE
	template<class Ret, class... Params>
	void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
#endif
	void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
2284
2285 void test(const Operand& op, const Reg& reg)
2286 {
2287 opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84);
2288 }
	// test r/m, imm (F6/F7 /0); al/ax/eax use the short A8/A9 form.
	// The immediate is emitted with the operand's size, capped at 4 bytes.
	void test(const Operand& op, uint32 imm)
	{
		verifyMemHasSize(op);
		int immSize = (std::min)(op.getBit() / 8, 4U);
		if (op.isREG() && op.getIdx() == 0) { // al, ax, eax
			rex(op);
			db(0xA8 | (op.isBit(8) ? 0 : 1));
		} else {
			opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize);
		}
		db(imm, immSize);
	}
2301 void imul(const Reg& reg, const Operand& op)
2302 {
2303 opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, 0xAF);
2304 }
	// imul reg, r/m, imm: 0x6B (sign-extended imm8) when the immediate fits
	// in 8 bits, otherwise 0x69 with a 16/32-bit immediate.
	void imul(const Reg& reg, const Operand& op, int imm)
	{
		int s = inner::IsInDisp8(imm) ? 1 : 0; // s=1 selects the imm8 form
		int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x69 | (s << 1), NONE, NONE, immSize);
		db(imm, immSize);
	}
	void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); } // FF /6 or 50+r
	void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); } // 8F /0 or 58+r
2314 void push(const AddressFrame& af, uint32 imm)
2315 {
2316 if (af.bit_ == 8 && inner::IsInDisp8(imm)) {
2317 db(0x6A); db(imm);
2318 } else if (af.bit_ == 16 && isInDisp16(imm)) {
2319 db(0x66); db(0x68); dw(imm);
2320 } else {
2321 db(0x68); dd(imm);
2322 }
2323 }
2324 /* use "push(word, 4)" if you want "push word 4" */
2325 void push(uint32 imm)
2326 {
2327 if (inner::IsInDisp8(imm)) {
2328 push(byte, imm);
2329 } else {
2330 push(dword, imm);
2331 }
2332 }
	// mov between two operands. The accumulator/absolute-address (moffs,
	// A0-A3) forms are special-cased; everything else goes through the
	// generic 88-8B family in opRM_RM.
	void mov(const Operand& reg1, const Operand& reg2)
	{
		const Reg *reg = 0;
		const Address *addr = 0;
		uint8 code = 0;
		if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
			reg = &reg1.getReg();
			addr= &reg2.getAddress();
			code = 0xA0;
		} else
		if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al
			reg = &reg2.getReg();
			addr= &reg1.getAddress();
			code = 0xA2;
		}
#ifdef XBYAK64
		// 64-bit mode: moffs carries a full 8-byte absolute address
		if (addr && addr->is64bitDisp()) {
			if (code) {
				rex(*reg);
				db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
				db(addr->getDisp(), 8);
			} else {
				throw Error(ERR_BAD_COMBINATION);
			}
		} else
#else
		// 32-bit mode: moffs applies when the address is displacement-only
		if (code && addr->isOnlyDisp()) {
			rex(*reg, *addr);
			db(code | (reg->isBit(8) ? 0 : 1));
			dd(static_cast<uint32>(addr->getDisp()));
		} else
#endif
		{
			opRM_RM(reg1, reg2, 0x88);
		}
	}
	// mov r/m, imm. Register destinations delegate to mov_imm (shortest
	// encoding). Memory destinations use C6/C7 with at most a 4-byte
	// immediate; the value must round-trip through that width.
	void mov(const Operand& op, size_t imm)
	{
		if (op.isREG()) {
			const int size = mov_imm(op.getReg(), imm);
			db(imm, size);
		} else if (op.isMEM()) {
			verifyMemHasSize(op);
			int immSize = op.getBit() / 8;
			if (immSize <= 4) {
				// the bits above the operand size must be all-0 or all-1
				sint64 s = sint64(imm) >> (immSize * 8);
				if (s != 0 && s != -1) throw Error(ERR_IMM_IS_TOO_BIG);
			} else {
				// m64 only takes a sign-extended imm32
				if (!inner::IsInInt32(imm)) throw Error(ERR_IMM_IS_TOO_BIG);
				immSize = 4;
			}
			opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
			db(static_cast<uint32>(imm), immSize);
		} else {
			throw Error(ERR_BAD_COMBINATION);
		}
	}
	// mov reg, <address of label>: emit the mov-imm opcode with a dummy
	// address, then let putL write/backpatch the label's real address.
	void mov(const NativeReg& reg, const char *label) // can't use std::string
	{
		if (label == 0) {
			mov(static_cast<const Operand&>(reg), 0); // call imm
			return;
		}
		mov_imm(reg, dummyAddr);
		putL(label);
	}
	void mov(const NativeReg& reg, const Label& label)
	{
		mov_imm(reg, dummyAddr);
		putL(label);
	}
	// xchg. The one-byte 90+r short form applies when one operand is the
	// accumulator; in 64-bit mode `xchg eax, eax` is excluded because 0x90
	// is nop and would skip the upper-half zero-extension. Other cases use
	// the 86/87 ModRM form.
	void xchg(const Operand& op1, const Operand& op2)
	{
		const Operand *p1 = &op1, *p2 = &op2;
		if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) {
			p1 = &op2; p2 = &op1; // canonicalize so the accumulator test sees p1
		}
		if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION);
		if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0)
#ifdef XBYAK64
			&& (p2->getIdx() != 0 || !p1->isREG(32))
#endif
		) {
			rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
			return;
		}
		opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), 0x86 | (p1->isBit(8) ? 0 : 1));
	}
2421
2422#ifndef XBYAK_DISABLE_SEGMENT
	// push Sreg: one-byte legacy encodings for es/cs/ss/ds; fs/gs use the
	// two-byte 0F A0 / 0F A8 forms.
	void push(const Segment& seg)
	{
		switch (seg.getIdx()) {
		case Segment::es: db(0x06); break;
		case Segment::cs: db(0x0E); break;
		case Segment::ss: db(0x16); break;
		case Segment::ds: db(0x1E); break;
		case Segment::fs: db(0x0F); db(0xA0); break;
		case Segment::gs: db(0x0F); db(0xA8); break;
		default:
			assert(0);
		}
	}
	// pop Sreg. `pop cs` does not exist and is rejected; fs/gs use the
	// two-byte 0F A1 / 0F A9 forms.
	void pop(const Segment& seg)
	{
		switch (seg.getIdx()) {
		case Segment::es: db(0x07); break;
		case Segment::cs: throw Error(ERR_BAD_COMBINATION);
		case Segment::ss: db(0x17); break;
		case Segment::ds: db(0x1F); break;
		case Segment::fs: db(0x0F); db(0xA1); break;
		case Segment::gs: db(0x0F); db(0xA9); break;
		default:
			assert(0);
		}
	}
2449 void putSeg(const Segment& seg)
2450 {
2451 switch (seg.getIdx()) {
2452 case Segment::es: db(0x2E); break;
2453 case Segment::cs: db(0x36); break;
2454 case Segment::ss: db(0x3E); break;
2455 case Segment::ds: db(0x26); break;
2456 case Segment::fs: db(0x64); break;
2457 case Segment::gs: db(0x65); break;
2458 default:
2459 assert(0);
2460 }
2461 }
	// mov r/m, Sreg (8C /r)
	void mov(const Operand& op, const Segment& seg)
	{
		opModRM(Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C);
	}
	// mov Sreg, r/m (8E /r); a register source is widened to 32 bits
	void mov(const Segment& seg, const Operand& op)
	{
		opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
	}
2470#endif
2471
	enum { NONE = 256 }; // sentinel for "no byte": outside the 0..255 range db() emits
	// constructor
	// Builds the code buffer and initializes every predefined operand object
	// (registers, address frames, EVEX modifiers, segments).
	CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0)
		: CodeArray(maxSize, userPtr, allocator)
		, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
		, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
		, ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7)
		, zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7)
		// for my convenience
		, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7)
		, ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7)
		, zm0(zmm0), zm1(zmm1), zm2(zmm2), zm3(zmm3), zm4(zmm4), zm5(zmm5), zm6(zmm6), zm7(zmm7)

		, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
		, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
		, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
		, ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512)
		, ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true)
		, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
		, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
		, bnd0(0), bnd1(1), bnd2(2), bnd3(3)
		, T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE)
		, T_z()
#ifdef XBYAK64
		, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
		, r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15)
		, r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15)
		, r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15)
		, spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true)
		, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
		, xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23)
		, xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31)
		, ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15)
		, ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23)
		, ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31)
		, zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15)
		, zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23)
		, zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31)
		// for my convenience
		, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15)
		, xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23)
		, xm24(xmm24), xm25(xmm25), xm26(xmm26), xm27(xmm27), xm28(xmm28), xm29(xmm29), xm30(xmm30), xm31(xmm31)
		, ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15)
		, ym16(ymm16), ym17(ymm17), ym18(ymm18), ym19(ymm19), ym20(ymm20), ym21(ymm21), ym22(ymm22), ym23(ymm23)
		, ym24(ymm24), ym25(ymm25), ym26(ymm26), ym27(ymm27), ym28(ymm28), ym29(ymm29), ym30(ymm30), ym31(ymm31)
		, zm8(zmm8), zm9(zmm9), zm10(zmm10), zm11(zmm11), zm12(zmm12), zm13(zmm13), zm14(zmm14), zm15(zmm15)
		, zm16(zmm16), zm17(zmm17), zm18(zmm18), zm19(zmm19), zm20(zmm20), zm21(zmm21), zm22(zmm22), zm23(zmm23)
		, zm24(zmm24), zm25(zmm25), zm26(zmm26), zm27(zmm27), zm28(zmm28), zm29(zmm29), zm30(zmm30), zm31(zmm31)
		, rip()
#endif
#ifndef XBYAK_DISABLE_SEGMENT
		, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
#endif
	{
		labelMgr_.set(this);
	}
	// Discard all generated code and labels so the generator can be reused.
	void reset()
	{
		resetSize();
		labelMgr_.reset();
		labelMgr_.set(this);
	}
	// True if any referenced label has not been defined yet.
	bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); }
	/*
		MUST call ready() to complete generating code if you use AutoGrow mode.
		It is not necessary for the other mode if hasUndefinedLabel() is true.
	*/
	void ready(ProtectMode mode = PROTECT_RWE)
	{
		if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
		if (isAutoGrow()) {
			calcJmpAddress(); // resolve deferred fixups now the buffer is final
			if (useProtect()) setProtectMode(mode);
		}
	}
2547 // set read/exec
2548 void readyRE() { return ready(PROTECT_RE); }
#ifdef XBYAK_TEST
	// Dump the generated bytes; by default also resets the write position.
	void dump(bool doClear = true)
	{
		CodeArray::dump();
		if (doClear) size_ = 0;
	}
#endif
2556
2557#ifdef XBYAK_UNDEF_JNL
2558 #undef jnl
2559#endif
2560
2561 /*
2562 use single byte nop if useMultiByteNop = false
2563 */
2564 void nop(size_t size = 1, bool useMultiByteNop = true)
2565 {
2566 if (!useMultiByteNop) {
2567 for (size_t i = 0; i < size; i++) {
2568 db(0x90);
2569 }
2570 return;
2571 }
2572 /*
2573 Intel Architectures Software Developer's Manual Volume 2
2574 recommended multi-byte sequence of NOP instruction
2575 AMD and Intel seem to agree on the same sequences for up to 9 bytes:
2576 https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
2577 */
2578 static const uint8 nopTbl[9][9] = {
2579 {0x90},
2580 {0x66, 0x90},
2581 {0x0F, 0x1F, 0x00},
2582 {0x0F, 0x1F, 0x40, 0x00},
2583 {0x0F, 0x1F, 0x44, 0x00, 0x00},
2584 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
2585 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
2586 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2587 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2588 };
2589 const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
2590 while (size > 0) {
2591 size_t len = (std::min)(n, size);
2592 const uint8 *seq = nopTbl[len - 1];
2593 db(seq, len);
2594 size -= len;
2595 }
2596 }
2597
2598#ifndef XBYAK_DONT_READ_LIST
2599#include "xbyak_mnemonic.h"
2600 /*
2601 use single byte nop if useMultiByteNop = false
2602 */
	/*
		use single byte nop if useMultiByteNop = false
	*/
	// Pad with nops until the current address is a multiple of x
	// (x must be a power of two).
	void align(size_t x = 16, bool useMultiByteNop = true)
	{
		if (x == 1) return;
		if (x < 1 || (x & (x - 1))) throw Error(ERR_BAD_ALIGN);
		if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x); // buffer may be realloc'd, breaking alignment
		size_t remain = size_t(getCurr()) % x;
		if (remain) {
			nop(x - remain, useMultiByteNop);
		}
	}
2613#endif
2614};
2615
// Predefined operand instances for users who prefer free objects
// (Xbyak::util::eax) over the CodeGenerator members.
namespace util {
static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7);
static const Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7);
static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7);
static const Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7);
static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
static const AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
static const Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
static const BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
static const EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
static const EvexModifierZero T_z;
#ifdef XBYAK64
static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
static const Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
static const Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
static const Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
static const Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
static const Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15);
static const Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23);
static const Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31);
static const Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
static const Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
static const Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
static const RegRip rip;
#endif
#ifndef XBYAK_DISABLE_SEGMENT
static const Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
#endif
} // util
2651
2652#ifdef _MSC_VER
2653 #pragma warning(pop)
2654#endif
2655
2656} // end of namespace
2657
2658#endif // XBYAK_XBYAK_H_
2659