| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // |
| 5 | // File: AMD64/VirtualCallStubCpu.hpp |
| 6 | // |
| 7 | |
| 8 | |
| 9 | |
| 10 | // |
| 11 | |
| 12 | // See code:VirtualCallStubManager for details |
| 13 | // |
| 14 | // ============================================================================ |
| 15 | |
| 16 | #ifndef _VIRTUAL_CALL_STUB_AMD64_H |
| 17 | #define _VIRTUAL_CALL_STUB_AMD64_H |
| 18 | |
| 19 | #include "dbginterface.h" |
| 20 | |
| 21 | //#define STUB_LOGGING |
| 22 | |
| 23 | #pragma pack(push, 1) |
| 24 | // since we are placing code, we want byte packing of the structs |
| 25 | |
| 26 | #define USES_LOOKUP_STUBS 1 |
| 27 | |
| 28 | /********************************************************************************************* |
| 29 | Stubs that contain code are all part of larger structs called Holders. There is a |
Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
| 31 | essentially an implementation trick that allowed rearranging the code sequences more |
| 32 | easily while trying out different alternatives, and for dealing with any alignment |
issues in a way that was mostly immune to the actual code sequences. These Holders
| 34 | should be revisited when the stub code sequences are fixed, since in many cases they |
| 35 | add extra space to a stub that is not really needed. |
| 36 | |
| 37 | Stubs are placed in cache and hash tables. Since unaligned access of data in memory |
| 38 | is very slow, the keys used in those tables should be aligned. The things used as keys |
| 39 | typically also occur in the generated code, e.g. a token as an immediate part of an instruction. |
| 40 | For now, to avoid alignment computations as different code strategies are tried out, the key |
| 41 | fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction |
| 42 | streams aligned so that the immediate fields fall on aligned boundaries. |
| 43 | */ |
| 44 | |
| 45 | #if USES_LOOKUP_STUBS |
| 46 | |
| 47 | struct LookupStub; |
| 48 | struct LookupHolder; |
| 49 | |
| 50 | /*LookupStub************************************************************************************** |
Virtual and interface call sites are initially set up to point at LookupStubs.
| 52 | This is because the runtime type of the <this> pointer is not yet known, |
| 53 | so the target cannot be resolved. Note: if the jit is able to determine the runtime type |
of the <this> pointer, it should be generating a direct call, not a virtual or interface call.
This stub pushes a lookup token onto the stack to identify the sought-after method, and then
jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to perform the lookup and
transfer control to the appropriate target method implementation, perhaps patching the call site
along the way to point to a more appropriate stub. Hence call sites that point to LookupStubs
| 59 | get quickly changed to point to another kind of stub. |
| 60 | */ |
| 61 | struct LookupStub |
| 62 | { |
| 63 | inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } |
| 64 | |
| 65 | inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } |
| 66 | inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } |
| 67 | |
| 68 | private: |
| 69 | friend struct LookupHolder; |
| 70 | |
| 71 | // The lookup entry point starts with a nop in order to allow us to quickly see |
    // if the stub is a lookup stub or a dispatch stub. We can read the first byte
    // of a stub to find out what kind of stub we have.
| 74 | |
| 75 | BYTE _entryPoint [3]; // 90 nop |
| 76 | // 48 B8 mov rax, |
| 77 | size_t _token; // xx xx xx xx xx xx xx xx 64-bit address |
| 78 | BYTE part2 [3]; // 50 push rax |
| 79 | // 48 B8 mov rax, |
| 80 | size_t _resolveWorkerAddr; // xx xx xx xx xx xx xx xx 64-bit address |
| 81 | BYTE part3 [2]; // FF E0 jmp rax |
| 82 | }; |
| 83 | |
/* LookupHolders are the containers for LookupStubs; they provide for any alignment of
| 85 | stubs as necessary. In the case of LookupStubs, alignment is necessary since |
| 86 | LookupStubs are placed in a hash table keyed by token. */ |
| 87 | struct LookupHolder |
| 88 | { |
| 89 | static void InitializeStatic(); |
| 90 | |
| 91 | void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken); |
| 92 | |
| 93 | LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } |
| 94 | |
| 95 | static LookupHolder* FromLookupEntry(PCODE lookupEntry); |
| 96 | |
| 97 | private: |
| 98 | friend struct LookupStub; |
| 99 | |
| 100 | LookupStub _stub; |
| 101 | }; |
| 102 | |
| 103 | #endif // USES_LOOKUP_STUBS |
| 104 | |
| 105 | struct DispatchStub; |
| 106 | struct DispatchStubShort; |
| 107 | struct DispatchStubLong; |
| 108 | struct DispatchHolder; |
| 109 | |
| 110 | /*DispatchStub************************************************************************************** |
The structure of a full dispatch stub in memory is a DispatchStub followed contiguously in memory
by either a DispatchStubShort or a DispatchStubLong. DispatchStubShort is used when the resolve
stub (failTarget()) is reachable by a rel32 (DISPL) jump. We make a pretty good effort to make sure
that the stub heaps are set up so that this is the case. If we allocate enough stubs that the heap
ends up allocating in a new block that is further away than a DISPL jump can go, then we end up using
a DispatchStubLong, which is bigger but uses a full 64-bit jump. */
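/* A sketch of the two possible layouts, assembled from the structs below:

       DispatchStub       mov rax, <expectedMT>
                          cmp [THIS_REG], rax
       DispatchStubShort  jne rel32 <resolve stub failEntryPoint>
                          mov rax, <implTarget>
                          jmp rax
   or
       DispatchStubLong   jne rel8 failLabel
                          mov rax, <implTarget>
                          jmp rax
               failLabel: mov rax, <failTarget>
                          jmp rax
*/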
| 117 | |
| 118 | /*DispatchStubShort********************************************************************************* |
| 119 | This is the logical continuation of DispatchStub for the case when the failure target is within |
| 120 | a rel32 jump (DISPL). */ |
| 121 | struct DispatchStubShort |
| 122 | { |
| 123 | friend struct DispatchHolder; |
| 124 | friend struct DispatchStub; |
| 125 | |
| 126 | static BOOL isShortStub(LPCBYTE pCode); |
| 127 | inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; } |
| 128 | inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; } |
| 129 | |
| 130 | private: |
| 131 | BYTE part1 [2]; // 0f 85 jne |
| 132 | DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons |
| 133 | BYTE part2 [2]; // 48 B8 mov rax, |
| 134 | size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address |
| 135 | BYTE part3 [2]; // FF E0 jmp rax |
| 136 | |
| 137 | // 31 bytes long, need 1 byte of padding to 8-byte align. |
| 138 | BYTE alignPad [1]; // cc |
| 139 | }; |
| 140 | |
| 141 | inline BOOL DispatchStubShort::isShortStub(LPCBYTE pCode) |
| 142 | { |
| 143 | LIMITED_METHOD_CONTRACT; |
| 144 | return reinterpret_cast<DispatchStubShort const *>(pCode)->part1[0] == 0x0f; |
| 145 | } |
| 146 | |
| 147 | |
| 148 | /*DispatchStubLong********************************************************************************** |
| 149 | This is the logical continuation of DispatchStub for the case when the failure target is not |
| 150 | reachable by a rel32 jump (DISPL). */ |
| 151 | struct DispatchStubLong |
| 152 | { |
| 153 | friend struct DispatchHolder; |
| 154 | friend struct DispatchStub; |
| 155 | |
| 156 | static inline BOOL isLongStub(LPCBYTE pCode); |
| 157 | inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; } |
| 158 | inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _failTarget; } |
| 159 | |
| 160 | private: |
| 161 | BYTE part1 [1]; // 75 jne |
| 162 | BYTE _failDispl; // xx failLabel |
| 163 | BYTE part2 [2]; // 48 B8 mov rax, |
| 164 | size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address |
| 165 | BYTE part3 [2]; // FF E0 jmp rax |
| 166 | // failLabel: |
| 167 | BYTE part4 [2]; // 48 B8 mov rax, |
| 168 | size_t _failTarget; // xx xx xx xx xx xx xx xx 64-bit address |
| 169 | BYTE part5 [2]; // FF E0 jmp rax |
| 170 | |
| 171 | // 39 bytes long, need 1 byte of padding to 8-byte align. |
| 172 | BYTE alignPad [1]; // cc |
| 173 | }; |
| 174 | |
| 175 | inline BOOL DispatchStubLong::isLongStub(LPCBYTE pCode) |
| 176 | { |
| 177 | LIMITED_METHOD_CONTRACT; |
| 178 | return reinterpret_cast<DispatchStubLong const *>(pCode)->part1[0] == 0x75; |
| 179 | } |
| 180 | |
| 181 | /*DispatchStub************************************************************************************** |
| 182 | Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. |
| 183 | A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure). |
If the <this> object in the calling frame is in fact of the expected type, then
control is transferred to the target address, the method implementation. If not,
then control is transferred to the fail address, a fail stub (see below) where a polymorphic
lookup is done to find the correct address to go to.
| 188 | |
| 189 | implementation note: Order, choice of instructions, and branch directions |
| 190 | should be carefully tuned since it can have an inordinate effect on performance. Particular |
| 191 | attention needs to be paid to the effects on the BTB and branch prediction, both in the small |
| 192 | and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions. |
Note that this stub is only used for mostly monomorphic call sites (ones that are not mostly monomorphic
get patched to something else), so the conditional jump "jne failure" is mostly not taken, and hence it is important
that static branch prediction predicts it as not taken, which means it must be a forward jump. The alternative
is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
is statically predicted as taken, i.e. a backward jump. The current choice was taken since it was easier
| 198 | to control the placement of the stubs than control the placement of the jitted code and the stubs. */ |
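/* Rough pseudocode sketch of what the generated code does (THIS_REG is RCX on
   Windows and RDI on Unix, per DispatchHolder::InitializeStatic below):

       if (*(size_t *)pThis == expectedMT)   // cmp [THIS_REG], rax
           goto implTarget;                  // the method implementation
       else
           goto failTarget;                  // the resolve stub's failEntryPoint
*/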
| 199 | struct DispatchStub |
| 200 | { |
| 201 | friend struct DispatchHolder; |
| 202 | |
| 203 | enum DispatchStubType |
| 204 | { |
| 205 | e_TYPE_SHORT, |
| 206 | e_TYPE_LONG, |
| 207 | }; |
| 208 | |
| 209 | inline DispatchStubType type() const |
| 210 | { |
| 211 | LIMITED_METHOD_CONTRACT; |
| 212 | CONSISTENCY_CHECK(DispatchStubShort::isShortStub(reinterpret_cast<LPCBYTE>(this + 1)) |
| 213 | || DispatchStubLong::isLongStub(reinterpret_cast<LPCBYTE>(this + 1))); |
| 214 | return DispatchStubShort::isShortStub((BYTE *)(this + 1)) ? e_TYPE_SHORT : e_TYPE_LONG; |
| 215 | } |
| 216 | |
| 217 | inline static size_t size(DispatchStubType type) |
| 218 | { |
| 219 | STATIC_CONTRACT_LEAF; |
| 220 | return sizeof(DispatchStub) + |
| 221 | ((type == e_TYPE_SHORT) ? sizeof(DispatchStubShort) : sizeof(DispatchStubLong)); |
| 222 | } |
| 223 | |
| 224 | inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } |
| 225 | inline size_t expectedMT() const { LIMITED_METHOD_CONTRACT; return _expectedMT; } |
| 226 | inline size_t size() const { WRAPPER_NO_CONTRACT; return size(type()); } |
| 227 | |
| 228 | inline PCODE implTarget() const |
| 229 | { |
| 230 | LIMITED_METHOD_CONTRACT; |
| 231 | if (type() == e_TYPE_SHORT) |
| 232 | return getShortStub()->implTarget(); |
| 233 | else |
| 234 | return getLongStub()->implTarget(); |
| 235 | } |
| 236 | |
| 237 | inline PCODE failTarget() const |
| 238 | { |
| 239 | if (type() == e_TYPE_SHORT) |
| 240 | return getShortStub()->failTarget(); |
| 241 | else |
| 242 | return getLongStub()->failTarget(); |
| 243 | } |
| 244 | |
| 245 | private: |
| 246 | inline DispatchStubShort const *getShortStub() const |
| 247 | { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubShort const *>(this + 1); } |
| 248 | |
| 249 | inline DispatchStubLong const *getLongStub() const |
| 250 | { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubLong const *>(this + 1); } |
| 251 | |
| 252 | BYTE _entryPoint [2]; // 48 B8 mov rax, |
| 253 | size_t _expectedMT; // xx xx xx xx xx xx xx xx 64-bit address |
| 254 | BYTE part1 [3]; // 48 39 XX cmp [THIS_REG], rax |
| 255 | |
| 256 | // Followed by either DispatchStubShort or DispatchStubLong, depending |
| 257 | // on whether we were able to make a rel32 or had to make an abs64 jump |
| 258 | // to the resolve stub on failure. |
| 259 | |
| 260 | }; |
| 261 | |
/* DispatchHolders are the containers for DispatchStubs; they provide for any alignment of
| 263 | stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both |
are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
| 265 | since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently |
| 266 | (see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify |
| 267 | alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field. |
| 268 | While the token field can be logically gotten by following the failure target to the failEntryPoint |
| 269 | of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here. |
| 270 | This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct |
for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
| 272 | they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid). |
| 273 | */ |
| 274 | |
| 275 | /* @workaround for ee resolution - Since the EE does not currently have a resolver function that |
| 276 | does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are |
using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
is in fact written. Hence we have moved target out into the holder and aligned it so we can
| 279 | atomically update it. When we get a resolver function that does what we want, we can drop this field, |
| 280 | and live with just the inlineTarget field in the stub itself, since immutability will hold.*/ |
| 281 | struct DispatchHolder |
| 282 | { |
| 283 | static void InitializeStatic(); |
| 284 | |
| 285 | void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT, |
| 286 | DispatchStub::DispatchStubType type); |
| 287 | |
| 288 | static size_t GetHolderSize(DispatchStub::DispatchStubType type) |
| 289 | { STATIC_CONTRACT_WRAPPER; return DispatchStub::size(type); } |
| 290 | |
| 291 | static BOOL CanShortJumpDispatchStubReachFailTarget(PCODE failTarget, LPCBYTE stubMemory) |
| 292 | { |
| 293 | STATIC_CONTRACT_WRAPPER; |
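        // pFrom is the first byte after the rel32 displacement (i.e. the start of
        // DispatchStubShort::part2), which is the address the jne's displacement
        // is measured from.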
| 294 | LPCBYTE pFrom = stubMemory + sizeof(DispatchStub) + offsetof(DispatchStubShort, part2[0]); |
| 295 | size_t cbRelJump = failTarget - (PCODE)pFrom; |
| 296 | return FitsInI4(cbRelJump); |
| 297 | } |
| 298 | |
| 299 | DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStub *>(this); } |
| 300 | |
| 301 | static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry); |
| 302 | |
| 303 | private: |
| 304 | // DispatchStub follows here. It is dynamically sized on allocation |
| 305 | // because it could be a DispatchStubLong or a DispatchStubShort |
| 306 | }; |
| 307 | |
| 308 | struct ResolveStub; |
| 309 | struct ResolveHolder; |
| 310 | |
| 311 | /*ResolveStub************************************************************************************** |
Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
| 313 | one resolver stub built for any given token, even though there may be many call sites that |
| 314 | use that token and many distinct <this> types that are used in the calling call frames. A resolver stub |
| 315 | actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their |
| 316 | expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should |
| 317 | be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces, |
| 318 | even though they are actually allocated as a single contiguous block of memory. These pieces are: |
| 319 | |
| 320 | A ResolveStub has two entry points: |
| 321 | |
| 322 | FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does |
| 323 | a check to see how often we are actually failing. If failures are frequent, control transfers to the |
| 324 | patch piece to cause the call site to be changed from a mostly monomorphic callsite |
(calls dispatch stub) to a polymorphic call site (calls resolve stub). If failures are rare, control
| 326 | transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter |
| 327 | every time it is entered. The ee at various times will add a large chunk to the counter. |
| 328 | |
ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
| 331 | to the method implementation. If not found in the cache, the token is pushed and the ee is entered via |
| 332 | the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since |
| 333 | there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed. |
| 334 | The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used, |
| 335 | as well as its speed. It turns out it is very important to make the hash function sensitive to all |
| 336 | of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before |
| 337 | making any changes to the code sequences here, it is very important to measure and tune them as perf |
| 338 | can vary greatly, in unexpected ways, with seeming minor changes. |
| 339 | |
| 340 | Implementation note - Order, choice of instructions, and branch directions |
| 341 | should be carefully tuned since it can have an inordinate effect on performance. Particular |
| 342 | attention needs to be paid to the effects on the BTB and branch prediction, both in the small |
| 343 | and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions. |
| 344 | Note that this stub is called in highly polymorphic cases, but the cache should have been sized |
| 345 | and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should |
| 346 | mostly be going down the cache hit route, and it is important that this be statically predicted as so. |
| 347 | Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically |
| 348 | gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries |
| 349 | is important. */ |
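/* Rough pseudocode sketch of the cache probe done by resolveEntryPoint, assembled
   from the byte comments on the fields below (field names refer to ResolveStub and
   ResolveCacheElem):

       size_t mt   = *(size_t *)pThis;                               // MethodTable*
       size_t hash = ((mt >> CALL_STUB_CACHE_NUM_BITS) + mt) ^ _hashedToken;
       ResolveCacheElem *e = *(ResolveCacheElem **)(_cacheAddress + (hash & mask));
       if (e->pMT == (void *)mt && e->token == _token)
           goto e->target;                                           // cache hit
       else
           goto miss;                        // enter the EE via _resolveWorker
*/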
| 350 | |
| 351 | struct ResolveStub |
| 352 | { |
| 353 | inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; } |
| 354 | inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; } |
| 355 | inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; } |
| 356 | |
| 357 | inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; } |
| 358 | inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; } |
| 359 | inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; } |
| 360 | inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } |
    inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
| 362 | |
| 363 | private: |
| 364 | friend struct ResolveHolder; |
| 365 | |
| 366 | BYTE _resolveEntryPoint[3];// resolveStub: |
| 367 | // 52 push rdx |
| 368 | // 49 BA mov r10, |
| 369 | size_t _cacheAddress; // xx xx xx xx xx xx xx xx 64-bit address |
| 370 | BYTE part1 [15]; // 48 8B XX mov rax, [THIS_REG] ; Compute hash = ((MT + MT>>12) ^ prehash) |
| 371 | // 48 8B D0 mov rdx, rax ; rdx <- current MethodTable |
| 372 | // 48 C1 E8 0C shr rax, 12 |
| 373 | // 48 03 C2 add rax, rdx |
| 374 | // 48 35 xor rax, |
| 375 | UINT32 _hashedToken; // xx xx xx xx hashedtoken ; xor with pre-hashed token |
| 376 | BYTE part2 [2]; // 48 25 and rax, |
| 377 | UINT32 mask; // xx xx xx xx cache_mask ; and with cache mask |
| 378 | BYTE part3 [6]; // 4A 8B 04 10 mov rax, [r10 + rax] ; get cache entry address |
| 379 | // 49 BA mov r10, |
| 380 | size_t _token; // xx xx xx xx xx xx xx xx 64-bit address |
| 381 | BYTE part4 [3]; // 48 3B 50 cmp rdx, [rax+ ; compare our MT vs. cache MT |
| 382 | BYTE mtOffset; // xx ResolverCacheElem.pMT] |
| 383 | BYTE part5 [1]; // 75 jne |
| 384 | BYTE toMiss1; // xx miss ; must be forward jump, for perf reasons |
| 385 | BYTE part6 [3]; // 4C 3B 50 cmp r10, [rax+ ; compare our token vs. cache token |
| 386 | BYTE tokenOffset; // xx ResolverCacheElem.token] |
| 387 | BYTE part7 [1]; // 75 jne |
| 388 | BYTE toMiss2; // xx miss ; must be forward jump, for perf reasons |
| 389 | BYTE part8 [3]; // 48 8B 40 mov rax, [rax+ ; setup rax with method impl address |
| 390 | BYTE targetOffset; // xx ResolverCacheElem.target] |
| 391 | BYTE part9 [3]; // 5A pop rdx |
| 392 | // FF E0 jmp rax |
| 393 | // failStub: |
| 394 | BYTE _failEntryPoint [2]; // 48 B8 mov rax, |
| 395 | INT32* _pCounter; // xx xx xx xx xx xx xx xx 64-bit address |
| 396 | BYTE part11 [4]; // 83 00 FF add dword ptr [rax], -1 |
| 397 | // 7d jnl |
| 398 | BYTE toResolveStub1; // xx resolveStub |
| 399 | BYTE part12 [4]; // 49 83 CB 01 or r11, 1 |
| 400 | BYTE _slowEntryPoint [3]; // 52 slow: push rdx |
| 401 | // 49 BA mov r10, |
| 402 | size_t _tokenSlow; // xx xx xx xx xx xx xx xx 64-bit address |
| 403 | // BYTE miss [5]; // 5A miss: pop rdx ; don't pop rdx |
| 404 | // // 41 52 push r10 ; don't push r10 leave it setup with token |
| 405 | BYTE miss [3]; // 50 push rax ; push ptr to cache elem |
| 406 | // 48 B8 mov rax, |
| 407 | size_t _resolveWorker; // xx xx xx xx xx xx xx xx 64-bit address |
| 408 | BYTE part10 [2]; // FF E0 jmp rax |
| 409 | }; |
| 410 | |
/* ResolveHolders are the containers for ResolveStubs; they provide
| 412 | for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by |
| 413 | the token for which they are built. Efficiency of access requires that this token be aligned. |
For now, we have copied that field into the ResolveHolder itself. If the resolve stub is arranged such that
| 415 | any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder |
| 416 | is not needed. */ |
| 417 | struct ResolveHolder |
| 418 | { |
| 419 | static void InitializeStatic(); |
| 420 | |
| 421 | void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, |
| 422 | size_t dispatchToken, UINT32 hashedToken, |
| 423 | void * cacheAddr, INT32* counterAddr); |
| 424 | |
| 425 | ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } |
| 426 | |
| 427 | static ResolveHolder* FromFailEntry(PCODE resolveEntry); |
| 428 | static ResolveHolder* FromResolveEntry(PCODE resolveEntry); |
| 429 | |
| 430 | private: |
| 431 | ResolveStub _stub; |
| 432 | }; |
| 433 | |
| 434 | /*VTableCallStub************************************************************************************** |
| 435 | These are jump stubs that perform a vtable-base virtual call. These stubs assume that an object is placed |
| 436 | in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the |
| 437 | vtable pointer, and finally jumps to the target method at a given slot in the vtable. |
| 438 | */ |
| 439 | struct VTableCallStub |
| 440 | { |
| 441 | friend struct VTableCallHolder; |
| 442 | |
| 443 | inline size_t size() |
| 444 | { |
| 445 | LIMITED_METHOD_CONTRACT; |
| 446 | |
| 447 | BYTE* pStubCode = (BYTE *)this; |
| 448 | |
| 449 | size_t cbSize = 3; // First mov instruction |
| 450 | cbSize += (pStubCode[cbSize + 2] == 0x80 ? 7 : 4); // Either 48 8B 80 or 48 8B 40: mov rax,[rax+offset] |
| 451 | cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3); // Either FF A0 or FF 60: jmp qword ptr [rax+slot] |
| 452 | cbSize += 4; // Slot value (data storage, not a real instruction) |
| 453 | |
| 454 | return cbSize; |
| 455 | } |
| 456 | |
| 457 | inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } |
| 458 | |
| 459 | inline size_t token() |
| 460 | { |
| 461 | LIMITED_METHOD_CONTRACT; |
| 462 | DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4); |
| 463 | return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); |
| 464 | } |
| 465 | |
| 466 | private: |
| 467 | BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. |
| 468 | }; |
| 469 | |
/* VTableCallHolders are the containers for VTableCallStubs; they provide for any alignment of
| 471 | stubs as necessary. */ |
| 472 | struct VTableCallHolder |
| 473 | { |
| 474 | void Initialize(unsigned slot); |
| 475 | |
| 476 | VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); } |
| 477 | |
| 478 | static size_t GetHolderSize(unsigned slot) |
| 479 | { |
| 480 | STATIC_CONTRACT_WRAPPER; |
| 481 | unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; |
| 482 | unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; |
| 483 | return 3 + (offsetOfIndirection >= 0x80 ? 7 : 4) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4; |
| 484 | } |
| 485 | |
    static VTableCallHolder* FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; }
| 487 | |
| 488 | private: |
| 489 | // VTableCallStub follows here. It is dynamically sized on allocation because it could |
| 490 | // use short/long instruction sizes for mov/jmp, depending on the slot value. |
| 491 | }; |
| 492 | #pragma pack(pop) |
| 493 | |
| 494 | #ifdef DECLARE_DATA |
| 495 | |
| 496 | LookupStub lookupInit; |
| 497 | DispatchStub dispatchInit; |
| 498 | DispatchStubShort dispatchShortInit; |
| 499 | DispatchStubLong dispatchLongInit; |
| 500 | ResolveStub resolveInit; |
| 501 | |
| 502 | #define INSTR_INT3 0xcc |
| 503 | #define INSTR_NOP 0x90 |
| 504 | |
| 505 | #ifndef DACCESS_COMPILE |
| 506 | |
| 507 | #include "asmconstants.h" |
| 508 | |
| 509 | #ifdef STUB_LOGGING |
| 510 | extern size_t g_lookup_inline_counter; |
| 511 | extern size_t g_call_inline_counter; |
| 512 | extern size_t g_miss_inline_counter; |
| 513 | extern size_t g_call_cache_counter; |
| 514 | extern size_t g_miss_cache_counter; |
| 515 | #endif |
| 516 | |
| 517 | /* Template used to generate the stub. We generate a stub by allocating a block of |
memory, copying the template over it, and updating just the specific fields that need
| 519 | to be changed. |
| 520 | */ |
| 521 | |
| 522 | void LookupHolder::InitializeStatic() |
| 523 | { |
| 524 | static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0); |
| 525 | |
| 526 | // The first instruction of a LookupStub is nop |
| 527 | // and we use it in order to differentiate the first two bytes |
| 528 | // of a LookupStub and a ResolveStub |
| 529 | lookupInit._entryPoint [0] = INSTR_NOP; |
| 530 | lookupInit._entryPoint [1] = 0x48; |
| 531 | lookupInit._entryPoint [2] = 0xB8; |
| 532 | lookupInit._token = 0xcccccccccccccccc; |
| 533 | lookupInit.part2 [0] = 0x50; |
| 534 | lookupInit.part2 [1] = 0x48; |
| 535 | lookupInit.part2 [2] = 0xB8; |
| 536 | lookupInit._resolveWorkerAddr = 0xcccccccccccccccc; |
| 537 | lookupInit.part3 [0] = 0xFF; |
| 538 | lookupInit.part3 [1] = 0xE0; |
| 539 | } |
| 540 | |
| 541 | void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken) |
| 542 | { |
| 543 | _stub = lookupInit; |
| 544 | |
| 545 | //fill in the stub specific fields |
| 546 | _stub._token = dispatchToken; |
| 547 | _stub._resolveWorkerAddr = (size_t) resolveWorkerTarget; |
| 548 | } |
| 549 | |
| 550 | /* Template used to generate the stub. We generate a stub by allocating a block of |
memory, copying the template over it, and updating just the specific fields that need
| 552 | to be changed. |
| 553 | */ |
| 554 | |
| 555 | void DispatchHolder::InitializeStatic() |
| 556 | { |
| 557 | // Check that _expectedMT is aligned in the DispatchHolder |
| 558 | static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubShort)) % sizeof(void*)) == 0); |
| 559 | static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubLong)) % sizeof(void*)) == 0); |
| 560 | CONSISTENCY_CHECK((offsetof(DispatchStubLong, part4[0]) - offsetof(DispatchStubLong, part2[0])) < INT8_MAX); |
| 561 | |
| 562 | // Common dispatch stub initialization |
| 563 | dispatchInit._entryPoint [0] = 0x48; |
| 564 | dispatchInit._entryPoint [1] = 0xB8; |
| 565 | dispatchInit._expectedMT = 0xcccccccccccccccc; |
| 566 | dispatchInit.part1 [0] = 0x48; |
| 567 | dispatchInit.part1 [1] = 0x39; |
| 568 | #ifdef UNIX_AMD64_ABI |
| 569 | dispatchInit.part1 [2] = 0x07; // RDI |
| 570 | #else |
| 571 | dispatchInit.part1 [2] = 0x01; // RCX |
| 572 | #endif |
| 573 | |
| 574 | // Short dispatch stub initialization |
| 575 | dispatchShortInit.part1 [0] = 0x0F; |
| 576 | dispatchShortInit.part1 [1] = 0x85; |
| 577 | dispatchShortInit._failDispl = 0xcccccccc; |
| 578 | dispatchShortInit.part2 [0] = 0x48; |
| 579 | dispatchShortInit.part2 [1] = 0xb8; |
| 580 | dispatchShortInit._implTarget = 0xcccccccccccccccc; |
| 581 | dispatchShortInit.part3 [0] = 0xFF; |
| 582 | dispatchShortInit.part3 [1] = 0xE0; |
| 583 | dispatchShortInit.alignPad [0] = INSTR_INT3; |
| 584 | |
| 585 | // Long dispatch stub initialization |
| 586 | dispatchLongInit.part1 [0] = 0x75; |
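    // The rel8 displacement of the "jne" is measured from the byte that follows it
    // (part2[0]) to the failLabel (part4[0]).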
| 587 | dispatchLongInit._failDispl = BYTE(&dispatchLongInit.part4[0] - &dispatchLongInit.part2[0]); |
| 588 | dispatchLongInit.part2 [0] = 0x48; |
| 589 | dispatchLongInit.part2 [1] = 0xb8; |
| 590 | dispatchLongInit._implTarget = 0xcccccccccccccccc; |
| 591 | dispatchLongInit.part3 [0] = 0xFF; |
| 592 | dispatchLongInit.part3 [1] = 0xE0; |
| 593 | // failLabel: |
| 594 | dispatchLongInit.part4 [0] = 0x48; |
| 595 | dispatchLongInit.part4 [1] = 0xb8; |
| 596 | dispatchLongInit._failTarget = 0xcccccccccccccccc; |
| 597 | dispatchLongInit.part5 [0] = 0xFF; |
| 598 | dispatchLongInit.part5 [1] = 0xE0; |
| 599 | dispatchLongInit.alignPad [0] = INSTR_INT3; |
| 600 | }; |
| 601 | |
| 602 | void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT, |
| 603 | DispatchStub::DispatchStubType type) |
| 604 | { |
| 605 | // |
| 606 | // Initialize the common area |
| 607 | // |
| 608 | |
| 609 | // initialize the static data |
| 610 | *stub() = dispatchInit; |
| 611 | |
| 612 | // fill in the dynamic data |
| 613 | stub()->_expectedMT = expectedMT; |
| 614 | |
| 615 | // |
| 616 | // Initialize the short/long areas |
| 617 | // |
| 618 | if (type == DispatchStub::e_TYPE_SHORT) |
| 619 | { |
| 620 | DispatchStubShort *shortStub = const_cast<DispatchStubShort *>(stub()->getShortStub()); |
| 621 | |
| 622 | // initialize the static data |
| 623 | *shortStub = dispatchShortInit; |
| 624 | |
| 625 | // fill in the dynamic data |
| 626 | size_t displ = (failTarget - ((PCODE) &shortStub->_failDispl + sizeof(DISPL))); |
| 627 | CONSISTENCY_CHECK(FitsInI4(displ)); |
| 628 | shortStub->_failDispl = (DISPL) displ; |
| 629 | shortStub->_implTarget = (size_t) implTarget; |
| 630 | CONSISTENCY_CHECK((PCODE)&shortStub->_failDispl + sizeof(DISPL) + shortStub->_failDispl == failTarget); |
| 631 | } |
| 632 | else |
| 633 | { |
| 634 | CONSISTENCY_CHECK(type == DispatchStub::e_TYPE_LONG); |
| 635 | DispatchStubLong *longStub = const_cast<DispatchStubLong *>(stub()->getLongStub()); |
| 636 | |
| 637 | // initialize the static data |
| 638 | *longStub = dispatchLongInit; |
| 639 | |
| 640 | // fill in the dynamic data |
| 641 | longStub->_implTarget = implTarget; |
| 642 | longStub->_failTarget = failTarget; |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | /* Template used to generate the stub. We generate a stub by allocating a block of |
memory, copying the template over it, and updating just the specific fields that need
| 648 | to be changed. |
| 649 | */ |
| 650 | |
| 651 | void ResolveHolder::InitializeStatic() |
| 652 | { |
| 653 | static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0); |
| 654 | |
| 655 | resolveInit._resolveEntryPoint [0] = 0x52; |
| 656 | resolveInit._resolveEntryPoint [1] = 0x49; |
| 657 | resolveInit._resolveEntryPoint [2] = 0xBA; |
| 658 | resolveInit._cacheAddress = 0xcccccccccccccccc; |
| 659 | resolveInit.part1 [ 0] = 0x48; |
| 660 | resolveInit.part1 [ 1] = 0x8B; |
| 661 | #ifdef UNIX_AMD64_ABI |
| 662 | resolveInit.part1 [ 2] = 0x07; // RDI |
| 663 | #else |
| 664 | resolveInit.part1 [ 2] = 0x01; // RCX |
| 665 | #endif |
| 666 | resolveInit.part1 [ 3] = 0x48; |
| 667 | resolveInit.part1 [ 4] = 0x8B; |
| 668 | resolveInit.part1 [ 5] = 0xD0; |
| 669 | resolveInit.part1 [ 6] = 0x48; |
| 670 | resolveInit.part1 [ 7] = 0xC1; |
| 671 | resolveInit.part1 [ 8] = 0xE8; |
| 672 | resolveInit.part1 [ 9] = CALL_STUB_CACHE_NUM_BITS; |
| 673 | resolveInit.part1 [10] = 0x48; |
| 674 | resolveInit.part1 [11] = 0x03; |
| 675 | resolveInit.part1 [12] = 0xC2; |
| 676 | resolveInit.part1 [13] = 0x48; |
| 677 | resolveInit.part1 [14] = 0x35; |
| 678 | // Review truncation from unsigned __int64 to UINT32 of a constant value. |
| 679 | #if defined(_MSC_VER) |
| 680 | #pragma warning(push) |
| 681 | #pragma warning(disable:4305 4309) |
| 682 | #endif // defined(_MSC_VER) |
| 683 | |
| 684 | resolveInit._hashedToken = 0xcccccccc; |
| 685 | |
| 686 | #if defined(_MSC_VER) |
| 687 | #pragma warning(pop) |
| 688 | #endif // defined(_MSC_VER) |
| 689 | |
| 690 | resolveInit.part2 [ 0] = 0x48; |
| 691 | resolveInit.part2 [ 1] = 0x25; |
| 692 | resolveInit.mask = CALL_STUB_CACHE_MASK*sizeof(void *); |
| 693 | resolveInit.part3 [0] = 0x4A; |
| 694 | resolveInit.part3 [1] = 0x8B; |
| 695 | resolveInit.part3 [2] = 0x04; |
| 696 | resolveInit.part3 [3] = 0x10; |
| 697 | resolveInit.part3 [4] = 0x49; |
| 698 | resolveInit.part3 [5] = 0xBA; |
| 699 | resolveInit._token = 0xcccccccccccccccc; |
| 700 | resolveInit.part4 [0] = 0x48; |
| 701 | resolveInit.part4 [1] = 0x3B; |
| 702 | resolveInit.part4 [2] = 0x50; |
| 703 | resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF; |
| 704 | resolveInit.part5 [0] = 0x75; |
| 705 | resolveInit.toMiss1 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1) & 0xFF; |
| 706 | resolveInit.part6 [0] = 0x4C; |
| 707 | resolveInit.part6 [1] = 0x3B; |
| 708 | resolveInit.part6 [2] = 0x50; |
| 709 | resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF; |
| 710 | resolveInit.part7 [0] = 0x75; |
| 711 | resolveInit.toMiss2 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1) & 0xFF; |
| 712 | resolveInit.part8 [0] = 0x48; |
| 713 | resolveInit.part8 [1] = 0x8B; |
| 714 | resolveInit.part8 [2] = 0x40; |
| 715 | resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF; |
| 716 | resolveInit.part9 [0] = 0x5A; |
| 717 | resolveInit.part9 [1] = 0xFF; |
| 718 | resolveInit.part9 [2] = 0xE0; |
| 719 | resolveInit._failEntryPoint [0] = 0x48; |
| 720 | resolveInit._failEntryPoint [1] = 0xB8; |
| 721 | resolveInit._pCounter = (INT32*) (size_t) 0xcccccccccccccccc; |
| 722 | resolveInit.part11 [0] = 0x83; |
| 723 | resolveInit.part11 [1] = 0x00; |
| 724 | resolveInit.part11 [2] = 0xFF; |
| 725 | resolveInit.part11 [3] = 0x7D; |
| 726 | resolveInit.toResolveStub1 = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub1)+1)) & 0xFF; |
| 727 | resolveInit.part12 [0] = 0x49; |
| 728 | resolveInit.part12 [1] = 0x83; |
| 729 | resolveInit.part12 [2] = 0xCB; |
| 730 | resolveInit.part12 [3] = 0x01; |
| 731 | resolveInit._slowEntryPoint [0] = 0x52; |
| 732 | resolveInit._slowEntryPoint [1] = 0x49; |
| 733 | resolveInit._slowEntryPoint [2] = 0xBA; |
| 734 | resolveInit._tokenSlow = 0xcccccccccccccccc; |
| 735 | resolveInit.miss [0] = 0x50; |
| 736 | resolveInit.miss [1] = 0x48; |
| 737 | resolveInit.miss [2] = 0xB8; |
| 738 | resolveInit._resolveWorker = 0xcccccccccccccccc; |
| 739 | resolveInit.part10 [0] = 0xFF; |
| 740 | resolveInit.part10 [1] = 0xE0; |
| 741 | }; |
| 742 | |
| 743 | void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, |
| 744 | size_t dispatchToken, UINT32 hashedToken, |
| 745 | void * cacheAddr, INT32* counterAddr) |
| 746 | { |
| 747 | _stub = resolveInit; |
| 748 | |
| 749 | //fill in the stub specific fields |
| 750 | _stub._cacheAddress = (size_t) cacheAddr; |
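    // The hashed token is pre-shifted by LOG2_PTRSIZE so that, after it is masked with
    // CALL_STUB_CACHE_MASK*sizeof(void*), it can be used directly as a byte offset into
    // the cache table (the hashedToken() accessor undoes this shift).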
| 751 | _stub._hashedToken = hashedToken << LOG2_PTRSIZE; |
| 752 | _stub._token = dispatchToken; |
| 753 | _stub._tokenSlow = dispatchToken; |
| 754 | _stub._resolveWorker = (size_t) resolveWorkerTarget; |
| 755 | _stub._pCounter = counterAddr; |
| 756 | } |
| 757 | |
| 758 | ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry) |
| 759 | { |
| 760 | LIMITED_METHOD_CONTRACT; |
| 761 | ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) ); |
| 762 | _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]); |
| 763 | return resolveHolder; |
| 764 | } |
| 765 | |
| 766 | #endif // DACCESS_COMPILE |
| 767 | |
| 768 | LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry) |
| 769 | { |
| 770 | LIMITED_METHOD_CONTRACT; |
| 771 | LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) ); |
| 772 | _ASSERTE(lookupHolder->_stub._entryPoint[2] == lookupInit._entryPoint[2]); |
| 773 | return lookupHolder; |
| 774 | } |
| 775 | |
| 776 | |
| 777 | DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry) |
| 778 | { |
| 779 | LIMITED_METHOD_CONTRACT; |
| 780 | DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchStub, _entryPoint) ); |
| 781 | _ASSERTE(dispatchHolder->stub()->_entryPoint[1] == dispatchInit._entryPoint[1]); |
| 782 | return dispatchHolder; |
| 783 | } |
| 784 | |
| 785 | |
| 786 | ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) |
| 787 | { |
| 788 | LIMITED_METHOD_CONTRACT; |
| 789 | ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) ); |
| 790 | _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]); |
| 791 | return resolveHolder; |
| 792 | } |
| 793 | |
| 794 | void VTableCallHolder::Initialize(unsigned slot) |
| 795 | { |
| 796 | unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; |
| 797 | unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; |
| 798 | _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */); |
| 799 | |
| 800 | VTableCallStub* pStub = stub(); |
| 801 | BYTE* p = (BYTE*)pStub->entryPoint(); |
| 802 | |
| 803 | #ifdef UNIX_AMD64_ABI |
| 804 | // mov rax,[rdi] : rax = MethodTable pointer |
| 805 | *(UINT32 *)p = 0x078b48; p += 3; |
| 806 | #else |
| 807 | // mov rax,[rcx] : rax = MethodTable pointer |
| 808 | *(UINT32 *)p = 0x018b48; p += 3; |
| 809 | #endif |
| 810 | |
| 811 | // mov rax,[rax+vtable offset] : rax = vtable pointer |
| 812 | if (offsetOfIndirection >= 0x80) |
| 813 | { |
| 814 | *(UINT32*)p = 0x00808b48; p += 3; |
| 815 | *(UINT32*)p = offsetOfIndirection; p += 4; |
| 816 | } |
| 817 | else |
| 818 | { |
| 819 | *(UINT32*)p = 0x00408b48; p += 3; |
| 820 | *p++ = (BYTE)offsetOfIndirection; |
| 821 | } |
| 822 | |
| 823 | // jmp qword ptr [rax+slot] |
| 824 | if (offsetAfterIndirection >= 0x80) |
| 825 | { |
        *(UINT16*)p = 0xa0ff; p += 2;
| 827 | *(UINT32*)p = offsetAfterIndirection; p += 4; |
| 828 | } |
| 829 | else |
| 830 | { |
| 831 | *(UINT16*)p = 0x60ff; p += 2; |
| 832 | *p++ = (BYTE)offsetAfterIndirection; |
| 833 | } |
| 834 | |
| 835 | // Store the slot value here for convenience. Not a real instruction (unreachable anyways) |
| 836 | *(UINT32*)p = slot; p += 4; |
| 837 | |
| 838 | _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot)); |
| 839 | _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); |
| 840 | } |
| 841 | |
| 842 | VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress) |
| 843 | { |
| 844 | #ifdef DACCESS_COMPILE |
| 845 | return SK_BREAKPOINT; // Dac always uses the slower lookup |
| 846 | #else |
| 847 | StubKind stubKind = SK_UNKNOWN; |
| 848 | |
| 849 | EX_TRY |
| 850 | { |
| 851 | // If stubStartAddress is completely bogus, then this might AV, |
| 852 | // so we protect it with SEH. An AV here is OK. |
| 853 | AVInRuntimeImplOkayHolder AVOkay; |
| 854 | |
| 855 | WORD firstWord = *((WORD*) stubStartAddress); |
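
        // firstWord holds the first two code bytes in little-endian order, so the
        // comparisons below match the opening bytes of each stub kind: 48 B8
        // (dispatch), 90 48 (lookup), 52 49 (resolve), 48 8B (vtable call).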
| 856 | |
| 857 | if (firstWord == 0xB848) |
| 858 | { |
| 859 | stubKind = SK_DISPATCH; |
| 860 | } |
| 861 | else if (firstWord == 0x4890) |
| 862 | { |
| 863 | stubKind = SK_LOOKUP; |
| 864 | } |
| 865 | else if (firstWord == 0x4952) |
| 866 | { |
| 867 | stubKind = SK_RESOLVE; |
| 868 | } |
| 869 | else if (firstWord == 0x48F8) |
| 870 | { |
| 871 | stubKind = SK_LOOKUP; |
| 872 | } |
| 873 | else if (firstWord == 0x8B48) |
| 874 | { |
| 875 | stubKind = SK_VTABLECALL; |
| 876 | } |
| 877 | else |
| 878 | { |
| 879 | BYTE firstByte = ((BYTE*) stubStartAddress)[0]; |
| 880 | BYTE secondByte = ((BYTE*) stubStartAddress)[1]; |
| 881 | |
| 882 | if ((firstByte == INSTR_INT3) || (secondByte == INSTR_INT3)) |
| 883 | { |
| 884 | stubKind = SK_BREAKPOINT; |
| 885 | } |
| 886 | } |
| 887 | } |
| 888 | EX_CATCH |
| 889 | { |
| 890 | stubKind = SK_UNKNOWN; |
| 891 | } |
| 892 | EX_END_CATCH(SwallowAllExceptions); |
| 893 | |
| 894 | return stubKind; |
| 895 | |
| 896 | #endif // DACCESS_COMPILE |
| 897 | } |
| 898 | |
| 899 | #endif //DECLARE_DATA |
| 900 | |
| 901 | #endif // _VIRTUAL_CALL_STUB_AMD64_H |
| 902 | |