// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//
// File: AMD64/VirtualCallStubCpu.hpp
//



//

// See code:VirtualCallStubManager for details
//
// ============================================================================

#ifndef _VIRTUAL_CALL_STUB_AMD64_H
#define _VIRTUAL_CALL_STUB_AMD64_H

#include "dbginterface.h"

//#define STUB_LOGGING

#pragma pack(push, 1)
// since we are placing code, we want byte packing of the structs

#define USES_LOOKUP_STUBS 1

/*********************************************************************************************
Stubs that contain code are all part of larger structs called Holders. There is a
Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
essentially an implementation trick that allowed rearranging the code sequences more
easily while trying out different alternatives, and dealing with any alignment
issues in a way that was mostly immune to the actual code sequences. These Holders
should be revisited when the stub code sequences are fixed, since in many cases they
add extra space to a stub that is not really needed.

Stubs are placed in cache and hash tables. Since unaligned access of data in memory
is very slow, the keys used in those tables should be aligned. The things used as keys
typically also occur in the generated code, e.g. a token as an immediate part of an instruction.
For now, to avoid alignment computations as different code strategies are tried out, the key
fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction
streams aligned so that the immediate fields fall on aligned boundaries.
*/

#if USES_LOOKUP_STUBS

struct LookupStub;
struct LookupHolder;

/*LookupStub**************************************************************************************
Virtual and interface call sites are initially set up to point at LookupStubs.
This is because the runtime type of the <this> pointer is not yet known,
so the target cannot be resolved. Note: if the jit is able to determine the runtime type
of the <this> pointer, it should be generating a direct call not a virtual or interface call.
This stub pushes a lookup token onto the stack to identify the sought after method, and then
jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effectuate the lookup and
transfer of control to the appropriate target method implementation, perhaps patching the call site
along the way to point to a more appropriate stub. Hence callsites that point to LookupStubs
get quickly changed to point to another kind of stub.
*/
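/* Illustrative sketch (not used by the runtime): the logical behavior of the LookupStub
   byte template below, written as C-like pseudocode. Register and field names are
   descriptive only, not real runtime symbols.

       rax = _token;                 // 64-bit dispatch token identifying the method sought
       push(rax);                    // pass the token to the resolve worker
       rax = _resolveWorkerAddr;
       goto *rax;                    // tail-jump into the EE's resolve worker
*/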
struct LookupStub
{
    inline PCODE entryPoint()           { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }

    inline size_t token()               { LIMITED_METHOD_CONTRACT; return _token; }
    inline size_t size()                { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); }

private:
    friend struct LookupHolder;

    // The lookup entry point starts with a nop in order to allow us to quickly see
    // if the stub is a lookup stub or a dispatch stub. We can read the first byte
    // of a stub to find out what kind of a stub we have.

    BYTE    _entryPoint [3];            // 90                       nop
                                        // 48 B8                    mov    rax,
    size_t  _token;                     // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part2 [3];                  // 50                       push   rax
                                        // 48 B8                    mov    rax,
    size_t  _resolveWorkerAddr;         // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part3 [2];                  // FF E0                    jmp    rax
};

/* LookupHolders are the containers for LookupStubs; they provide for any alignment of
stubs as necessary. In the case of LookupStubs, alignment is necessary since
LookupStubs are placed in a hash table keyed by token. */
struct LookupHolder
{
    static void InitializeStatic();

    void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken);

    LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }

    static LookupHolder* FromLookupEntry(PCODE lookupEntry);

private:
    friend struct LookupStub;

    LookupStub _stub;
};

#endif // USES_LOOKUP_STUBS

struct DispatchStub;
struct DispatchStubShort;
struct DispatchStubLong;
struct DispatchHolder;

/*DispatchStub**************************************************************************************
The structure of a full dispatch stub in memory is a DispatchStub followed contiguously in memory
by either a DispatchStubShort or a DispatchStubLong. DispatchStubShort is used when the resolve
stub (failTarget()) is reachable by a rel32 (DISPL) jump. We make a pretty good effort to make sure
that the stub heaps are set up so that this is the case. If we allocate enough stubs that the heap
ends up allocating in a new block that is further away than a DISPL jump can go, then we end up using
a DispatchStubLong, which is bigger but uses a full 64-bit jump. */
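/* Illustrative sketch (not used by the runtime): the short/long choice boils down to a
   signed 32-bit reachability test on the jne displacement, roughly (names here are
   descriptive, not real symbols):

       INT64 delta    = (INT64)failTarget - (INT64)addressAfterDispl;  // rel32 is relative to the next instruction
       BOOL  useShort = (delta == (INT64)(INT32)delta);                // fits in rel32 => DispatchStubShort

   See DispatchHolder::CanShortJumpDispatchStubReachFailTarget below for the actual test. */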

/*DispatchStubShort*********************************************************************************
This is the logical continuation of DispatchStub for the case when the failure target is within
a rel32 jump (DISPL). */
struct DispatchStubShort
{
    friend struct DispatchHolder;
    friend struct DispatchStub;

    static BOOL isShortStub(LPCBYTE pCode);
    inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE)_implTarget; }
    inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_failDispl + sizeof(DISPL) + _failDispl; }

private:
    BYTE    part1 [2];                  // 0f 85                    jne
    DISPL   _failDispl;                 // xx xx xx xx              failEntry         ;must be forward jmp for perf reasons
    BYTE    part2 [2];                  // 48 B8                    mov    rax,
    size_t  _implTarget;                // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part3 [2];                  // FF E0                    jmp    rax

    // 31 bytes long, need 1 byte of padding to 8-byte align.
    BYTE    alignPad [1];               // cc
};

inline BOOL DispatchStubShort::isShortStub(LPCBYTE pCode)
{
    LIMITED_METHOD_CONTRACT;
    return reinterpret_cast<DispatchStubShort const *>(pCode)->part1[0] == 0x0f;
}


/*DispatchStubLong**********************************************************************************
This is the logical continuation of DispatchStub for the case when the failure target is not
reachable by a rel32 jump (DISPL). */
struct DispatchStubLong
{
    friend struct DispatchHolder;
    friend struct DispatchStub;

    static inline BOOL isLongStub(LPCBYTE pCode);
    inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE)_implTarget; }
    inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE)_failTarget; }

private:
    BYTE    part1 [1];                  // 75                       jne
    BYTE    _failDispl;                 // xx                       failLabel
    BYTE    part2 [2];                  // 48 B8                    mov    rax,
    size_t  _implTarget;                // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part3 [2];                  // FF E0                    jmp    rax
                                        // failLabel:
    BYTE    part4 [2];                  // 48 B8                    mov    rax,
    size_t  _failTarget;                // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part5 [2];                  // FF E0                    jmp    rax

    // 39 bytes long, need 1 byte of padding to 8-byte align.
    BYTE    alignPad [1];               // cc
};

inline BOOL DispatchStubLong::isLongStub(LPCBYTE pCode)
{
    LIMITED_METHOD_CONTRACT;
    return reinterpret_cast<DispatchStubLong const *>(pCode)->part1[0] == 0x75;
}

/*DispatchStub**************************************************************************************
Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
If the <this> pointer in the calling frame is in fact of the expected type, then
control is transferred to the target address, the method implementation. If not,
then control is transferred to the fail address, a fail stub (see below) where a polymorphic
lookup is done to find the correct address to go to.

implementation note: Order, choice of instructions, and branch directions
should be carefully tuned since it can have an inordinate effect on performance. Particular
attention needs to be paid to the effects on the BTB and branch prediction, both in the small
and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
Note that since this stub is only used for mostly monomorphic callsites (ones that are not get patched
to something else), the conditional jump "jne failure" is mostly not taken, and hence it is important
that the branch prediction statically predicts this, which means it must be a forward jump. The alternative
is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
is statically predicted as taken, i.e. a backward jump. The current choice was taken since it was easier
to control the placement of the stubs than control the placement of the jitted code and the stubs. */
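/* Illustrative sketch (not used by the runtime): what the dispatch stub's code does, in
   C-like pseudocode. THIS_REG is rcx on Windows and rdi on Unix; names are descriptive only.

       rax = _expectedMT;
       if (*(size_t*)this_reg != rax)    // cmp [THIS_REG], rax / jne
           goto failTarget;              // typically the resolve stub's failEntryPoint
       goto implTarget;                  // the expected method implementation
*/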
struct DispatchStub
{
    friend struct DispatchHolder;

    enum DispatchStubType
    {
        e_TYPE_SHORT,
        e_TYPE_LONG,
    };

    inline DispatchStubType type() const
    {
        LIMITED_METHOD_CONTRACT;
        CONSISTENCY_CHECK(DispatchStubShort::isShortStub(reinterpret_cast<LPCBYTE>(this + 1))
                          || DispatchStubLong::isLongStub(reinterpret_cast<LPCBYTE>(this + 1)));
        return DispatchStubShort::isShortStub((BYTE *)(this + 1)) ? e_TYPE_SHORT : e_TYPE_LONG;
    }

    inline static size_t size(DispatchStubType type)
    {
        STATIC_CONTRACT_LEAF;
        return sizeof(DispatchStub) +
            ((type == e_TYPE_SHORT) ? sizeof(DispatchStubShort) : sizeof(DispatchStubLong));
    }

    inline PCODE  entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }
    inline size_t expectedMT() const { LIMITED_METHOD_CONTRACT; return _expectedMT; }
    inline size_t size()       const { WRAPPER_NO_CONTRACT;     return size(type()); }

    inline PCODE implTarget() const
    {
        LIMITED_METHOD_CONTRACT;
        if (type() == e_TYPE_SHORT)
            return getShortStub()->implTarget();
        else
            return getLongStub()->implTarget();
    }

    inline PCODE failTarget() const
    {
        if (type() == e_TYPE_SHORT)
            return getShortStub()->failTarget();
        else
            return getLongStub()->failTarget();
    }

private:
    inline DispatchStubShort const *getShortStub() const
        { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubShort const *>(this + 1); }

    inline DispatchStubLong const *getLongStub() const
        { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubLong const *>(this + 1); }

    BYTE    _entryPoint [2];            // 48 B8                    mov    rax,
    size_t  _expectedMT;                // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part1 [3];                  // 48 39 XX                 cmp    [THIS_REG], rax

    // Followed by either DispatchStubShort or DispatchStubLong, depending
    // on whether we were able to make a rel32 or had to make an abs64 jump
    // to the resolve stub on failure.

};

/* DispatchHolders are the containers for DispatchStubs; they provide for any alignment of
stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both
are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently
(see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify
alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field.
While the token field can be logically gotten by following the failure target to the failEntryPoint
of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here.
This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct
for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid).
*/

/* @workaround for ee resolution - Since the EE does not currently have a resolver function that
does what we want (see notes in the implementation of VirtualCallStubManager::Resolver), we are
using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
is in fact written. Hence we have moved target out into the holder and aligned it so we can
atomically update it. When we get a resolver function that does what we want, we can drop this field,
and live with just the inlineTarget field in the stub itself, since immutability will hold. */
struct DispatchHolder
{
    static void InitializeStatic();

    void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT,
                    DispatchStub::DispatchStubType type);

    static size_t GetHolderSize(DispatchStub::DispatchStubType type)
        { STATIC_CONTRACT_WRAPPER; return DispatchStub::size(type); }

    static BOOL CanShortJumpDispatchStubReachFailTarget(PCODE failTarget, LPCBYTE stubMemory)
    {
        STATIC_CONTRACT_WRAPPER;
        LPCBYTE pFrom = stubMemory + sizeof(DispatchStub) + offsetof(DispatchStubShort, part2[0]);
        size_t cbRelJump = failTarget - (PCODE)pFrom;
        return FitsInI4(cbRelJump);
    }

    DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStub *>(this); }

    static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry);

private:
    // DispatchStub follows here. It is dynamically sized on allocation
    // because it could be a DispatchStubLong or a DispatchStubShort
};

struct ResolveStub;
struct ResolveHolder;

/*ResolveStub**************************************************************************************
Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
one resolver stub built for any given token, even though there may be many call sites that
use that token and many distinct <this> types that are used in the calling call frames. A resolver stub
actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their
expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should
be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces,
even though they are actually allocated as a single contiguous block of memory. These pieces are:

A ResolveStub has two entry points:

FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does
a check to see how often we are actually failing. If failures are frequent, control transfers to the
patch piece to cause the call site to be changed from a mostly monomorphic callsite
(calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter
every time it is entered. The ee at various times will add a large chunk to the counter.

ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
to the method implementation. If not found in the cache, the token is pushed and the ee is entered via
the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since
there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed.
The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used,
as well as its speed. It turns out it is very important to make the hash function sensitive to all
of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before
making any changes to the code sequences here, it is very important to measure and tune them as perf
can vary greatly, in unexpected ways, with seemingly minor changes.

Implementation note - Order, choice of instructions, and branch directions
should be carefully tuned since it can have an inordinate effect on performance. Particular
attention needs to be paid to the effects on the BTB and branch prediction, both in the small
and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
Note that this stub is called in highly polymorphic cases, but the cache should have been sized
and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
mostly be going down the cache hit route, and it is important that this be statically predicted as so.
Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
gets put in the BTB since all the others typically fall straight thru. Minimizing potential BTB entries
is important. */
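/* Illustrative sketch (not used by the runtime): the cache probe performed by
   resolveEntryPoint, in C-like pseudocode. ResolveCacheElem carries the pMT/token/target
   fields referenced below; the cache constants come from asmconstants.h.

       mt   = *(size_t*)this_reg;
       hash = ((mt + (mt >> CALL_STUB_CACHE_NUM_BITS)) ^ _hashedToken)
              & (CALL_STUB_CACHE_MASK * sizeof(void*));
       elem = *(ResolveCacheElem**)(_cacheAddress + hash);
       if (elem->pMT == mt && elem->token == _token)
           goto elem->target;            // cache hit: jump straight to the implementation
       // cache miss: push elem, leave the token in r10, and enter the EE via _resolveWorker
*/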

struct ResolveStub
{
    inline PCODE failEntryPoint()    { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0];    }
    inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; }
    inline PCODE slowEntryPoint()    { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0];    }

    inline INT32* pCounter()         { LIMITED_METHOD_CONTRACT; return _pCounter; }
    inline UINT32 hashedToken()      { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; }
    inline size_t cacheAddress()     { LIMITED_METHOD_CONTRACT; return _cacheAddress; }
    inline size_t token()            { LIMITED_METHOD_CONTRACT; return _token; }
    inline size_t size()             { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }

private:
    friend struct ResolveHolder;

    BYTE    _resolveEntryPoint[3];      // resolveStub:
                                        // 52                       push   rdx
                                        // 49 BA                    mov    r10,
    size_t  _cacheAddress;              // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part1 [15];                 // 48 8B XX                 mov    rax, [THIS_REG]  ; Compute hash = ((MT + MT>>12) ^ prehash)
                                        // 48 8B D0                 mov    rdx, rax         ; rdx <- current MethodTable
                                        // 48 C1 E8 0C              shr    rax, 12
                                        // 48 03 C2                 add    rax, rdx
                                        // 48 35                    xor    rax,
    UINT32  _hashedToken;               // xx xx xx xx              hashedtoken             ; xor with pre-hashed token
    BYTE    part2 [2];                  // 48 25                    and    rax,
    UINT32  mask;                       // xx xx xx xx              cache_mask              ; and with cache mask
    BYTE    part3 [6];                  // 4A 8B 04 10              mov    rax, [r10 + rax] ; get cache entry address
                                        // 49 BA                    mov    r10,
    size_t  _token;                     // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part4 [3];                  // 48 3B 50                 cmp    rdx, [rax+       ; compare our MT vs. cache MT
    BYTE    mtOffset;                   // xx                       ResolverCacheElem.pMT]
    BYTE    part5 [1];                  // 75                       jne
    BYTE    toMiss1;                    // xx                       miss                    ; must be forward jump, for perf reasons
    BYTE    part6 [3];                  // 4C 3B 50                 cmp    r10, [rax+       ; compare our token vs. cache token
    BYTE    tokenOffset;                // xx                       ResolverCacheElem.token]
    BYTE    part7 [1];                  // 75                       jne
    BYTE    toMiss2;                    // xx                       miss                    ; must be forward jump, for perf reasons
    BYTE    part8 [3];                  // 48 8B 40                 mov    rax, [rax+       ; setup rax with method impl address
    BYTE    targetOffset;               // xx                       ResolverCacheElem.target]
    BYTE    part9 [3];                  // 5A                       pop    rdx
                                        // FF E0                    jmp    rax
                                        // failStub:
    BYTE    _failEntryPoint [2];        // 48 B8                    mov    rax,
    INT32*  _pCounter;                  // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part11 [4];                 // 83 00 FF                 add    dword ptr [rax], -1
                                        // 7D                       jnl
    BYTE    toResolveStub1;             // xx                       resolveStub
    BYTE    part12 [4];                 // 49 83 CB 01              or     r11, 1
    BYTE    _slowEntryPoint [3];        // 52                       slow:  push   rdx
                                        // 49 BA                    mov    r10,
    size_t  _tokenSlow;                 // xx xx xx xx xx xx xx xx  64-bit address
//  BYTE    miss [5];                   // 5A                       miss:  pop    rdx       ; don't pop rdx
//                                      // 41 52                           push   r10      ; don't push r10 leave it setup with token
    BYTE    miss [3];                   // 50                       push   rax              ; push ptr to cache elem
                                        // 48 B8                    mov    rax,
    size_t  _resolveWorker;             // xx xx xx xx xx xx xx xx  64-bit address
    BYTE    part10 [2];                 // FF E0                    jmp    rax
};

/* ResolveHolders are the containers for ResolveStubs; they provide
for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
the token for which they are built. Efficiency of access requires that this token be aligned.
For now, we have copied that field into the ResolveHolder itself; if the resolve stub is arranged such that
any of its inlined tokens (non-prehashed) is aligned, then the token field in the ResolveHolder
is not needed. */
struct ResolveHolder
{
    static void InitializeStatic();

    void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
                    size_t dispatchToken, UINT32 hashedToken,
                    void * cacheAddr, INT32* counterAddr);

    ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; }

    static ResolveHolder* FromFailEntry(PCODE resolveEntry);
    static ResolveHolder* FromResolveEntry(PCODE resolveEntry);

private:
    ResolveStub _stub;
};

/*VTableCallStub**************************************************************************************
These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed
in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the
vtable pointer, and finally jumps to the target method at a given slot in the vtable.
*/
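/* Illustrative sketch (not used by the runtime): the dispatch performed by the stub, in
   C-like pseudocode. The two offsets are computed per-slot in VTableCallHolder::Initialize.

       mt    = *(void***)this_reg;                            // MethodTable pointer
       chunk = *(void**)((BYTE*)mt + offsetOfIndirection);    // vtable indirection chunk
       goto *(PCODE*)((BYTE*)chunk + offsetAfterIndirection); // jmp qword ptr [chunk+slot]
*/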
struct VTableCallStub
{
    friend struct VTableCallHolder;

    inline size_t size()
    {
        LIMITED_METHOD_CONTRACT;

        BYTE* pStubCode = (BYTE *)this;

        size_t cbSize = 3;                                  // First mov instruction
        cbSize += (pStubCode[cbSize + 2] == 0x80 ? 7 : 4);  // Either 48 8B 80 or 48 8B 40: mov rax,[rax+offset]
        cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3);  // Either FF A0 or FF 60: jmp qword ptr [rax+slot]
        cbSize += 4;                                        // Slot value (data storage, not a real instruction)

        return cbSize;
    }

    inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; }

    inline size_t token()
    {
        LIMITED_METHOD_CONTRACT;
        DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4);
        return DispatchToken::CreateDispatchToken(slot).To_SIZE_T();
    }

private:
    BYTE _entryPoint[0];    // Dynamically sized stub. See Initialize() for more details.
};

/* VTableCallHolders are the containers for VTableCallStubs; they provide for any alignment of
stubs as necessary. */
struct VTableCallHolder
{
    void Initialize(unsigned slot);

    VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); }

    static size_t GetHolderSize(unsigned slot)
    {
        STATIC_CONTRACT_WRAPPER;
        unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
        unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
        return 3 + (offsetOfIndirection >= 0x80 ? 7 : 4) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4;
    }

    static VTableCallHolder* FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; }

private:
    // VTableCallStub follows here. It is dynamically sized on allocation because it could
    // use short/long instruction sizes for mov/jmp, depending on the slot value.
};
#pragma pack(pop)

#ifdef DECLARE_DATA

LookupStub        lookupInit;
DispatchStub      dispatchInit;
DispatchStubShort dispatchShortInit;
DispatchStubLong  dispatchLongInit;
ResolveStub       resolveInit;

#define INSTR_INT3 0xcc
#define INSTR_NOP  0x90

#ifndef DACCESS_COMPILE

#include "asmconstants.h"

#ifdef STUB_LOGGING
extern size_t g_lookup_inline_counter;
extern size_t g_call_inline_counter;
extern size_t g_miss_inline_counter;
extern size_t g_call_cache_counter;
extern size_t g_miss_cache_counter;
#endif

/* Template used to generate the stub. We generate a stub by allocating a block of
   memory, copying the template over it, and then updating the specific fields that
   need to be changed.
*/

void LookupHolder::InitializeStatic()
{
    static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0);

    // The first instruction of a LookupStub is nop
    // and we use it in order to differentiate the first two bytes
    // of a LookupStub and a ResolveStub
    lookupInit._entryPoint [0]     = INSTR_NOP;
    lookupInit._entryPoint [1]     = 0x48;
    lookupInit._entryPoint [2]     = 0xB8;
    lookupInit._token              = 0xcccccccccccccccc;
    lookupInit.part2 [0]           = 0x50;
    lookupInit.part2 [1]           = 0x48;
    lookupInit.part2 [2]           = 0xB8;
    lookupInit._resolveWorkerAddr  = 0xcccccccccccccccc;
    lookupInit.part3 [0]           = 0xFF;
    lookupInit.part3 [1]           = 0xE0;
}

void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
{
    _stub = lookupInit;

    // fill in the stub specific fields
    _stub._token             = dispatchToken;
    _stub._resolveWorkerAddr = (size_t) resolveWorkerTarget;
}

/* Template used to generate the stub. We generate a stub by allocating a block of
   memory, copying the template over it, and then updating the specific fields that
   need to be changed.
*/

void DispatchHolder::InitializeStatic()
{
    // Check that _expectedMT is aligned in the DispatchHolder
    static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubShort)) % sizeof(void*)) == 0);
    static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubLong)) % sizeof(void*)) == 0);
    CONSISTENCY_CHECK((offsetof(DispatchStubLong, part4[0]) - offsetof(DispatchStubLong, part2[0])) < INT8_MAX);

    // Common dispatch stub initialization
    dispatchInit._entryPoint [0]      = 0x48;
    dispatchInit._entryPoint [1]      = 0xB8;
    dispatchInit._expectedMT          = 0xcccccccccccccccc;
    dispatchInit.part1 [0]            = 0x48;
    dispatchInit.part1 [1]            = 0x39;
#ifdef UNIX_AMD64_ABI
    dispatchInit.part1 [2]            = 0x07; // RDI
#else
    dispatchInit.part1 [2]            = 0x01; // RCX
#endif

    // Short dispatch stub initialization
    dispatchShortInit.part1 [0]       = 0x0F;
    dispatchShortInit.part1 [1]       = 0x85;
    dispatchShortInit._failDispl      = 0xcccccccc;
    dispatchShortInit.part2 [0]       = 0x48;
    dispatchShortInit.part2 [1]       = 0xb8;
    dispatchShortInit._implTarget     = 0xcccccccccccccccc;
    dispatchShortInit.part3 [0]       = 0xFF;
    dispatchShortInit.part3 [1]       = 0xE0;
    dispatchShortInit.alignPad [0]    = INSTR_INT3;

    // Long dispatch stub initialization
    dispatchLongInit.part1 [0]        = 0x75;
    dispatchLongInit._failDispl       = BYTE(&dispatchLongInit.part4[0] - &dispatchLongInit.part2[0]);
    dispatchLongInit.part2 [0]        = 0x48;
    dispatchLongInit.part2 [1]        = 0xb8;
    dispatchLongInit._implTarget      = 0xcccccccccccccccc;
    dispatchLongInit.part3 [0]        = 0xFF;
    dispatchLongInit.part3 [1]        = 0xE0;
        // failLabel:
    dispatchLongInit.part4 [0]        = 0x48;
    dispatchLongInit.part4 [1]        = 0xb8;
    dispatchLongInit._failTarget      = 0xcccccccccccccccc;
    dispatchLongInit.part5 [0]        = 0xFF;
    dispatchLongInit.part5 [1]        = 0xE0;
    dispatchLongInit.alignPad [0]     = INSTR_INT3;
};

void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT,
                                DispatchStub::DispatchStubType type)
{
    //
    // Initialize the common area
    //

    // initialize the static data
    *stub() = dispatchInit;

    // fill in the dynamic data
    stub()->_expectedMT = expectedMT;

    //
    // Initialize the short/long areas
    //
    if (type == DispatchStub::e_TYPE_SHORT)
    {
        DispatchStubShort *shortStub = const_cast<DispatchStubShort *>(stub()->getShortStub());

        // initialize the static data
        *shortStub = dispatchShortInit;

        // fill in the dynamic data
        size_t displ = (failTarget - ((PCODE) &shortStub->_failDispl + sizeof(DISPL)));
        CONSISTENCY_CHECK(FitsInI4(displ));
        shortStub->_failDispl  = (DISPL) displ;
        shortStub->_implTarget = (size_t) implTarget;
        CONSISTENCY_CHECK((PCODE)&shortStub->_failDispl + sizeof(DISPL) + shortStub->_failDispl == failTarget);
    }
    else
    {
        CONSISTENCY_CHECK(type == DispatchStub::e_TYPE_LONG);
        DispatchStubLong *longStub = const_cast<DispatchStubLong *>(stub()->getLongStub());

        // initialize the static data
        *longStub = dispatchLongInit;

        // fill in the dynamic data
        longStub->_implTarget = implTarget;
        longStub->_failTarget = failTarget;
    }
}

/* Template used to generate the stub. We generate a stub by allocating a block of
   memory, copying the template over it, and then updating the specific fields that
   need to be changed.
*/

void ResolveHolder::InitializeStatic()
{
    static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0);

    resolveInit._resolveEntryPoint [0] = 0x52;
    resolveInit._resolveEntryPoint [1] = 0x49;
    resolveInit._resolveEntryPoint [2] = 0xBA;
    resolveInit._cacheAddress          = 0xcccccccccccccccc;
    resolveInit.part1 [ 0]             = 0x48;
    resolveInit.part1 [ 1]             = 0x8B;
#ifdef UNIX_AMD64_ABI
    resolveInit.part1 [ 2]             = 0x07; // RDI
#else
    resolveInit.part1 [ 2]             = 0x01; // RCX
#endif
    resolveInit.part1 [ 3]             = 0x48;
    resolveInit.part1 [ 4]             = 0x8B;
    resolveInit.part1 [ 5]             = 0xD0;
    resolveInit.part1 [ 6]             = 0x48;
    resolveInit.part1 [ 7]             = 0xC1;
    resolveInit.part1 [ 8]             = 0xE8;
    resolveInit.part1 [ 9]             = CALL_STUB_CACHE_NUM_BITS;
    resolveInit.part1 [10]             = 0x48;
    resolveInit.part1 [11]             = 0x03;
    resolveInit.part1 [12]             = 0xC2;
    resolveInit.part1 [13]             = 0x48;
    resolveInit.part1 [14]             = 0x35;
// Review truncation from unsigned __int64 to UINT32 of a constant value.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable:4305 4309)
#endif // defined(_MSC_VER)

    resolveInit._hashedToken           = 0xcccccccc;

#if defined(_MSC_VER)
#pragma warning(pop)
#endif // defined(_MSC_VER)

    resolveInit.part2 [ 0]             = 0x48;
    resolveInit.part2 [ 1]             = 0x25;
    resolveInit.mask                   = CALL_STUB_CACHE_MASK*sizeof(void *);
    resolveInit.part3 [0]              = 0x4A;
    resolveInit.part3 [1]              = 0x8B;
    resolveInit.part3 [2]              = 0x04;
    resolveInit.part3 [3]              = 0x10;
    resolveInit.part3 [4]              = 0x49;
    resolveInit.part3 [5]              = 0xBA;
    resolveInit._token                 = 0xcccccccccccccccc;
    resolveInit.part4 [0]              = 0x48;
    resolveInit.part4 [1]              = 0x3B;
    resolveInit.part4 [2]              = 0x50;
    resolveInit.mtOffset               = offsetof(ResolveCacheElem,pMT) & 0xFF;
    resolveInit.part5 [0]              = 0x75;
    resolveInit.toMiss1                = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1) & 0xFF;
    resolveInit.part6 [0]              = 0x4C;
    resolveInit.part6 [1]              = 0x3B;
    resolveInit.part6 [2]              = 0x50;
    resolveInit.tokenOffset            = offsetof(ResolveCacheElem,token) & 0xFF;
    resolveInit.part7 [0]              = 0x75;
    resolveInit.toMiss2                = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1) & 0xFF;
    resolveInit.part8 [0]              = 0x48;
    resolveInit.part8 [1]              = 0x8B;
    resolveInit.part8 [2]              = 0x40;
    resolveInit.targetOffset           = offsetof(ResolveCacheElem,target) & 0xFF;
    resolveInit.part9 [0]              = 0x5A;
    resolveInit.part9 [1]              = 0xFF;
    resolveInit.part9 [2]              = 0xE0;
    resolveInit._failEntryPoint [0]    = 0x48;
    resolveInit._failEntryPoint [1]    = 0xB8;
    resolveInit._pCounter              = (INT32*) (size_t) 0xcccccccccccccccc;
    resolveInit.part11 [0]             = 0x83;
    resolveInit.part11 [1]             = 0x00;
    resolveInit.part11 [2]             = 0xFF;
    resolveInit.part11 [3]             = 0x7D;
    resolveInit.toResolveStub1         = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub1)+1)) & 0xFF;
    resolveInit.part12 [0]             = 0x49;
    resolveInit.part12 [1]             = 0x83;
    resolveInit.part12 [2]             = 0xCB;
    resolveInit.part12 [3]             = 0x01;
    resolveInit._slowEntryPoint [0]    = 0x52;
    resolveInit._slowEntryPoint [1]    = 0x49;
    resolveInit._slowEntryPoint [2]    = 0xBA;
    resolveInit._tokenSlow             = 0xcccccccccccccccc;
    resolveInit.miss [0]               = 0x50;
    resolveInit.miss [1]               = 0x48;
    resolveInit.miss [2]               = 0xB8;
    resolveInit._resolveWorker         = 0xcccccccccccccccc;
    resolveInit.part10 [0]             = 0xFF;
    resolveInit.part10 [1]             = 0xE0;
};

void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
                               size_t dispatchToken, UINT32 hashedToken,
                               void * cacheAddr, INT32* counterAddr)
{
    _stub = resolveInit;

    // fill in the stub specific fields
    _stub._cacheAddress  = (size_t) cacheAddr;
    _stub._hashedToken   = hashedToken << LOG2_PTRSIZE;
    _stub._token         = dispatchToken;
    _stub._tokenSlow     = dispatchToken;
    _stub._resolveWorker = (size_t) resolveWorkerTarget;
    _stub._pCounter      = counterAddr;
}

ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry)
{
    LIMITED_METHOD_CONTRACT;
    ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) );
    _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]);
    return resolveHolder;
}

#endif // DACCESS_COMPILE

LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry)
{
    LIMITED_METHOD_CONTRACT;
    LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) );
    _ASSERTE(lookupHolder->_stub._entryPoint[2] == lookupInit._entryPoint[2]);
    return lookupHolder;
}


DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry)
{
    LIMITED_METHOD_CONTRACT;
    DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchStub, _entryPoint) );
    _ASSERTE(dispatchHolder->stub()->_entryPoint[1] == dispatchInit._entryPoint[1]);
    return dispatchHolder;
}


ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry)
{
    LIMITED_METHOD_CONTRACT;
    ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) );
    _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]);
    return resolveHolder;
}

void VTableCallHolder::Initialize(unsigned slot)
{
    unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
    unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
    _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */);

    VTableCallStub* pStub = stub();
    BYTE* p = (BYTE*)pStub->entryPoint();

#ifdef UNIX_AMD64_ABI
    // mov rax,[rdi] : rax = MethodTable pointer
    *(UINT32 *)p = 0x078b48; p += 3;
#else
    // mov rax,[rcx] : rax = MethodTable pointer
    *(UINT32 *)p = 0x018b48; p += 3;
#endif

    // mov rax,[rax+vtable offset] : rax = vtable pointer
    if (offsetOfIndirection >= 0x80)
    {
        *(UINT32*)p = 0x00808b48; p += 3;
        *(UINT32*)p = offsetOfIndirection; p += 4;
    }
    else
    {
        *(UINT32*)p = 0x00408b48; p += 3;
        *p++ = (BYTE)offsetOfIndirection;
    }

    // jmp qword ptr [rax+slot]
    if (offsetAfterIndirection >= 0x80)
    {
        *(UINT32*)p = 0xa0ff; p += 2;
        *(UINT32*)p = offsetAfterIndirection; p += 4;
    }
    else
    {
        *(UINT16*)p = 0x60ff; p += 2;
        *p++ = (BYTE)offsetAfterIndirection;
    }

    // Store the slot value here for convenience. Not a real instruction (unreachable anyways)
    *(UINT32*)p = slot; p += 4;

    _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot));
    _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot));
}

VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress)
{
#ifdef DACCESS_COMPILE
    return SK_BREAKPOINT;  // Dac always uses the slower lookup
#else
    StubKind stubKind = SK_UNKNOWN;

    EX_TRY
    {
        // If stubStartAddress is completely bogus, then this might AV,
        // so we protect it with SEH. An AV here is OK.
        AVInRuntimeImplOkayHolder AVOkay;

        WORD firstWord = *((WORD*) stubStartAddress);

        if (firstWord == 0xB848)
        {
            stubKind = SK_DISPATCH;
        }
        else if (firstWord == 0x4890)
        {
            stubKind = SK_LOOKUP;
        }
        else if (firstWord == 0x4952)
        {
            stubKind = SK_RESOLVE;
        }
        else if (firstWord == 0x48F8)
        {
            stubKind = SK_LOOKUP;
        }
        else if (firstWord == 0x8B48)
        {
            stubKind = SK_VTABLECALL;
        }
        else
        {
            BYTE firstByte  = ((BYTE*) stubStartAddress)[0];
            BYTE secondByte = ((BYTE*) stubStartAddress)[1];

            if ((firstByte == INSTR_INT3) || (secondByte == INSTR_INT3))
            {
                stubKind = SK_BREAKPOINT;
            }
        }
    }
    EX_CATCH
    {
        stubKind = SK_UNKNOWN;
    }
    EX_END_CATCH(SwallowAllExceptions);

    return stubKind;

#endif // DACCESS_COMPILE
}

#endif //DECLARE_DATA

#endif // _VIRTUAL_CALL_STUB_AMD64_H