1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // |
5 | // File: AMD64/VirtualCallStubCpu.hpp |
6 | // |
12 | // See code:VirtualCallStubManager for details |
13 | // |
14 | // ============================================================================ |
15 | |
16 | #ifndef _VIRTUAL_CALL_STUB_AMD64_H |
17 | #define _VIRTUAL_CALL_STUB_AMD64_H |
18 | |
19 | #include "dbginterface.h" |
20 | |
21 | //#define STUB_LOGGING |
22 | |
23 | #pragma pack(push, 1) |
24 | // since we are placing code, we want byte packing of the structs |
25 | |
26 | #define USES_LOOKUP_STUBS 1 |
27 | |
28 | /********************************************************************************************* |
29 | Stubs that contain code are all part of larger structs called Holders. There is a |
Holder for each kind of stub, i.e. XXXStub is contained within XXXHolder. Holders are
essentially an implementation trick that allowed rearranging the code sequences more
easily while trying out different alternatives, and dealing with any alignment
issues in a way that was mostly immune to the actual code sequences. These Holders
34 | should be revisited when the stub code sequences are fixed, since in many cases they |
35 | add extra space to a stub that is not really needed. |
36 | |
37 | Stubs are placed in cache and hash tables. Since unaligned access of data in memory |
38 | is very slow, the keys used in those tables should be aligned. The things used as keys |
39 | typically also occur in the generated code, e.g. a token as an immediate part of an instruction. |
40 | For now, to avoid alignment computations as different code strategies are tried out, the key |
41 | fields are all in the Holders. Eventually, many of these fields should be dropped, and the instruction |
42 | streams aligned so that the immediate fields fall on aligned boundaries. |
43 | */ |
44 | |
45 | #if USES_LOOKUP_STUBS |
46 | |
47 | struct LookupStub; |
48 | struct LookupHolder; |
49 | |
50 | /*LookupStub************************************************************************************** |
Virtual and interface call sites are initially set up to point at LookupStubs.
This is because the runtime type of the <this> pointer is not yet known,
so the target cannot be resolved. Note: if the jit is able to determine the runtime type
of the <this> pointer, it should generate a direct call, not a virtual or interface call.
This stub pushes a lookup token onto the stack to identify the sought-after method, and then
jumps into the EE (VirtualCallStubManager::ResolveWorkerStub) to effect the lookup and
transfer of control to the appropriate target method implementation, perhaps patching the call site
along the way to point to a more appropriate stub. Hence call sites that point to LookupStubs
are quickly changed to point to another kind of stub.
60 | */ |
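/* Illustrative sketch only (not part of the stub itself): what the lookup stub does,
   in C-like pseudocode. It simply hands the dispatch token to the EE's resolve worker,
   which performs the lookup and may patch the call site:

       push(_token);                  // mov rax, _token / push rax
       goto _resolveWorkerAddr;       // mov rax, _resolveWorkerAddr / jmp rax
*/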
61 | struct LookupStub |
62 | { |
63 | inline PCODE entryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } |
64 | |
65 | inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } |
66 | inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(LookupStub); } |
67 | |
68 | private: |
69 | friend struct LookupHolder; |
70 | |
    // The lookup entry point starts with a nop in order to allow us to quickly tell
    // whether a stub is a lookup stub or a dispatch stub. We can read the first byte
    // of a stub to find out what kind of stub we have.
74 | |
75 | BYTE _entryPoint [3]; // 90 nop |
76 | // 48 B8 mov rax, |
77 | size_t _token; // xx xx xx xx xx xx xx xx 64-bit address |
78 | BYTE part2 [3]; // 50 push rax |
79 | // 48 B8 mov rax, |
80 | size_t _resolveWorkerAddr; // xx xx xx xx xx xx xx xx 64-bit address |
81 | BYTE part3 [2]; // FF E0 jmp rax |
82 | }; |
83 | |
/* LookupHolders are the containers for LookupStubs; they provide for any alignment of
85 | stubs as necessary. In the case of LookupStubs, alignment is necessary since |
86 | LookupStubs are placed in a hash table keyed by token. */ |
87 | struct LookupHolder |
88 | { |
89 | static void InitializeStatic(); |
90 | |
91 | void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken); |
92 | |
93 | LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } |
94 | |
95 | static LookupHolder* FromLookupEntry(PCODE lookupEntry); |
96 | |
97 | private: |
98 | friend struct LookupStub; |
99 | |
100 | LookupStub _stub; |
101 | }; |
102 | |
103 | #endif // USES_LOOKUP_STUBS |
104 | |
105 | struct DispatchStub; |
106 | struct DispatchStubShort; |
107 | struct DispatchStubLong; |
108 | struct DispatchHolder; |
109 | |
110 | /*DispatchStub************************************************************************************** |
111 | The structure of a full dispatch stub in memory is a DispatchStub followed contiguously in memory |
by either a DispatchStubShort or a DispatchStubLong. DispatchStubShort is used when the resolve
stub (failTarget()) is reachable by a rel32 (DISPL) jump. We make a pretty good effort to make sure
that the stub heaps are set up so that this is the case. If we allocate enough stubs that the heap
ends up allocating in a new block that is further away than a DISPL jump can reach, then we end up using
a DispatchStubLong, which is bigger but uses a full 64-bit jump. */
117 | |
118 | /*DispatchStubShort********************************************************************************* |
119 | This is the logical continuation of DispatchStub for the case when the failure target is within |
120 | a rel32 jump (DISPL). */ |
121 | struct DispatchStubShort |
122 | { |
123 | friend struct DispatchHolder; |
124 | friend struct DispatchStub; |
125 | |
126 | static BOOL isShortStub(LPCBYTE pCode); |
127 | inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; } |
128 | inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) &_failDispl + sizeof(DISPL) + _failDispl; } |
129 | |
130 | private: |
131 | BYTE part1 [2]; // 0f 85 jne |
132 | DISPL _failDispl; // xx xx xx xx failEntry ;must be forward jmp for perf reasons |
133 | BYTE part2 [2]; // 48 B8 mov rax, |
134 | size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address |
135 | BYTE part3 [2]; // FF E0 jmp rax |
136 | |
137 | // 31 bytes long, need 1 byte of padding to 8-byte align. |
138 | BYTE alignPad [1]; // cc |
139 | }; |
140 | |
141 | inline BOOL DispatchStubShort::isShortStub(LPCBYTE pCode) |
142 | { |
143 | LIMITED_METHOD_CONTRACT; |
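    // A DispatchStubShort starts with 0x0F, the first byte of the two-byte "jne rel32"
    // encoding (0F 85); a DispatchStubLong starts with the one-byte "jne rel8" (75) instead.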
144 | return reinterpret_cast<DispatchStubShort const *>(pCode)->part1[0] == 0x0f; |
145 | } |
146 | |
147 | |
148 | /*DispatchStubLong********************************************************************************** |
149 | This is the logical continuation of DispatchStub for the case when the failure target is not |
150 | reachable by a rel32 jump (DISPL). */ |
151 | struct DispatchStubLong |
152 | { |
153 | friend struct DispatchHolder; |
154 | friend struct DispatchStub; |
155 | |
156 | static inline BOOL isLongStub(LPCBYTE pCode); |
157 | inline PCODE implTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _implTarget; } |
158 | inline PCODE failTarget() const { LIMITED_METHOD_CONTRACT; return (PCODE) _failTarget; } |
159 | |
160 | private: |
161 | BYTE part1 [1]; // 75 jne |
162 | BYTE _failDispl; // xx failLabel |
163 | BYTE part2 [2]; // 48 B8 mov rax, |
164 | size_t _implTarget; // xx xx xx xx xx xx xx xx 64-bit address |
165 | BYTE part3 [2]; // FF E0 jmp rax |
166 | // failLabel: |
167 | BYTE part4 [2]; // 48 B8 mov rax, |
168 | size_t _failTarget; // xx xx xx xx xx xx xx xx 64-bit address |
169 | BYTE part5 [2]; // FF E0 jmp rax |
170 | |
171 | // 39 bytes long, need 1 byte of padding to 8-byte align. |
172 | BYTE alignPad [1]; // cc |
173 | }; |
174 | |
175 | inline BOOL DispatchStubLong::isLongStub(LPCBYTE pCode) |
176 | { |
177 | LIMITED_METHOD_CONTRACT; |
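    // 0x75 is the one-byte "jne rel8" opcode that begins a DispatchStubLong; the short
    // variant begins with the two-byte 0F 85 encoding instead.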
178 | return reinterpret_cast<DispatchStubLong const *>(pCode)->part1[0] == 0x75; |
179 | } |
180 | |
181 | /*DispatchStub************************************************************************************** |
Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs.
A dispatch stub has an expected type (expectedMT), target address (target) and fail address (failure).
If the <this> object in the calling frame is in fact of the expected type, then
control is transferred to the target address, the method implementation. If not,
then control is transferred to the fail address, a fail stub (see below) where a polymorphic
lookup is done to find the correct address to go to.

implementation note: Order, choice of instructions, and branch directions
should be carefully tuned since they can have an inordinate effect on performance. Particular
attention needs to be paid to the effects on the BTB and branch prediction, both in the small
and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions.
Note that since this stub is only used for mostly monomorphic call sites (ones that are not get patched
to something else), the conditional jump "jne failure" is mostly not taken, and hence it is important
that branch prediction statically predict it as not taken, which means it must be a forward jump. The alternative
is to reverse the order of the jumps and make sure that the resulting conditional jump "je implTarget"
is statically predicted as taken, i.e. a backward jump. The current choice was made since it was easier
to control the placement of the stubs than to control the placement of the jitted code relative to the stubs. */
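/* Illustrative sketch only (not part of the stub itself): what the generated dispatch
   stub does, in C-like pseudocode. expectedMT, implTarget and failTarget correspond to
   the fields/accessors below; pObj is the <this> object in the argument register:

       if (*(size_t*)pObj == expectedMT)     // mov rax, expectedMT / cmp [THIS_REG], rax
           goto implTarget;                  // mov rax, implTarget / jmp rax
       else
           goto failTarget;                  // jne to the resolve stub's fail entry
*/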
199 | struct DispatchStub |
200 | { |
201 | friend struct DispatchHolder; |
202 | |
203 | enum DispatchStubType |
204 | { |
205 | e_TYPE_SHORT, |
206 | e_TYPE_LONG, |
207 | }; |
208 | |
209 | inline DispatchStubType type() const |
210 | { |
211 | LIMITED_METHOD_CONTRACT; |
212 | CONSISTENCY_CHECK(DispatchStubShort::isShortStub(reinterpret_cast<LPCBYTE>(this + 1)) |
213 | || DispatchStubLong::isLongStub(reinterpret_cast<LPCBYTE>(this + 1))); |
214 | return DispatchStubShort::isShortStub((BYTE *)(this + 1)) ? e_TYPE_SHORT : e_TYPE_LONG; |
215 | } |
216 | |
217 | inline static size_t size(DispatchStubType type) |
218 | { |
219 | STATIC_CONTRACT_LEAF; |
220 | return sizeof(DispatchStub) + |
221 | ((type == e_TYPE_SHORT) ? sizeof(DispatchStubShort) : sizeof(DispatchStubLong)); |
222 | } |
223 | |
224 | inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } |
225 | inline size_t expectedMT() const { LIMITED_METHOD_CONTRACT; return _expectedMT; } |
226 | inline size_t size() const { WRAPPER_NO_CONTRACT; return size(type()); } |
227 | |
228 | inline PCODE implTarget() const |
229 | { |
230 | LIMITED_METHOD_CONTRACT; |
231 | if (type() == e_TYPE_SHORT) |
232 | return getShortStub()->implTarget(); |
233 | else |
234 | return getLongStub()->implTarget(); |
235 | } |
236 | |
237 | inline PCODE failTarget() const |
238 | { |
239 | if (type() == e_TYPE_SHORT) |
240 | return getShortStub()->failTarget(); |
241 | else |
242 | return getLongStub()->failTarget(); |
243 | } |
244 | |
245 | private: |
246 | inline DispatchStubShort const *getShortStub() const |
247 | { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubShort const *>(this + 1); } |
248 | |
249 | inline DispatchStubLong const *getLongStub() const |
250 | { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStubLong const *>(this + 1); } |
251 | |
252 | BYTE _entryPoint [2]; // 48 B8 mov rax, |
253 | size_t _expectedMT; // xx xx xx xx xx xx xx xx 64-bit address |
254 | BYTE part1 [3]; // 48 39 XX cmp [THIS_REG], rax |
255 | |
256 | // Followed by either DispatchStubShort or DispatchStubLong, depending |
257 | // on whether we were able to make a rel32 or had to make an abs64 jump |
258 | // to the resolve stub on failure. |
259 | |
260 | }; |
261 | |
262 | /* DispatchHolders are the containers for DispatchStubs, they provide for any alignment of |
263 | stubs as necessary. DispatchStubs are placed in a hashtable and in a cache. The keys for both |
are the pair expectedMT and token. Efficiency of the hash table is not a big issue,
265 | since lookups in it are fairly rare. Efficiency of the cache is paramount since it is accessed frequently |
266 | (see ResolveStub below). Currently we are storing both of these fields in the DispatchHolder to simplify |
267 | alignment issues. If inlineMT in the stub itself was aligned, then it could be the expectedMT field. |
268 | While the token field can be logically gotten by following the failure target to the failEntryPoint |
269 | of the ResolveStub and then to the token over there, for perf reasons of cache access, it is duplicated here. |
270 | This allows us to use DispatchStubs in the cache. The alternative is to provide some other immutable struct |
for the cache composed of the triplet (expectedMT, token, target) and some sort of reclamation scheme when
272 | they are thrown out of the cache via overwrites (since concurrency will make the obvious approaches invalid). |
273 | */ |
274 | |
275 | /* @workaround for ee resolution - Since the EE does not currently have a resolver function that |
276 | does what we want, see notes in implementation of VirtualCallStubManager::Resolver, we are |
using dispatch stubs to simulate what we want. That means that inlineTarget, which should be immutable,
is in fact written. Hence we have moved target out into the holder and aligned it so we can
279 | atomically update it. When we get a resolver function that does what we want, we can drop this field, |
280 | and live with just the inlineTarget field in the stub itself, since immutability will hold.*/ |
281 | struct DispatchHolder |
282 | { |
283 | static void InitializeStatic(); |
284 | |
285 | void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT, |
286 | DispatchStub::DispatchStubType type); |
287 | |
288 | static size_t GetHolderSize(DispatchStub::DispatchStubType type) |
289 | { STATIC_CONTRACT_WRAPPER; return DispatchStub::size(type); } |
290 | |
291 | static BOOL CanShortJumpDispatchStubReachFailTarget(PCODE failTarget, LPCBYTE stubMemory) |
292 | { |
293 | STATIC_CONTRACT_WRAPPER; |
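        // The rel32 in the short stub's "jne rel32" is relative to the byte that follows
        // the displacement, i.e. part2 of the DispatchStubShort that is laid out right
        // after the DispatchStub; measure the candidate displacement from there.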
294 | LPCBYTE pFrom = stubMemory + sizeof(DispatchStub) + offsetof(DispatchStubShort, part2[0]); |
295 | size_t cbRelJump = failTarget - (PCODE)pFrom; |
296 | return FitsInI4(cbRelJump); |
297 | } |
298 | |
299 | DispatchStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<DispatchStub *>(this); } |
300 | |
301 | static DispatchHolder* FromDispatchEntry(PCODE dispatchEntry); |
302 | |
303 | private: |
304 | // DispatchStub follows here. It is dynamically sized on allocation |
305 | // because it could be a DispatchStubLong or a DispatchStubShort |
306 | }; |
307 | |
308 | struct ResolveStub; |
309 | struct ResolveHolder; |
310 | |
311 | /*ResolveStub************************************************************************************** |
Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. There is only
313 | one resolver stub built for any given token, even though there may be many call sites that |
314 | use that token and many distinct <this> types that are used in the calling call frames. A resolver stub |
315 | actually has two entry points, one for polymorphic call sites and one for dispatch stubs that fail on their |
316 | expectedMT test. There is a third part of the resolver stub that enters the ee when a decision should |
317 | be made about changing the callsite. Therefore, we have defined the resolver stub as three distinct pieces, |
318 | even though they are actually allocated as a single contiguous block of memory. These pieces are: |
319 | |
320 | A ResolveStub has two entry points: |
321 | |
322 | FailEntry - where the dispatch stub goes if the expected MT test fails. This piece of the stub does |
323 | a check to see how often we are actually failing. If failures are frequent, control transfers to the |
324 | patch piece to cause the call site to be changed from a mostly monomorphic callsite |
(calls dispatch stub) to a polymorphic callsite (calls resolve stub). If failures are rare, control
326 | transfers to the resolve piece (see ResolveStub). The failEntryPoint decrements a counter |
327 | every time it is entered. The ee at various times will add a large chunk to the counter. |
328 | |
ResolveEntry - does a lookup in a cache by hashing the actual type of the calling frame's
<this> and the token identifying the (contract,method) pair desired. If found, control is transferred
331 | to the method implementation. If not found in the cache, the token is pushed and the ee is entered via |
332 | the ResolveWorkerStub to do a full lookup and eventual transfer to the correct method implementation. Since |
333 | there is a different resolve stub for every token, the token can be inlined and the token can be pre-hashed. |
334 | The effectiveness of this approach is highly sensitive to the effectiveness of the hashing algorithm used, |
335 | as well as its speed. It turns out it is very important to make the hash function sensitive to all |
336 | of the bits of the method table, as method tables are laid out in memory in a very non-random way. Before |
337 | making any changes to the code sequences here, it is very important to measure and tune them as perf |
338 | can vary greatly, in unexpected ways, with seeming minor changes. |
339 | |
340 | Implementation note - Order, choice of instructions, and branch directions |
should be carefully tuned since they can have an inordinate effect on performance. Particular
342 | attention needs to be paid to the effects on the BTB and branch prediction, both in the small |
343 | and in the large, i.e. it needs to run well in the face of BTB overflow--using static predictions. |
Note that this stub is called in highly polymorphic cases, but the cache should have been sized
and the hash function chosen to maximize the cache hit case. Hence the cmp/jcc instructions should
mostly be going down the cache hit route, and it is important that this be statically predicted as such.
Hence the 3 jcc instrs need to be forward jumps. As structured, there is only one jmp/jcc that typically
gets put in the BTB since all the others typically fall straight through. Minimizing potential BTB entries
is important. */
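/* Illustrative sketch only (not part of the stub itself): the cache probe performed by
   the resolve entry point, in C-like pseudocode. mt is the <this> object's MethodTable;
   _hashedToken, _token, _cacheAddress and mask are the fields below; e is a ResolveCacheElem*:

       size_t hash   = (mt + (mt >> CALL_STUB_CACHE_NUM_BITS)) ^ _hashedToken;
       size_t offset = hash & mask;                    // mask = CALL_STUB_CACHE_MASK*sizeof(void*)
       e = *(ResolveCacheElem**)(_cacheAddress + offset);
       if (e->pMT == mt && e->token == _token)
           goto e->target;                             // cache hit
       else
           goto miss;                                  // push cache elem, jump to resolve worker
*/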
350 | |
351 | struct ResolveStub |
352 | { |
353 | inline PCODE failEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_failEntryPoint[0]; } |
354 | inline PCODE resolveEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_resolveEntryPoint[0]; } |
355 | inline PCODE slowEntryPoint() { LIMITED_METHOD_CONTRACT; return (PCODE)&_slowEntryPoint[0]; } |
356 | |
357 | inline INT32* pCounter() { LIMITED_METHOD_CONTRACT; return _pCounter; } |
358 | inline UINT32 hashedToken() { LIMITED_METHOD_CONTRACT; return _hashedToken >> LOG2_PTRSIZE; } |
359 | inline size_t cacheAddress() { LIMITED_METHOD_CONTRACT; return _cacheAddress; } |
360 | inline size_t token() { LIMITED_METHOD_CONTRACT; return _token; } |
    inline size_t size() { LIMITED_METHOD_CONTRACT; return sizeof(ResolveStub); }
362 | |
363 | private: |
364 | friend struct ResolveHolder; |
365 | |
366 | BYTE _resolveEntryPoint[3];// resolveStub: |
367 | // 52 push rdx |
368 | // 49 BA mov r10, |
369 | size_t _cacheAddress; // xx xx xx xx xx xx xx xx 64-bit address |
370 | BYTE part1 [15]; // 48 8B XX mov rax, [THIS_REG] ; Compute hash = ((MT + MT>>12) ^ prehash) |
371 | // 48 8B D0 mov rdx, rax ; rdx <- current MethodTable |
372 | // 48 C1 E8 0C shr rax, 12 |
373 | // 48 03 C2 add rax, rdx |
374 | // 48 35 xor rax, |
375 | UINT32 _hashedToken; // xx xx xx xx hashedtoken ; xor with pre-hashed token |
376 | BYTE part2 [2]; // 48 25 and rax, |
377 | UINT32 mask; // xx xx xx xx cache_mask ; and with cache mask |
378 | BYTE part3 [6]; // 4A 8B 04 10 mov rax, [r10 + rax] ; get cache entry address |
379 | // 49 BA mov r10, |
380 | size_t _token; // xx xx xx xx xx xx xx xx 64-bit address |
381 | BYTE part4 [3]; // 48 3B 50 cmp rdx, [rax+ ; compare our MT vs. cache MT |
382 | BYTE mtOffset; // xx ResolverCacheElem.pMT] |
383 | BYTE part5 [1]; // 75 jne |
384 | BYTE toMiss1; // xx miss ; must be forward jump, for perf reasons |
385 | BYTE part6 [3]; // 4C 3B 50 cmp r10, [rax+ ; compare our token vs. cache token |
386 | BYTE tokenOffset; // xx ResolverCacheElem.token] |
387 | BYTE part7 [1]; // 75 jne |
388 | BYTE toMiss2; // xx miss ; must be forward jump, for perf reasons |
389 | BYTE part8 [3]; // 48 8B 40 mov rax, [rax+ ; setup rax with method impl address |
390 | BYTE targetOffset; // xx ResolverCacheElem.target] |
391 | BYTE part9 [3]; // 5A pop rdx |
392 | // FF E0 jmp rax |
393 | // failStub: |
394 | BYTE _failEntryPoint [2]; // 48 B8 mov rax, |
395 | INT32* _pCounter; // xx xx xx xx xx xx xx xx 64-bit address |
396 | BYTE part11 [4]; // 83 00 FF add dword ptr [rax], -1 |
397 | // 7d jnl |
398 | BYTE toResolveStub1; // xx resolveStub |
399 | BYTE part12 [4]; // 49 83 CB 01 or r11, 1 |
400 | BYTE _slowEntryPoint [3]; // 52 slow: push rdx |
401 | // 49 BA mov r10, |
402 | size_t _tokenSlow; // xx xx xx xx xx xx xx xx 64-bit address |
403 | // BYTE miss [5]; // 5A miss: pop rdx ; don't pop rdx |
404 | // // 41 52 push r10 ; don't push r10 leave it setup with token |
405 | BYTE miss [3]; // 50 push rax ; push ptr to cache elem |
406 | // 48 B8 mov rax, |
407 | size_t _resolveWorker; // xx xx xx xx xx xx xx xx 64-bit address |
408 | BYTE part10 [2]; // FF E0 jmp rax |
409 | }; |
410 | |
/* ResolveHolders are the containers for ResolveStubs. They provide
for any alignment of the stubs as necessary. The stubs are placed in a hash table keyed by
the token for which they are built. Efficiency of access requires that this token be aligned.
For now, we have copied that field into the ResolveHolder itself; if the resolve stub were arranged such that
any of its inlined tokens (non-prehashed) were aligned, then the token field in the ResolveHolder
would not be needed. */
417 | struct ResolveHolder |
418 | { |
419 | static void InitializeStatic(); |
420 | |
421 | void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, |
422 | size_t dispatchToken, UINT32 hashedToken, |
423 | void * cacheAddr, INT32* counterAddr); |
424 | |
425 | ResolveStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } |
426 | |
427 | static ResolveHolder* FromFailEntry(PCODE resolveEntry); |
428 | static ResolveHolder* FromResolveEntry(PCODE resolveEntry); |
429 | |
430 | private: |
431 | ResolveStub _stub; |
432 | }; |
433 | |
434 | /*VTableCallStub************************************************************************************** |
These are jump stubs that perform a vtable-based virtual call. These stubs assume that an object is placed
436 | in the first argument register (this pointer). From there, the stub extracts the MethodTable pointer, followed by the |
437 | vtable pointer, and finally jumps to the target method at a given slot in the vtable. |
438 | */ |
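/* Illustrative sketch only (not part of the stub itself): what the generated stub does,
   in C-like pseudocode, for the non-relative vtable layout that Initialize() below asserts:

       MethodTable *pMT   = *(MethodTable**)pObj;                            // mov rax, [THIS_REG]
       TADDR        chunk = *(TADDR*)((BYTE*)pMT + offsetOfIndirection);     // mov rax, [rax+off1]
       goto *(PCODE*)(chunk + offsetAfterIndirection);                       // jmp qword ptr [rax+off2]
*/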
439 | struct VTableCallStub |
440 | { |
441 | friend struct VTableCallHolder; |
442 | |
443 | inline size_t size() |
444 | { |
445 | LIMITED_METHOD_CONTRACT; |
446 | |
447 | BYTE* pStubCode = (BYTE *)this; |
448 | |
449 | size_t cbSize = 3; // First mov instruction |
450 | cbSize += (pStubCode[cbSize + 2] == 0x80 ? 7 : 4); // Either 48 8B 80 or 48 8B 40: mov rax,[rax+offset] |
451 | cbSize += (pStubCode[cbSize + 1] == 0xa0 ? 6 : 3); // Either FF A0 or FF 60: jmp qword ptr [rax+slot] |
452 | cbSize += 4; // Slot value (data storage, not a real instruction) |
453 | |
454 | return cbSize; |
455 | } |
456 | |
457 | inline PCODE entryPoint() const { LIMITED_METHOD_CONTRACT; return (PCODE)&_entryPoint[0]; } |
458 | |
459 | inline size_t token() |
460 | { |
461 | LIMITED_METHOD_CONTRACT; |
462 | DWORD slot = *(DWORD*)(reinterpret_cast<BYTE*>(this) + size() - 4); |
463 | return DispatchToken::CreateDispatchToken(slot).To_SIZE_T(); |
464 | } |
465 | |
466 | private: |
467 | BYTE _entryPoint[0]; // Dynamically sized stub. See Initialize() for more details. |
468 | }; |
469 | |
/* VTableCallHolders are the containers for VTableCallStubs; they provide for any alignment of
471 | stubs as necessary. */ |
472 | struct VTableCallHolder |
473 | { |
474 | void Initialize(unsigned slot); |
475 | |
476 | VTableCallStub* stub() { LIMITED_METHOD_CONTRACT; return reinterpret_cast<VTableCallStub *>(this); } |
477 | |
478 | static size_t GetHolderSize(unsigned slot) |
479 | { |
480 | STATIC_CONTRACT_WRAPPER; |
481 | unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; |
482 | unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; |
483 | return 3 + (offsetOfIndirection >= 0x80 ? 7 : 4) + (offsetAfterIndirection >= 0x80 ? 6 : 3) + 4; |
484 | } |
485 | |
    static VTableCallHolder* FromVTableCallEntry(PCODE entry) { LIMITED_METHOD_CONTRACT; return (VTableCallHolder*)entry; }
487 | |
488 | private: |
489 | // VTableCallStub follows here. It is dynamically sized on allocation because it could |
490 | // use short/long instruction sizes for mov/jmp, depending on the slot value. |
491 | }; |
492 | #pragma pack(pop) |
493 | |
494 | #ifdef DECLARE_DATA |
495 | |
496 | LookupStub lookupInit; |
497 | DispatchStub dispatchInit; |
498 | DispatchStubShort dispatchShortInit; |
499 | DispatchStubLong dispatchLongInit; |
500 | ResolveStub resolveInit; |
501 | |
502 | #define INSTR_INT3 0xcc |
503 | #define INSTR_NOP 0x90 |
504 | |
505 | #ifndef DACCESS_COMPILE |
506 | |
507 | #include "asmconstants.h" |
508 | |
509 | #ifdef STUB_LOGGING |
510 | extern size_t g_lookup_inline_counter; |
511 | extern size_t g_call_inline_counter; |
512 | extern size_t g_miss_inline_counter; |
513 | extern size_t g_call_cache_counter; |
514 | extern size_t g_miss_cache_counter; |
515 | #endif |
516 | |
/* Template used to generate the stub. We generate a stub by allocating a block of
memory, copying the template over it, and then updating only the fields that need
to be changed.
520 | */ |
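/* Illustrative sketch only: how a holder is typically populated. Memory for the holder is
   carved out of an executable stub heap (pStubHeapMemory below is a hypothetical name for
   that allocation), the pre-built template is copied over it, and only the stub-specific
   immediates are patched:

       LookupHolder *pHolder = (LookupHolder *)pStubHeapMemory;   // hypothetical allocation
       pHolder->Initialize(resolveWorkerTarget, dispatchToken);   // copies lookupInit, patches fields
*/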
521 | |
522 | void LookupHolder::InitializeStatic() |
523 | { |
524 | static_assert_no_msg((sizeof(LookupHolder) % sizeof(void*)) == 0); |
525 | |
526 | // The first instruction of a LookupStub is nop |
527 | // and we use it in order to differentiate the first two bytes |
528 | // of a LookupStub and a ResolveStub |
529 | lookupInit._entryPoint [0] = INSTR_NOP; |
530 | lookupInit._entryPoint [1] = 0x48; |
531 | lookupInit._entryPoint [2] = 0xB8; |
532 | lookupInit._token = 0xcccccccccccccccc; |
533 | lookupInit.part2 [0] = 0x50; |
534 | lookupInit.part2 [1] = 0x48; |
535 | lookupInit.part2 [2] = 0xB8; |
536 | lookupInit._resolveWorkerAddr = 0xcccccccccccccccc; |
537 | lookupInit.part3 [0] = 0xFF; |
538 | lookupInit.part3 [1] = 0xE0; |
539 | } |
540 | |
541 | void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken) |
542 | { |
543 | _stub = lookupInit; |
544 | |
545 | //fill in the stub specific fields |
546 | _stub._token = dispatchToken; |
547 | _stub._resolveWorkerAddr = (size_t) resolveWorkerTarget; |
548 | } |
549 | |
/* Template used to generate the stub. We generate a stub by allocating a block of
memory, copying the template over it, and then updating only the fields that need
to be changed.
553 | */ |
554 | |
555 | void DispatchHolder::InitializeStatic() |
556 | { |
557 | // Check that _expectedMT is aligned in the DispatchHolder |
558 | static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubShort)) % sizeof(void*)) == 0); |
559 | static_assert_no_msg(((sizeof(DispatchStub)+sizeof(DispatchStubLong)) % sizeof(void*)) == 0); |
560 | CONSISTENCY_CHECK((offsetof(DispatchStubLong, part4[0]) - offsetof(DispatchStubLong, part2[0])) < INT8_MAX); |
561 | |
562 | // Common dispatch stub initialization |
563 | dispatchInit._entryPoint [0] = 0x48; |
564 | dispatchInit._entryPoint [1] = 0xB8; |
565 | dispatchInit._expectedMT = 0xcccccccccccccccc; |
566 | dispatchInit.part1 [0] = 0x48; |
567 | dispatchInit.part1 [1] = 0x39; |
568 | #ifdef UNIX_AMD64_ABI |
569 | dispatchInit.part1 [2] = 0x07; // RDI |
570 | #else |
571 | dispatchInit.part1 [2] = 0x01; // RCX |
572 | #endif |
573 | |
574 | // Short dispatch stub initialization |
575 | dispatchShortInit.part1 [0] = 0x0F; |
576 | dispatchShortInit.part1 [1] = 0x85; |
577 | dispatchShortInit._failDispl = 0xcccccccc; |
578 | dispatchShortInit.part2 [0] = 0x48; |
579 | dispatchShortInit.part2 [1] = 0xb8; |
580 | dispatchShortInit._implTarget = 0xcccccccccccccccc; |
581 | dispatchShortInit.part3 [0] = 0xFF; |
582 | dispatchShortInit.part3 [1] = 0xE0; |
583 | dispatchShortInit.alignPad [0] = INSTR_INT3; |
584 | |
585 | // Long dispatch stub initialization |
586 | dispatchLongInit.part1 [0] = 0x75; |
587 | dispatchLongInit._failDispl = BYTE(&dispatchLongInit.part4[0] - &dispatchLongInit.part2[0]); |
588 | dispatchLongInit.part2 [0] = 0x48; |
589 | dispatchLongInit.part2 [1] = 0xb8; |
590 | dispatchLongInit._implTarget = 0xcccccccccccccccc; |
591 | dispatchLongInit.part3 [0] = 0xFF; |
592 | dispatchLongInit.part3 [1] = 0xE0; |
593 | // failLabel: |
594 | dispatchLongInit.part4 [0] = 0x48; |
595 | dispatchLongInit.part4 [1] = 0xb8; |
596 | dispatchLongInit._failTarget = 0xcccccccccccccccc; |
597 | dispatchLongInit.part5 [0] = 0xFF; |
598 | dispatchLongInit.part5 [1] = 0xE0; |
599 | dispatchLongInit.alignPad [0] = INSTR_INT3; |
}
601 | |
602 | void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT, |
603 | DispatchStub::DispatchStubType type) |
604 | { |
605 | // |
606 | // Initialize the common area |
607 | // |
608 | |
609 | // initialize the static data |
610 | *stub() = dispatchInit; |
611 | |
612 | // fill in the dynamic data |
613 | stub()->_expectedMT = expectedMT; |
614 | |
615 | // |
616 | // Initialize the short/long areas |
617 | // |
618 | if (type == DispatchStub::e_TYPE_SHORT) |
619 | { |
620 | DispatchStubShort *shortStub = const_cast<DispatchStubShort *>(stub()->getShortStub()); |
621 | |
622 | // initialize the static data |
623 | *shortStub = dispatchShortInit; |
624 | |
625 | // fill in the dynamic data |
626 | size_t displ = (failTarget - ((PCODE) &shortStub->_failDispl + sizeof(DISPL))); |
627 | CONSISTENCY_CHECK(FitsInI4(displ)); |
628 | shortStub->_failDispl = (DISPL) displ; |
629 | shortStub->_implTarget = (size_t) implTarget; |
630 | CONSISTENCY_CHECK((PCODE)&shortStub->_failDispl + sizeof(DISPL) + shortStub->_failDispl == failTarget); |
631 | } |
632 | else |
633 | { |
634 | CONSISTENCY_CHECK(type == DispatchStub::e_TYPE_LONG); |
635 | DispatchStubLong *longStub = const_cast<DispatchStubLong *>(stub()->getLongStub()); |
636 | |
637 | // initialize the static data |
638 | *longStub = dispatchLongInit; |
639 | |
640 | // fill in the dynamic data |
641 | longStub->_implTarget = implTarget; |
642 | longStub->_failTarget = failTarget; |
643 | } |
644 | } |
645 | |
/* Template used to generate the stub. We generate a stub by allocating a block of
memory, copying the template over it, and then updating only the fields that need
to be changed.
649 | */ |
650 | |
651 | void ResolveHolder::InitializeStatic() |
652 | { |
653 | static_assert_no_msg((sizeof(ResolveHolder) % sizeof(void*)) == 0); |
654 | |
655 | resolveInit._resolveEntryPoint [0] = 0x52; |
656 | resolveInit._resolveEntryPoint [1] = 0x49; |
657 | resolveInit._resolveEntryPoint [2] = 0xBA; |
658 | resolveInit._cacheAddress = 0xcccccccccccccccc; |
659 | resolveInit.part1 [ 0] = 0x48; |
660 | resolveInit.part1 [ 1] = 0x8B; |
661 | #ifdef UNIX_AMD64_ABI |
662 | resolveInit.part1 [ 2] = 0x07; // RDI |
663 | #else |
664 | resolveInit.part1 [ 2] = 0x01; // RCX |
665 | #endif |
666 | resolveInit.part1 [ 3] = 0x48; |
667 | resolveInit.part1 [ 4] = 0x8B; |
668 | resolveInit.part1 [ 5] = 0xD0; |
669 | resolveInit.part1 [ 6] = 0x48; |
670 | resolveInit.part1 [ 7] = 0xC1; |
671 | resolveInit.part1 [ 8] = 0xE8; |
672 | resolveInit.part1 [ 9] = CALL_STUB_CACHE_NUM_BITS; |
673 | resolveInit.part1 [10] = 0x48; |
674 | resolveInit.part1 [11] = 0x03; |
675 | resolveInit.part1 [12] = 0xC2; |
676 | resolveInit.part1 [13] = 0x48; |
677 | resolveInit.part1 [14] = 0x35; |
678 | // Review truncation from unsigned __int64 to UINT32 of a constant value. |
679 | #if defined(_MSC_VER) |
680 | #pragma warning(push) |
681 | #pragma warning(disable:4305 4309) |
682 | #endif // defined(_MSC_VER) |
683 | |
684 | resolveInit._hashedToken = 0xcccccccc; |
685 | |
686 | #if defined(_MSC_VER) |
687 | #pragma warning(pop) |
688 | #endif // defined(_MSC_VER) |
689 | |
690 | resolveInit.part2 [ 0] = 0x48; |
691 | resolveInit.part2 [ 1] = 0x25; |
692 | resolveInit.mask = CALL_STUB_CACHE_MASK*sizeof(void *); |
693 | resolveInit.part3 [0] = 0x4A; |
694 | resolveInit.part3 [1] = 0x8B; |
695 | resolveInit.part3 [2] = 0x04; |
696 | resolveInit.part3 [3] = 0x10; |
697 | resolveInit.part3 [4] = 0x49; |
698 | resolveInit.part3 [5] = 0xBA; |
699 | resolveInit._token = 0xcccccccccccccccc; |
700 | resolveInit.part4 [0] = 0x48; |
701 | resolveInit.part4 [1] = 0x3B; |
702 | resolveInit.part4 [2] = 0x50; |
703 | resolveInit.mtOffset = offsetof(ResolveCacheElem,pMT) & 0xFF; |
704 | resolveInit.part5 [0] = 0x75; |
705 | resolveInit.toMiss1 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss1)+1) & 0xFF; |
706 | resolveInit.part6 [0] = 0x4C; |
707 | resolveInit.part6 [1] = 0x3B; |
708 | resolveInit.part6 [2] = 0x50; |
709 | resolveInit.tokenOffset = offsetof(ResolveCacheElem,token) & 0xFF; |
710 | resolveInit.part7 [0] = 0x75; |
711 | resolveInit.toMiss2 = offsetof(ResolveStub,miss)-(offsetof(ResolveStub,toMiss2)+1) & 0xFF; |
712 | resolveInit.part8 [0] = 0x48; |
713 | resolveInit.part8 [1] = 0x8B; |
714 | resolveInit.part8 [2] = 0x40; |
715 | resolveInit.targetOffset = offsetof(ResolveCacheElem,target) & 0xFF; |
716 | resolveInit.part9 [0] = 0x5A; |
717 | resolveInit.part9 [1] = 0xFF; |
718 | resolveInit.part9 [2] = 0xE0; |
719 | resolveInit._failEntryPoint [0] = 0x48; |
720 | resolveInit._failEntryPoint [1] = 0xB8; |
721 | resolveInit._pCounter = (INT32*) (size_t) 0xcccccccccccccccc; |
722 | resolveInit.part11 [0] = 0x83; |
723 | resolveInit.part11 [1] = 0x00; |
724 | resolveInit.part11 [2] = 0xFF; |
725 | resolveInit.part11 [3] = 0x7D; |
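    // Backward rel8 to resolveStub: the displacement is negative (the resolve entry point
    // precedes this byte), so the subtraction below is truncated to its two's-complement byte.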
726 | resolveInit.toResolveStub1 = (offsetof(ResolveStub, _resolveEntryPoint) - (offsetof(ResolveStub, toResolveStub1)+1)) & 0xFF; |
727 | resolveInit.part12 [0] = 0x49; |
728 | resolveInit.part12 [1] = 0x83; |
729 | resolveInit.part12 [2] = 0xCB; |
730 | resolveInit.part12 [3] = 0x01; |
731 | resolveInit._slowEntryPoint [0] = 0x52; |
732 | resolveInit._slowEntryPoint [1] = 0x49; |
733 | resolveInit._slowEntryPoint [2] = 0xBA; |
734 | resolveInit._tokenSlow = 0xcccccccccccccccc; |
735 | resolveInit.miss [0] = 0x50; |
736 | resolveInit.miss [1] = 0x48; |
737 | resolveInit.miss [2] = 0xB8; |
738 | resolveInit._resolveWorker = 0xcccccccccccccccc; |
739 | resolveInit.part10 [0] = 0xFF; |
740 | resolveInit.part10 [1] = 0xE0; |
}
742 | |
743 | void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, |
744 | size_t dispatchToken, UINT32 hashedToken, |
745 | void * cacheAddr, INT32* counterAddr) |
746 | { |
747 | _stub = resolveInit; |
748 | |
749 | //fill in the stub specific fields |
750 | _stub._cacheAddress = (size_t) cacheAddr; |
751 | _stub._hashedToken = hashedToken << LOG2_PTRSIZE; |
752 | _stub._token = dispatchToken; |
753 | _stub._tokenSlow = dispatchToken; |
754 | _stub._resolveWorker = (size_t) resolveWorkerTarget; |
755 | _stub._pCounter = counterAddr; |
756 | } |
757 | |
758 | ResolveHolder* ResolveHolder::FromFailEntry(PCODE failEntry) |
759 | { |
760 | LIMITED_METHOD_CONTRACT; |
761 | ResolveHolder* resolveHolder = (ResolveHolder*) ( failEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _failEntryPoint) ); |
762 | _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]); |
763 | return resolveHolder; |
764 | } |
765 | |
766 | #endif // DACCESS_COMPILE |
767 | |
768 | LookupHolder* LookupHolder::FromLookupEntry(PCODE lookupEntry) |
769 | { |
770 | LIMITED_METHOD_CONTRACT; |
771 | LookupHolder* lookupHolder = (LookupHolder*) ( lookupEntry - offsetof(LookupHolder, _stub) - offsetof(LookupStub, _entryPoint) ); |
772 | _ASSERTE(lookupHolder->_stub._entryPoint[2] == lookupInit._entryPoint[2]); |
773 | return lookupHolder; |
774 | } |
775 | |
776 | |
777 | DispatchHolder* DispatchHolder::FromDispatchEntry(PCODE dispatchEntry) |
778 | { |
779 | LIMITED_METHOD_CONTRACT; |
780 | DispatchHolder* dispatchHolder = (DispatchHolder*) ( dispatchEntry - offsetof(DispatchStub, _entryPoint) ); |
781 | _ASSERTE(dispatchHolder->stub()->_entryPoint[1] == dispatchInit._entryPoint[1]); |
782 | return dispatchHolder; |
783 | } |
784 | |
785 | |
786 | ResolveHolder* ResolveHolder::FromResolveEntry(PCODE resolveEntry) |
787 | { |
788 | LIMITED_METHOD_CONTRACT; |
789 | ResolveHolder* resolveHolder = (ResolveHolder*) ( resolveEntry - offsetof(ResolveHolder, _stub) - offsetof(ResolveStub, _resolveEntryPoint) ); |
790 | _ASSERTE(resolveHolder->_stub._resolveEntryPoint[1] == resolveInit._resolveEntryPoint[1]); |
791 | return resolveHolder; |
792 | } |
793 | |
794 | void VTableCallHolder::Initialize(unsigned slot) |
795 | { |
796 | unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; |
797 | unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; |
798 | _ASSERTE(MethodTable::VTableIndir_t::isRelative == false /* TODO: NYI */); |
799 | |
800 | VTableCallStub* pStub = stub(); |
801 | BYTE* p = (BYTE*)pStub->entryPoint(); |
802 | |
803 | #ifdef UNIX_AMD64_ABI |
804 | // mov rax,[rdi] : rax = MethodTable pointer |
805 | *(UINT32 *)p = 0x078b48; p += 3; |
806 | #else |
807 | // mov rax,[rcx] : rax = MethodTable pointer |
808 | *(UINT32 *)p = 0x018b48; p += 3; |
809 | #endif |
810 | |
811 | // mov rax,[rax+vtable offset] : rax = vtable pointer |
812 | if (offsetOfIndirection >= 0x80) |
813 | { |
814 | *(UINT32*)p = 0x00808b48; p += 3; |
815 | *(UINT32*)p = offsetOfIndirection; p += 4; |
816 | } |
817 | else |
818 | { |
819 | *(UINT32*)p = 0x00408b48; p += 3; |
820 | *p++ = (BYTE)offsetOfIndirection; |
821 | } |
822 | |
823 | // jmp qword ptr [rax+slot] |
824 | if (offsetAfterIndirection >= 0x80) |
825 | { |
826 | *(UINT32*)p = 0xa0ff; p += 2; |
827 | *(UINT32*)p = offsetAfterIndirection; p += 4; |
828 | } |
829 | else |
830 | { |
831 | *(UINT16*)p = 0x60ff; p += 2; |
832 | *p++ = (BYTE)offsetAfterIndirection; |
833 | } |
834 | |
835 | // Store the slot value here for convenience. Not a real instruction (unreachable anyways) |
836 | *(UINT32*)p = slot; p += 4; |
837 | |
838 | _ASSERT(p == (BYTE*)stub()->entryPoint() + VTableCallHolder::GetHolderSize(slot)); |
839 | _ASSERT(stub()->size() == VTableCallHolder::GetHolderSize(slot)); |
840 | } |
841 | |
842 | VirtualCallStubManager::StubKind VirtualCallStubManager::predictStubKind(PCODE stubStartAddress) |
843 | { |
844 | #ifdef DACCESS_COMPILE |
845 | return SK_BREAKPOINT; // Dac always uses the slower lookup |
846 | #else |
847 | StubKind stubKind = SK_UNKNOWN; |
848 | |
849 | EX_TRY |
850 | { |
851 | // If stubStartAddress is completely bogus, then this might AV, |
852 | // so we protect it with SEH. An AV here is OK. |
853 | AVInRuntimeImplOkayHolder AVOkay; |
854 | |
855 | WORD firstWord = *((WORD*) stubStartAddress); |
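        // Note: firstWord is a little-endian 16-bit read, so the stub's first byte is the
        // low byte of each constant below; e.g. 0xB848 matches the bytes 48 B8 (mov rax, imm64)
        // that begin a DispatchStub, and 0x4890 matches 90 48 (the nop that begins a LookupStub).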
856 | |
857 | if (firstWord == 0xB848) |
858 | { |
859 | stubKind = SK_DISPATCH; |
860 | } |
861 | else if (firstWord == 0x4890) |
862 | { |
863 | stubKind = SK_LOOKUP; |
864 | } |
865 | else if (firstWord == 0x4952) |
866 | { |
867 | stubKind = SK_RESOLVE; |
868 | } |
869 | else if (firstWord == 0x48F8) |
870 | { |
871 | stubKind = SK_LOOKUP; |
872 | } |
873 | else if (firstWord == 0x8B48) |
874 | { |
875 | stubKind = SK_VTABLECALL; |
876 | } |
877 | else |
878 | { |
879 | BYTE firstByte = ((BYTE*) stubStartAddress)[0]; |
880 | BYTE secondByte = ((BYTE*) stubStartAddress)[1]; |
881 | |
882 | if ((firstByte == INSTR_INT3) || (secondByte == INSTR_INT3)) |
883 | { |
884 | stubKind = SK_BREAKPOINT; |
885 | } |
886 | } |
887 | } |
888 | EX_CATCH |
889 | { |
890 | stubKind = SK_UNKNOWN; |
891 | } |
892 | EX_END_CATCH(SwallowAllExceptions); |
893 | |
894 | return stubKind; |
895 | |
896 | #endif // DACCESS_COMPILE |
897 | } |
898 | |
899 | #endif //DECLARE_DATA |
900 | |
901 | #endif // _VIRTUAL_CALL_STUB_AMD64_H |
902 | |