1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5
// NOTE on frame-size C_ASSERT usage in this file:
// If a frame size changes, the stubs have to be revisited for correctness.
// Revisit the logic and then update the hard-coded constants so that the C_ASSERT will fire again
// the next time someone changes the frame size. Keeping these constants up to date ensures that
// changes in the frame size trigger errors at compile time if the code is not altered to match.
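//
// An illustrative sketch of the pattern this note refers to (the names below are hypothetical;
// the real constants live next to the stubs that rely on them):
//
//     #define SOME_STUB_FRAME_SIZE 0x68                         // hand-maintained frame size
//     C_ASSERT(SOME_STUB_FRAME_SIZE == sizeof(SomeStubFrame));  // breaks the build if the layout drifts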
11
12// Precompiled Header
13
14#include "common.h"
15
16#include "field.h"
17#include "stublink.h"
18
19#include "frames.h"
20#include "excep.h"
21#include "dllimport.h"
22#include "log.h"
23#include "comdelegate.h"
24#include "array.h"
25#include "jitinterface.h"
26#include "codeman.h"
27#include "dbginterface.h"
28#include "eeprofinterfaces.h"
29#include "eeconfig.h"
30#ifdef _TARGET_X86_
31#include "asmconstants.h"
32#endif // _TARGET_X86_
33#include "class.h"
34#include "stublink.inl"
35
36#ifdef FEATURE_COMINTEROP
37#include "comtoclrcall.h"
38#include "runtimecallablewrapper.h"
39#include "comcache.h"
40#include "olevariant.h"
41#include "notifyexternals.h"
42#endif // FEATURE_COMINTEROP
43
44#ifdef FEATURE_PREJIT
45#include "compile.h"
46#endif
47
48#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
49#include <psapi.h>
50#endif
51
52
53#ifndef DACCESS_COMPILE
54
55extern "C" VOID __cdecl StubRareEnable(Thread *pThread);
56#ifdef FEATURE_COMINTEROP
57extern "C" HRESULT __cdecl StubRareDisableHR(Thread *pThread);
58#endif // FEATURE_COMINTEROP
59extern "C" VOID __cdecl StubRareDisableTHROW(Thread *pThread, Frame *pFrame);
60
61#ifndef FEATURE_ARRAYSTUB_AS_IL
62extern "C" VOID __cdecl ArrayOpStubNullException(void);
63extern "C" VOID __cdecl ArrayOpStubRangeException(void);
64extern "C" VOID __cdecl ArrayOpStubTypeMismatchException(void);
65
66#if defined(_TARGET_AMD64_)
67#define EXCEPTION_HELPERS(base) \
68 extern "C" VOID __cdecl base##_RSIRDI_ScratchArea(void); \
69 extern "C" VOID __cdecl base##_ScratchArea(void); \
70 extern "C" VOID __cdecl base##_RSIRDI(void); \
71 extern "C" VOID __cdecl base(void)
72EXCEPTION_HELPERS(ArrayOpStubNullException);
73EXCEPTION_HELPERS(ArrayOpStubRangeException);
74EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
75#undef EXCEPTION_HELPERS
#endif // _TARGET_AMD64_
77#endif // !FEATURE_ARRAYSTUB_AS_IL
78
79#if defined(_TARGET_AMD64_)
80#if defined(_DEBUG)
81extern "C" VOID __cdecl DebugCheckStubUnwindInfo();
82#endif // _DEBUG
83#endif // _TARGET_AMD64_
84
85// Presumably this code knows what it is doing with TLS. If we are hiding these
86// services from normal code, reveal them here.
87#ifdef TlsGetValue
88#undef TlsGetValue
89#endif
90
91#ifdef FEATURE_COMINTEROP
92Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame);
93#endif
94
95
96
97#ifdef _TARGET_AMD64_
98
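// Returns TRUE if 'reg' is one of the non-volatile (callee-saved) integer registers of the
// Windows x64 calling convention. X86EmitPushReg uses this to decide whether a push should be
// recorded in the unwind info as a saved register or merely as a stack allocation.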
99BOOL IsPreservedReg (X86Reg reg)
100{
101 UINT16 PreservedRegMask =
102 (1 << kRBX)
103 | (1 << kRBP)
104 | (1 << kRSI)
105 | (1 << kRDI)
106 | (1 << kR12)
107 | (1 << kR13)
108 | (1 << kR14)
109 | (1 << kR15);
110 return PreservedRegMask & (1 << reg);
111}
112
113#endif // _TARGET_AMD64_
114
115#ifdef _TARGET_AMD64_
//-----------------------------------------------------------------------
// InstructionFormats for the two halves of a near/short jump, used when a
// tail call has to adjust the stack pointer before jumping (see
// X86EmitTailcallWithESPAdjust and X86EmitTailcallWithSinglePop below).
// X64NearJumpSetup loads the target into rax when the displacement does not
// fit in a rel32, and X64NearJumpExecute emits the jump itself, so that the
// stack adjustment can be placed in between.
//-----------------------------------------------------------------------
121class X64NearJumpSetup : public InstructionFormat
122{
123 public:
124 X64NearJumpSetup() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
125 | InstructionFormat::k64Small | InstructionFormat::k64
126 )
127 {
128 LIMITED_METHOD_CONTRACT;
129 }
130
131 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
132 {
133 LIMITED_METHOD_CONTRACT
134 switch (refsize)
135 {
136 case k8:
137 return 0;
138
139 case k32:
140 return 0;
141
142 case k64Small:
143 return 5;
144
145 case k64:
146 return 10;
147
148 default:
149 _ASSERTE(!"unexpected refsize");
150 return 0;
151
152 }
153 }
154
155 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
156 {
157 LIMITED_METHOD_CONTRACT
158 if (k8 == refsize)
159 {
160 // do nothing, X64NearJump will take care of this
161 }
162 else if (k32 == refsize)
163 {
164 // do nothing, X64NearJump will take care of this
165 }
166 else if (k64Small == refsize)
167 {
168 UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
169 _ASSERTE(FitsInU4(TargetAddress));
170
171 // mov eax, imm32 ; zero-extended
172 pOutBuffer[0] = 0xB8;
173 *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
174 }
175 else if (k64 == refsize)
176 {
177 // mov rax, imm64
178 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
179 pOutBuffer[1] = 0xB8;
180 *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
181 }
182 else
183 {
184 _ASSERTE(!"unreached");
185 }
186 }
187
188 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
189 {
190 STATIC_CONTRACT_NOTHROW;
191 STATIC_CONTRACT_GC_NOTRIGGER;
192 STATIC_CONTRACT_FORBID_FAULT;
193
194
195 if (fExternal)
196 {
197 switch (refsize)
198 {
199 case InstructionFormat::k8:
200 // For external, we don't have enough info to predict
201 // the offset.
202 return FALSE;
203
204 case InstructionFormat::k32:
205 return sizeof(PVOID) <= sizeof(UINT32);
206
207 case InstructionFormat::k64Small:
208 return FitsInI4(offset);
209
210 case InstructionFormat::k64:
211 // intentional fallthru
212 case InstructionFormat::kAllowAlways:
213 return TRUE;
214
215 default:
216 _ASSERTE(0);
217 return FALSE;
218 }
219 }
220 else
221 {
222 switch (refsize)
223 {
224 case InstructionFormat::k8:
225 return FitsInI1(offset);
226
227 case InstructionFormat::k32:
228 return FitsInI4(offset);
229
230 case InstructionFormat::k64Small:
231 // EmitInstruction emits a non-relative jmp for
232 // k64Small. We don't have enough info to predict the
233 // target address. (Even if we did, this would only
234 // handle the set of unsigned offsets with bit 31 set
235 // and no higher bits set, too uncommon/hard to test.)
236 return FALSE;
237
238 case InstructionFormat::k64:
239 // intentional fallthru
240 case InstructionFormat::kAllowAlways:
241 return TRUE;
242 default:
243 _ASSERTE(0);
244 return FALSE;
245 }
246 }
247 }
248};
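// Byte sequences emitted by X64NearJumpSetup for each refsize (illustrative):
//   k8, k32  : nothing (X64NearJumpExecute encodes the displacement directly)
//   k64Small : B8 imm32        ; mov eax, target (zero-extended)
//   k64      : 48 B8 imm64     ; mov rax, target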
249
250class X64NearJumpExecute : public InstructionFormat
251{
252 public:
253 X64NearJumpExecute() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
254 | InstructionFormat::k64Small | InstructionFormat::k64
255 )
256 {
257 LIMITED_METHOD_CONTRACT;
258 }
259
260 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
261 {
262 LIMITED_METHOD_CONTRACT
263 switch (refsize)
264 {
265 case k8:
266 return 2;
267
268 case k32:
269 return 5;
270
271 case k64Small:
272 return 3;
273
274 case k64:
275 return 3;
276
277 default:
278 _ASSERTE(!"unexpected refsize");
279 return 0;
280
281 }
282 }
283
284 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
285 {
286 LIMITED_METHOD_CONTRACT
287 if (k8 == refsize)
288 {
289 pOutBuffer[0] = 0xeb;
290 *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
291 }
292 else if (k32 == refsize)
293 {
294 pOutBuffer[0] = 0xe9;
295 *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
296 }
297 else if (k64Small == refsize)
298 {
299 // REX.W jmp rax
300 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
301 pOutBuffer[1] = 0xFF;
302 pOutBuffer[2] = 0xE0;
303 }
304 else if (k64 == refsize)
305 {
306 // REX.W jmp rax
307 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
308 pOutBuffer[1] = 0xFF;
309 pOutBuffer[2] = 0xE0;
310 }
311 else
312 {
313 _ASSERTE(!"unreached");
314 }
315 }
316
317 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
318 {
319 STATIC_CONTRACT_NOTHROW;
320 STATIC_CONTRACT_GC_NOTRIGGER;
321 STATIC_CONTRACT_FORBID_FAULT;
322
323
324 if (fExternal)
325 {
326 switch (refsize)
327 {
328 case InstructionFormat::k8:
329 // For external, we don't have enough info to predict
330 // the offset.
331 return FALSE;
332
333 case InstructionFormat::k32:
334 return sizeof(PVOID) <= sizeof(UINT32);
335
336 case InstructionFormat::k64Small:
337 return FitsInI4(offset);
338
339 case InstructionFormat::k64:
340 // intentional fallthru
341 case InstructionFormat::kAllowAlways:
342 return TRUE;
343
344 default:
345 _ASSERTE(0);
346 return FALSE;
347 }
348 }
349 else
350 {
351 switch (refsize)
352 {
353 case InstructionFormat::k8:
354 return FitsInI1(offset);
355
356 case InstructionFormat::k32:
357 return FitsInI4(offset);
358
359 case InstructionFormat::k64Small:
360 // EmitInstruction emits a non-relative jmp for
361 // k64Small. We don't have enough info to predict the
362 // target address. (Even if we did, this would only
363 // handle the set of unsigned offsets with bit 31 set
364 // and no higher bits set, too uncommon/hard to test.)
365 return FALSE;
366
367 case InstructionFormat::k64:
368 // intentional fallthru
369 case InstructionFormat::kAllowAlways:
370 return TRUE;
371 default:
372 _ASSERTE(0);
373 return FALSE;
374 }
375 }
376 }
377};
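// Byte sequences emitted by X64NearJumpExecute for each refsize (illustrative):
//   k8       : EB rel8         ; jmp short target
//   k32      : E9 rel32        ; jmp near target
//   k64Small : 48 FF E0        ; jmp rax (target loaded by X64NearJumpSetup)
//   k64      : 48 FF E0        ; jmp rax (target loaded by X64NearJumpSetup)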
378
379#endif
380
381//-----------------------------------------------------------------------
382// InstructionFormat for near Jump and short Jump
383//-----------------------------------------------------------------------
384class X86NearJump : public InstructionFormat
385{
386 public:
387 X86NearJump() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
388#ifdef _TARGET_AMD64_
389 | InstructionFormat::k64Small | InstructionFormat::k64
390#endif // _TARGET_AMD64_
391 )
392 {
393 LIMITED_METHOD_CONTRACT;
394 }
395
396 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
397 {
398 LIMITED_METHOD_CONTRACT
399 switch (refsize)
400 {
401 case k8:
402 return 2;
403
404 case k32:
405 return 5;
406#ifdef _TARGET_AMD64_
407 case k64Small:
408 return 5 + 2;
409
410 case k64:
411 return 12;
412#endif // _TARGET_AMD64_
413 default:
414 _ASSERTE(!"unexpected refsize");
415 return 0;
416
417 }
418 }
419
420 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
421 {
422 LIMITED_METHOD_CONTRACT
423 if (k8 == refsize)
424 {
425 pOutBuffer[0] = 0xeb;
426 *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
427 }
428 else if (k32 == refsize)
429 {
430 pOutBuffer[0] = 0xe9;
431 *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
432 }
433#ifdef _TARGET_AMD64_
434 else if (k64Small == refsize)
435 {
436 UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
437 _ASSERTE(FitsInU4(TargetAddress));
438
439 // mov eax, imm32 ; zero-extended
440 pOutBuffer[0] = 0xB8;
441 *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
442
443 // jmp rax
444 pOutBuffer[5] = 0xFF;
445 pOutBuffer[6] = 0xE0;
446 }
447 else if (k64 == refsize)
448 {
449 // mov rax, imm64
450 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
451 pOutBuffer[1] = 0xB8;
452 *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
453
454 // jmp rax
455 pOutBuffer[10] = 0xFF;
456 pOutBuffer[11] = 0xE0;
457 }
458#endif // _TARGET_AMD64_
459 else
460 {
461 _ASSERTE(!"unreached");
462 }
463 }
464
465 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
466 {
467 STATIC_CONTRACT_NOTHROW;
468 STATIC_CONTRACT_GC_NOTRIGGER;
469 STATIC_CONTRACT_FORBID_FAULT;
470
471
472 if (fExternal)
473 {
474 switch (refsize)
475 {
476 case InstructionFormat::k8:
477 // For external, we don't have enough info to predict
478 // the offset.
479 return FALSE;
480
481 case InstructionFormat::k32:
482 return sizeof(PVOID) <= sizeof(UINT32);
483
484#ifdef _TARGET_AMD64_
485 case InstructionFormat::k64Small:
486 return FitsInI4(offset);
487
488 case InstructionFormat::k64:
489 // intentional fallthru
490#endif
491 case InstructionFormat::kAllowAlways:
492 return TRUE;
493
494 default:
495 _ASSERTE(0);
496 return FALSE;
497 }
498 }
499 else
500 {
501 switch (refsize)
502 {
503 case InstructionFormat::k8:
504 return FitsInI1(offset);
505
506 case InstructionFormat::k32:
507#ifdef _TARGET_AMD64_
508 return FitsInI4(offset);
509#else
510 return TRUE;
511#endif
512
513#ifdef _TARGET_AMD64_
514 case InstructionFormat::k64Small:
515 // EmitInstruction emits a non-relative jmp for
516 // k64Small. We don't have enough info to predict the
517 // target address. (Even if we did, this would only
518 // handle the set of unsigned offsets with bit 31 set
519 // and no higher bits set, too uncommon/hard to test.)
520 return FALSE;
521
522 case InstructionFormat::k64:
523 // intentional fallthru
524#endif
525 case InstructionFormat::kAllowAlways:
526 return TRUE;
527 default:
528 _ASSERTE(0);
529 return FALSE;
530 }
531 }
532 }
533};
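// Typical usage (an illustrative sketch; X86EmitNearJump is defined further down in this file,
// and the label helpers come from the base StubLinker):
//
//     CodeLabel* pDone = NewCodeLabel();
//     X86EmitNearJump(pDone);      // becomes EB rel8 or E9 rel32, whichever reaches the label
//     ...
//     EmitLabel(pDone);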
534
535
536//-----------------------------------------------------------------------
537// InstructionFormat for conditional jump. Set the variationCode
538// to members of X86CondCode.
539//-----------------------------------------------------------------------
540class X86CondJump : public InstructionFormat
541{
542 public:
543 X86CondJump(UINT allowedSizes) : InstructionFormat(allowedSizes)
544 {
545 LIMITED_METHOD_CONTRACT;
546 }
547
548 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
549 {
550 LIMITED_METHOD_CONTRACT
551 return (refsize == k8 ? 2 : 6);
552 }
553
554 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
555 {
556 LIMITED_METHOD_CONTRACT
557 if (refsize == k8)
558 {
559 pOutBuffer[0] = static_cast<BYTE>(0x70 | variationCode);
560 *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
561 }
562 else
563 {
564 pOutBuffer[0] = 0x0f;
565 pOutBuffer[1] = static_cast<BYTE>(0x80 | variationCode);
566 *((__int32*)(pOutBuffer+2)) = (__int32)fixedUpReference;
567 }
568 }
569};
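// Emitted encodings (illustrative): 0x70|cc followed by a rel8 for the short form, and
// 0x0F 0x80|cc followed by a rel32 for the near form, where cc is the X86CondCode condition
// passed in as the variationCode.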
570
571
572//-----------------------------------------------------------------------
573// InstructionFormat for near call.
574//-----------------------------------------------------------------------
575class X86Call : public InstructionFormat
576{
577 public:
578 X86Call ()
579 : InstructionFormat( InstructionFormat::k32
580#ifdef _TARGET_AMD64_
581 | InstructionFormat::k64Small | InstructionFormat::k64
582#endif // _TARGET_AMD64_
583 )
584 {
585 LIMITED_METHOD_CONTRACT;
586 }
587
588 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
589 {
590 LIMITED_METHOD_CONTRACT;
591
592 switch (refsize)
593 {
594 case k32:
595 return 5;
596
597#ifdef _TARGET_AMD64_
598 case k64Small:
599 return 5 + 2;
600
601 case k64:
602 return 10 + 2;
603#endif // _TARGET_AMD64_
604
605 default:
606 _ASSERTE(!"unexpected refsize");
607 return 0;
608 }
609 }
610
611 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
612 {
613 LIMITED_METHOD_CONTRACT
614
615 switch (refsize)
616 {
617 case k32:
618 pOutBuffer[0] = 0xE8;
619 *((__int32*)(1+pOutBuffer)) = (__int32)fixedUpReference;
620 break;
621
622#ifdef _TARGET_AMD64_
623 case k64Small:
624 UINT64 TargetAddress;
625
626 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
627 _ASSERTE(FitsInU4(TargetAddress));
628
629 // mov eax,<fixedUpReference> ; zero-extends
630 pOutBuffer[0] = 0xB8;
631 *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
632
633 // call rax
634 pOutBuffer[5] = 0xff;
635 pOutBuffer[6] = 0xd0;
636 break;
637
638 case k64:
639 // mov rax,<fixedUpReference>
640 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
641 pOutBuffer[1] = 0xB8;
642 *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
643
644 // call rax
645 pOutBuffer[10] = 0xff;
646 pOutBuffer[11] = 0xd0;
647 break;
648#endif // _TARGET_AMD64_
649
650 default:
651 _ASSERTE(!"unreached");
652 break;
653 }
654 }
655
656// For x86, the default CanReach implementation will suffice. It only needs
657// to handle k32.
658#ifdef _TARGET_AMD64_
659 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
660 {
661 if (fExternal)
662 {
663 switch (refsize)
664 {
665 case InstructionFormat::k32:
666 // For external, we don't have enough info to predict
667 // the offset.
668 return FALSE;
669
670 case InstructionFormat::k64Small:
671 return FitsInI4(offset);
672
673 case InstructionFormat::k64:
674 // intentional fallthru
675 case InstructionFormat::kAllowAlways:
676 return TRUE;
677
678 default:
679 _ASSERTE(0);
680 return FALSE;
681 }
682 }
683 else
684 {
685 switch (refsize)
686 {
687 case InstructionFormat::k32:
688 return FitsInI4(offset);
689
690 case InstructionFormat::k64Small:
691 // EmitInstruction emits a non-relative jmp for
692 // k64Small. We don't have enough info to predict the
693 // target address. (Even if we did, this would only
694 // handle the set of unsigned offsets with bit 31 set
695 // and no higher bits set, too uncommon/hard to test.)
696 return FALSE;
697
698 case InstructionFormat::k64:
699 // intentional fallthru
700 case InstructionFormat::kAllowAlways:
701 return TRUE;
702 default:
703 _ASSERTE(0);
704 return FALSE;
705 }
706 }
707 }
708#endif // _TARGET_AMD64_
709};
710
711
712//-----------------------------------------------------------------------
713// InstructionFormat for push imm32.
714//-----------------------------------------------------------------------
715class X86PushImm32 : public InstructionFormat
716{
717 public:
718 X86PushImm32(UINT allowedSizes) : InstructionFormat(allowedSizes)
719 {
720 LIMITED_METHOD_CONTRACT;
721 }
722
723 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
724 {
725 LIMITED_METHOD_CONTRACT;
726
727 return 5;
728 }
729
730 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
731 {
732 LIMITED_METHOD_CONTRACT;
733
734 pOutBuffer[0] = 0x68;
        // We only support an absolute pushimm32 of the label address. The fixedUpReference is
        // the offset to the label from the current point, so add to get the address.
737 *((__int32*)(1+pOutBuffer)) = (__int32)(fixedUpReference);
738 }
739};
740
741#if defined(_TARGET_AMD64_)
742//-----------------------------------------------------------------------
743// InstructionFormat for lea reg, [RIP relative].
744//-----------------------------------------------------------------------
745class X64LeaRIP : public InstructionFormat
746{
747 public:
748 X64LeaRIP() : InstructionFormat(InstructionFormat::k64Small)
749 {
750 LIMITED_METHOD_CONTRACT;
751 }
752
753 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
754 {
755 LIMITED_METHOD_CONTRACT;
756
757 return 7;
758 }
759
760 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
761 {
762 if (fExternal)
763 {
764 switch (refsize)
765 {
766 case InstructionFormat::k64Small:
767 // For external, we don't have enough info to predict
768 // the offset.
769 return FALSE;
770
771 case InstructionFormat::k64:
772 // intentional fallthru
773 case InstructionFormat::kAllowAlways:
774 return TRUE;
775
776 default:
777 _ASSERTE(0);
778 return FALSE;
779 }
780 }
781 else
782 {
783 switch (refsize)
784 {
785 case InstructionFormat::k64Small:
786 return FitsInI4(offset);
787
788 case InstructionFormat::k64:
789 // intentional fallthru
790 case InstructionFormat::kAllowAlways:
791 return TRUE;
792
793 default:
794 _ASSERTE(0);
795 return FALSE;
796 }
797 }
798 }
799
800 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
801 {
802 LIMITED_METHOD_CONTRACT;
803
804 X86Reg reg = (X86Reg)variationCode;
805 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
806
807 if (reg >= kR8)
808 {
809 rex |= REX_MODRM_REG_EXT;
810 reg = X86RegFromAMD64Reg(reg);
811 }
812
813 pOutBuffer[0] = rex;
814 pOutBuffer[1] = 0x8D;
815 pOutBuffer[2] = 0x05 | (reg << 3);
        // The fixedUpReference is the offset from the end of this instruction to the label,
        // which is exactly the RIP-relative displacement that the lea disp32 expects.
818 *((__int32*)(3+pOutBuffer)) = (__int32)(fixedUpReference);
819 }
820};
821
822#endif // _TARGET_AMD64_
823
824#if defined(_TARGET_AMD64_)
825static BYTE gX64NearJumpSetup[sizeof(X64NearJumpSetup)];
826static BYTE gX64NearJumpExecute[sizeof(X64NearJumpExecute)];
827static BYTE gX64LeaRIP[sizeof(X64LeaRIP)];
828#endif
829
830static BYTE gX86NearJump[sizeof(X86NearJump)];
831static BYTE gX86CondJump[sizeof(X86CondJump)];
832static BYTE gX86Call[sizeof(X86Call)];
833static BYTE gX86PushImm32[sizeof(X86PushImm32)];
834
835/* static */ void StubLinkerCPU::Init()
836{
837 CONTRACTL
838 {
839 THROWS;
840 GC_NOTRIGGER;
841 INJECT_FAULT(COMPlusThrowOM(););
842 }
843 CONTRACTL_END;
844 new (gX86NearJump) X86NearJump();
845 new (gX86CondJump) X86CondJump( InstructionFormat::k8|InstructionFormat::k32);
846 new (gX86Call) X86Call();
847 new (gX86PushImm32) X86PushImm32(InstructionFormat::k32);
848
849#if defined(_TARGET_AMD64_)
850 new (gX64NearJumpSetup) X64NearJumpSetup();
851 new (gX64NearJumpExecute) X64NearJumpExecute();
852 new (gX64LeaRIP) X64LeaRIP();
853#endif
854}
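// Note: the InstructionFormat singletons above are deliberately constructed with placement new
// from Init() into raw BYTE arrays rather than defined as ordinary globals, so no static
// constructors need to run at startup; the EmitLabelRef call sites below reinterpret_cast the
// arrays back to their concrete types.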
855
856//---------------------------------------------------------------
857// Emits:
858// mov destReg, srcReg
859//---------------------------------------------------------------
860VOID StubLinkerCPU::X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg)
861{
862 STANDARD_VM_CONTRACT;
863
864#ifdef _TARGET_AMD64_
865 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
866
867 if (destReg >= kR8)
868 {
869 rex |= REX_MODRM_RM_EXT;
870 destReg = X86RegFromAMD64Reg(destReg);
871 }
872 if (srcReg >= kR8)
873 {
874 rex |= REX_MODRM_REG_EXT;
875 srcReg = X86RegFromAMD64Reg(srcReg);
876 }
877 Emit8(rex);
878#endif
879
880 Emit8(0x89);
881 Emit8(static_cast<UINT8>(0xC0 | (srcReg << 3) | destReg));
882}
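// Example (illustrative): on AMD64, X86EmitMovRegReg(kEAX, kR10) emits 4C 89 D0
// (mov rax, r10): a REX.W|REX.R prefix, opcode 0x89, then ModRM 0xC0 | (srcReg << 3) | destReg.
// On x86 the same helper emits just the two-byte 89 /r form with 32-bit operands.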
883
884//---------------------------------------------------------------
885
886VOID StubLinkerCPU::X86EmitMovSPReg(X86Reg srcReg)
887{
888 STANDARD_VM_CONTRACT;
889 const X86Reg kESP = (X86Reg)4;
890 X86EmitMovRegReg(kESP, srcReg);
891}
892
893VOID StubLinkerCPU::X86EmitMovRegSP(X86Reg destReg)
894{
895 STANDARD_VM_CONTRACT;
896 const X86Reg kESP = (X86Reg)4;
897 X86EmitMovRegReg(destReg, kESP);
898}
899
900
901//---------------------------------------------------------------
902// Emits:
903// PUSH <reg32>
904//---------------------------------------------------------------
905VOID StubLinkerCPU::X86EmitPushReg(X86Reg reg)
906{
907 STANDARD_VM_CONTRACT;
908
909#ifdef STUBLINKER_GENERATES_UNWIND_INFO
910 X86Reg origReg = reg;
911#endif
912
913#ifdef _TARGET_AMD64_
914 if (reg >= kR8)
915 {
916 Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
917 reg = X86RegFromAMD64Reg(reg);
918 }
919#endif
920 Emit8(static_cast<UINT8>(0x50 + reg));
921
922#ifdef STUBLINKER_GENERATES_UNWIND_INFO
923 if (IsPreservedReg(origReg))
924 {
925 UnwindPushedReg(origReg);
926 }
927 else
928#endif
929 {
930 Push(sizeof(void*));
931 }
932}
933
934
935//---------------------------------------------------------------
936// Emits:
937// POP <reg32>
938//---------------------------------------------------------------
939VOID StubLinkerCPU::X86EmitPopReg(X86Reg reg)
940{
941 STANDARD_VM_CONTRACT;
942
943#ifdef _TARGET_AMD64_
944 if (reg >= kR8)
945 {
946 Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
947 reg = X86RegFromAMD64Reg(reg);
948 }
949#endif // _TARGET_AMD64_
950
951 Emit8(static_cast<UINT8>(0x58 + reg));
952 Pop(sizeof(void*));
953}
954
955//---------------------------------------------------------------
956// Emits:
957// PUSH <imm32>
958//---------------------------------------------------------------
959VOID StubLinkerCPU::X86EmitPushImm32(UINT32 value)
960{
961 STANDARD_VM_CONTRACT;
962
963 Emit8(0x68);
964 Emit32(value);
965 Push(sizeof(void*));
966}
967
968
969//---------------------------------------------------------------
970// Emits:
971// PUSH <imm32>
972//---------------------------------------------------------------
973VOID StubLinkerCPU::X86EmitPushImm32(CodeLabel &target)
974{
975 STANDARD_VM_CONTRACT;
976
977 EmitLabelRef(&target, reinterpret_cast<X86PushImm32&>(gX86PushImm32), 0);
978}
979
980
981//---------------------------------------------------------------
982// Emits:
983// PUSH <imm8>
984//---------------------------------------------------------------
985VOID StubLinkerCPU::X86EmitPushImm8(BYTE value)
986{
987 STANDARD_VM_CONTRACT;
988
989 Emit8(0x6a);
990 Emit8(value);
991 Push(sizeof(void*));
992}
993
994
995//---------------------------------------------------------------
996// Emits:
997// PUSH <ptr>
998//---------------------------------------------------------------
999VOID StubLinkerCPU::X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg /*=kR10*/))
1000{
1001 STANDARD_VM_CONTRACT;
1002
1003#ifdef _TARGET_AMD64_
1004 X86EmitRegLoad(tmpReg, (UINT_PTR) value);
1005 X86EmitPushReg(tmpReg);
1006#else
1007 X86EmitPushImm32((UINT_PTR) value);
1008#endif
1009}
1010
1011//---------------------------------------------------------------
1012// Emits:
1013// XOR <reg32>,<reg32>
1014//---------------------------------------------------------------
1015VOID StubLinkerCPU::X86EmitZeroOutReg(X86Reg reg)
1016{
1017 STANDARD_VM_CONTRACT;
1018
1019#ifdef _TARGET_AMD64_
1020 // 32-bit results are zero-extended, so we only need the REX byte if
1021 // it's an extended register.
1022 if (reg >= kR8)
1023 {
1024 Emit8(REX_PREFIX_BASE | REX_MODRM_REG_EXT | REX_MODRM_RM_EXT);
1025 reg = X86RegFromAMD64Reg(reg);
1026 }
1027#endif
1028 Emit8(0x33);
1029 Emit8(static_cast<UINT8>(0xc0 | (reg << 3) | reg));
1030}
1031
1032//---------------------------------------------------------------
1033// Emits:
1034// jmp [reg]
1035//---------------------------------------------------------------
1036VOID StubLinkerCPU::X86EmitJumpReg(X86Reg reg)
1037{
1038 CONTRACTL
1039 {
1040 STANDARD_VM_CHECK;
1041 }
1042 CONTRACTL_END;
1043
1044 Emit8(0xff);
1045 Emit8(static_cast<BYTE>(0xe0) | static_cast<BYTE>(reg));
1046}
1047
1048//---------------------------------------------------------------
1049// Emits:
1050// CMP <reg32>,imm32
1051//---------------------------------------------------------------
1052VOID StubLinkerCPU::X86EmitCmpRegImm32(X86Reg reg, INT32 imm32)
1053{
1054 CONTRACTL
1055 {
1056 STANDARD_VM_CHECK;
1057 PRECONDITION((int) reg < NumX86Regs);
1058 }
1059 CONTRACTL_END;
1060
1061#ifdef _TARGET_AMD64_
1062 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1063
1064 if (reg >= kR8)
1065 {
1066 rex |= REX_OPCODE_REG_EXT;
1067 reg = X86RegFromAMD64Reg(reg);
1068 }
1069 Emit8(rex);
1070#endif
1071
1072 if (FitsInI1(imm32)) {
1073 Emit8(0x83);
1074 Emit8(static_cast<UINT8>(0xF8 | reg));
1075 Emit8((INT8)imm32);
1076 } else {
1077 Emit8(0x81);
1078 Emit8(static_cast<UINT8>(0xF8 | reg));
1079 Emit32(imm32);
1080 }
1081}
1082
1083#ifdef _TARGET_AMD64_
1084//---------------------------------------------------------------
1085// Emits:
1086// CMP [reg+offs], imm32
1087// CMP [reg], imm32
1088//---------------------------------------------------------------
1089VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
1090{
1091 STANDARD_VM_CONTRACT;
1092
1093 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1094
1095 if (reg >= kR8)
1096 {
1097 rex |= REX_OPCODE_REG_EXT;
1098 reg = X86RegFromAMD64Reg(reg);
1099 }
1100 Emit8(rex);
1101
1102 X64EmitCmp32RegIndexImm32(reg, offs, imm32);
1103}
1104
1105VOID StubLinkerCPU:: X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
1106#else // _TARGET_AMD64_
1107VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
1108#endif // _TARGET_AMD64_
1109{
1110 CONTRACTL
1111 {
1112 STANDARD_VM_CHECK;
1113 PRECONDITION((int) reg < NumX86Regs);
1114 }
1115 CONTRACTL_END;
1116
    //
    // The binary encoding of "cmp [mem], imm" is:
    //   1000-00sw : mod-111-r/m
    // i.e. opcode 0x80/0x81 (selected by the w bit) with opcode extension /7 (111) in the
    // ModRM reg field.
    //
1121
1122 unsigned wBit = (FitsInI1(imm32) ? 0 : 1);
1123 Emit8(static_cast<UINT8>(0x80 | wBit));
1124
1125 unsigned modBits;
1126 if (offs == 0)
1127 modBits = 0;
1128 else if (FitsInI1(offs))
1129 modBits = 1;
1130 else
1131 modBits = 2;
1132
1133 Emit8(static_cast<UINT8>((modBits << 6) | 0x38 | reg));
1134
1135 if (offs)
1136 {
1137 if (FitsInI1(offs))
1138 Emit8((INT8)offs);
1139 else
1140 Emit32(offs);
1141 }
1142
1143 if (FitsInI1(imm32))
1144 Emit8((INT8)imm32);
1145 else
1146 Emit32(imm32);
1147}
1148
1149//---------------------------------------------------------------
1150// Emits:
1151#if defined(_TARGET_AMD64_)
1152// mov rax, <target>
1153// add rsp, imm32
1154// jmp rax
1155#else
// add esp, imm32
1157// jmp <target>
1158#endif
1159//---------------------------------------------------------------
1160VOID StubLinkerCPU::X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32)
1161{
1162 STANDARD_VM_CONTRACT;
1163
1164#if defined(_TARGET_AMD64_)
1165 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
1166 X86EmitAddEsp(imm32);
1167 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
1168#else
1169 X86EmitAddEsp(imm32);
1170 X86EmitNearJump(pTarget);
1171#endif
1172}
1173
1174//---------------------------------------------------------------
1175// Emits:
1176#if defined(_TARGET_AMD64_)
1177// mov rax, <target>
1178// pop reg
1179// jmp rax
1180#else
1181// pop reg
1182// jmp <target>
1183#endif
1184//---------------------------------------------------------------
1185VOID StubLinkerCPU::X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg)
1186{
1187 STANDARD_VM_CONTRACT;
1188
1189#if defined(_TARGET_AMD64_)
1190 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
1191 X86EmitPopReg(reg);
1192 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
1193#else
1194 X86EmitPopReg(reg);
1195 X86EmitNearJump(pTarget);
1196#endif
1197}
1198
1199//---------------------------------------------------------------
1200// Emits:
1201// JMP <ofs8> or
// JMP <ofs32>
1203//---------------------------------------------------------------
1204VOID StubLinkerCPU::X86EmitNearJump(CodeLabel *target)
1205{
1206 STANDARD_VM_CONTRACT;
1207 EmitLabelRef(target, reinterpret_cast<X86NearJump&>(gX86NearJump), 0);
1208}
1209
1210
1211//---------------------------------------------------------------
1212// Emits:
1213// Jcc <ofs8> or
1214// Jcc <ofs32>
1215//---------------------------------------------------------------
1216VOID StubLinkerCPU::X86EmitCondJump(CodeLabel *target, X86CondCode::cc condcode)
1217{
1218 STANDARD_VM_CONTRACT;
1219 EmitLabelRef(target, reinterpret_cast<X86CondJump&>(gX86CondJump), condcode);
1220}
1221
1222
1223//---------------------------------------------------------------
1224// Emits:
1225// call <ofs32>
1226//---------------------------------------------------------------
1227VOID StubLinkerCPU::X86EmitCall(CodeLabel *target, int iArgBytes)
1228{
1229 STANDARD_VM_CONTRACT;
1230
1231 EmitLabelRef(target, reinterpret_cast<X86Call&>(gX86Call), 0);
1232
1233 INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
1234 // we know that this is a call that can directly call
1235 // managed code
1236#ifndef _TARGET_AMD64_
1237 Pop(iArgBytes);
1238#endif // !_TARGET_AMD64_
1239}
1240
1241
1242//---------------------------------------------------------------
1243// Emits:
1244// ret n
1245//---------------------------------------------------------------
1246VOID StubLinkerCPU::X86EmitReturn(WORD wArgBytes)
1247{
1248 CONTRACTL
1249 {
1250 STANDARD_VM_CHECK;
1251#if defined(_TARGET_AMD64_) || defined(UNIX_X86_ABI)
1252 PRECONDITION(wArgBytes == 0);
1253#endif
1254
1255 }
1256 CONTRACTL_END;
1257
1258 if (wArgBytes == 0)
1259 Emit8(0xc3);
1260 else
1261 {
1262 Emit8(0xc2);
1263 Emit16(wArgBytes);
1264 }
1265
1266 Pop(wArgBytes);
1267}
1268
1269#ifdef _TARGET_AMD64_
//---------------------------------------------------------------
// Emits:
// lea <reg>, [rip + <ofs32>]
//---------------------------------------------------------------
1275VOID StubLinkerCPU::X86EmitLeaRIP(CodeLabel *target, X86Reg reg)
1276{
1277 STANDARD_VM_CONTRACT;
1278 EmitLabelRef(target, reinterpret_cast<X64LeaRIP&>(gX64LeaRIP), reg);
1279}
1280#endif // _TARGET_AMD64_
1281
1282
1283
1284VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet)
1285{
1286 STANDARD_VM_CONTRACT;
1287
1288 for (X86Reg r = kEAX; r <= NumX86Regs; r = (X86Reg)(r+1))
1289 if (regSet & (1U<<r))
1290 {
1291 X86EmitPushReg(r);
1292 }
1293}
1294
1295
1296VOID StubLinkerCPU::X86EmitPopRegs(unsigned regSet)
1297{
1298 STANDARD_VM_CONTRACT;
1299
1300 for (X86Reg r = NumX86Regs; r >= kEAX; r = (X86Reg)(r-1))
1301 if (regSet & (1U<<r))
1302 X86EmitPopReg(r);
1303}
1304
1305
1306//---------------------------------------------------------------
1307// Emits:
1308// mov <dstreg>, [<srcreg> + <ofs>]
1309//---------------------------------------------------------------
1310VOID StubLinkerCPU::X86EmitIndexRegLoad(X86Reg dstreg,
1311 X86Reg srcreg,
1312 __int32 ofs)
1313{
1314 STANDARD_VM_CONTRACT;
1315 X86EmitOffsetModRM(0x8b, dstreg, srcreg, ofs);
1316}
1317
1318
1319//---------------------------------------------------------------
1320// Emits:
1321// mov [<dstreg> + <ofs>],<srcreg>
1322//
// Note: If you intend to use this to perform 64-bit moves to an
//       RSP-based offset, this method may not work. Consider
//       using X86EmitIndexRegStoreRSP instead.
1326//---------------------------------------------------------------
1327VOID StubLinkerCPU::X86EmitIndexRegStore(X86Reg dstreg,
1328 __int32 ofs,
1329 X86Reg srcreg)
1330{
1331 STANDARD_VM_CONTRACT;
1332
1333 if (dstreg != kESP_Unsafe)
1334 X86EmitOffsetModRM(0x89, srcreg, dstreg, ofs);
1335 else
1336 X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs);
1337}
1338
1339#if defined(_TARGET_AMD64_)
1340//---------------------------------------------------------------
1341// Emits:
1342// mov [RSP + <ofs>],<srcreg>
1343//
// It marks the instruction as 64-bit so that the processor
// performs an 8-byte data move to an RSP-based stack location.
1346//---------------------------------------------------------------
1347VOID StubLinkerCPU::X86EmitIndexRegStoreRSP(__int32 ofs,
1348 X86Reg srcreg)
1349{
1350 STANDARD_VM_CONTRACT;
1351
1352 X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
1353}
1354
1355//---------------------------------------------------------------
1356// Emits:
1357// mov [R12 + <ofs>],<srcreg>
1358//
// It marks the instruction as 64-bit so that the processor
// performs an 8-byte data move to an R12-based stack location.
1361//---------------------------------------------------------------
1362VOID StubLinkerCPU::X86EmitIndexRegStoreR12(__int32 ofs,
1363 X86Reg srcreg)
1364{
1365 STANDARD_VM_CONTRACT;
1366
1367 X86EmitOp(0x89, srcreg, (X86Reg)kR12, ofs, (X86Reg)0, 0, k64BitOp);
1368}
1369#endif // defined(_TARGET_AMD64_)
1370
1371//---------------------------------------------------------------
1372// Emits:
1373// push dword ptr [<srcreg> + <ofs>]
1374//---------------------------------------------------------------
1375VOID StubLinkerCPU::X86EmitIndexPush(X86Reg srcreg, __int32 ofs)
1376{
1377 STANDARD_VM_CONTRACT;
1378
1379 if(srcreg != kESP_Unsafe)
1380 X86EmitOffsetModRM(0xff, (X86Reg)0x6, srcreg, ofs);
1381 else
1382 X86EmitOp(0xff,(X86Reg)0x6, srcreg, ofs);
1383
1384 Push(sizeof(void*));
1385}
1386
1387//---------------------------------------------------------------
1388// Emits:
1389// push dword ptr [<baseReg> + <indexReg>*<scale> + <ofs>]
1390//---------------------------------------------------------------
1391VOID StubLinkerCPU::X86EmitBaseIndexPush(
1392 X86Reg baseReg,
1393 X86Reg indexReg,
1394 __int32 scale,
1395 __int32 ofs)
1396{
1397 STANDARD_VM_CONTRACT;
1398
1399 X86EmitOffsetModRmSIB(0xff, (X86Reg)0x6, baseReg, indexReg, scale, ofs);
1400 Push(sizeof(void*));
1401}
1402
1403//---------------------------------------------------------------
1404// Emits:
1405// push dword ptr [ESP + <ofs>]
1406//---------------------------------------------------------------
1407VOID StubLinkerCPU::X86EmitSPIndexPush(__int32 ofs)
1408{
1409 STANDARD_VM_CONTRACT;
1410
1411 __int8 ofs8 = (__int8) ofs;
1412 if (ofs == (__int32) ofs8)
1413 {
1414 // The offset can be expressed in a byte (can use the byte
1415 // form of the push esp instruction)
1416
1417 BYTE code[] = {0xff, 0x74, 0x24, ofs8};
1418 EmitBytes(code, sizeof(code));
1419 }
1420 else
1421 {
1422 // The offset requires 4 bytes (need to use the long form
1423 // of the push esp instruction)
1424
1425 BYTE code[] = {0xff, 0xb4, 0x24, 0x0, 0x0, 0x0, 0x0};
1426 *(__int32 *)(&code[3]) = ofs;
1427 EmitBytes(code, sizeof(code));
1428 }
1429
1430 Push(sizeof(void*));
1431}
1432
1433
1434//---------------------------------------------------------------
1435// Emits:
1436// pop dword ptr [<srcreg> + <ofs>]
1437//---------------------------------------------------------------
1438VOID StubLinkerCPU::X86EmitIndexPop(X86Reg srcreg, __int32 ofs)
1439{
1440 STANDARD_VM_CONTRACT;
1441
1442 if(srcreg != kESP_Unsafe)
1443 X86EmitOffsetModRM(0x8f, (X86Reg)0x0, srcreg, ofs);
1444 else
1445 X86EmitOp(0x8f,(X86Reg)0x0, srcreg, ofs);
1446
1447 Pop(sizeof(void*));
1448}
1449
1450//---------------------------------------------------------------
1451// Emits:
// lea <dstreg>, [<srcreg> + <ofs>]
1453//---------------------------------------------------------------
1454VOID StubLinkerCPU::X86EmitIndexLea(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
1455{
1456 CONTRACTL
1457 {
1458 STANDARD_VM_CHECK;
1459 PRECONDITION((int) dstreg < NumX86Regs);
1460 PRECONDITION((int) srcreg < NumX86Regs);
1461 }
1462 CONTRACTL_END;
1463
1464 X86EmitOffsetModRM(0x8d, dstreg, srcreg, ofs);
1465}
1466
1467#if defined(_TARGET_AMD64_)
1468VOID StubLinkerCPU::X86EmitIndexLeaRSP(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
1469{
1470 STANDARD_VM_CONTRACT;
1471
1472 X86EmitOp(0x8d, dstreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
1473}
1474#endif // defined(_TARGET_AMD64_)
1475
1476//---------------------------------------------------------------
1477// Emits:
1478// sub esp, IMM
1479//---------------------------------------------------------------
1480VOID StubLinkerCPU::X86EmitSubEsp(INT32 imm32)
1481{
1482 STANDARD_VM_CONTRACT;
1483
1484 if (imm32 < 0x1000-100)
1485 {
        // As long as the adjustment is less than one page (minus a small
        // safety fudge factor), we can just bump esp directly.
1488 X86EmitSubEspWorker(imm32);
1489 }
1490 else
1491 {
1492 // Otherwise, must touch at least one byte for each page.
1493 while (imm32 >= 0x1000)
1494 {
1495
1496 X86EmitSubEspWorker(0x1000-4);
1497 X86EmitPushReg(kEAX);
1498
1499 imm32 -= 0x1000;
1500 }
1501 if (imm32 < 500)
1502 {
1503 X86EmitSubEspWorker(imm32);
1504 }
1505 else
1506 {
1507 // If the remainder is large, touch the last byte - again,
1508 // as a fudge factor.
1509 X86EmitSubEspWorker(imm32-4);
1510 X86EmitPushReg(kEAX);
1511 }
1512 }
1513}
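// Example (illustrative): X86EmitSubEsp(0x2100) emits roughly
//      sub esp, 0xFFC
//      push eax            ; touches the newly extended page
//      sub esp, 0xFFC
//      push eax
//      sub esp, 0x100      ; small remainder, no touch needed
// so the stack grows and is probed one page at a time.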
1514
1515
1516//---------------------------------------------------------------
1517// Emits:
1518// sub esp, IMM
1519//---------------------------------------------------------------
1520VOID StubLinkerCPU::X86EmitSubEspWorker(INT32 imm32)
1521{
1522 CONTRACTL
1523 {
1524 STANDARD_VM_CHECK;
1525
1526 // On Win32, stacks must be faulted in one page at a time.
1527 PRECONDITION(imm32 < 0x1000);
1528 }
1529 CONTRACTL_END;
1530
1531 if (!imm32)
1532 {
1533 // nop
1534 }
1535 else
1536 {
1537 X86_64BitOperands();
1538
1539 if (FitsInI1(imm32))
1540 {
1541 Emit16(0xec83);
1542 Emit8((INT8)imm32);
1543 }
1544 else
1545 {
1546 Emit16(0xec81);
1547 Emit32(imm32);
1548 }
1549
1550 Push(imm32);
1551 }
1552}
1553
1554
1555//---------------------------------------------------------------
1556// Emits:
1557// add esp, IMM
1558//---------------------------------------------------------------
1559VOID StubLinkerCPU::X86EmitAddEsp(INT32 imm32)
1560{
1561 STANDARD_VM_CONTRACT;
1562
1563 if (!imm32)
1564 {
1565 // nop
1566 }
1567 else
1568 {
1569 X86_64BitOperands();
1570
1571 if (FitsInI1(imm32))
1572 {
1573 Emit16(0xc483);
1574 Emit8((INT8)imm32);
1575 }
1576 else
1577 {
1578 Emit16(0xc481);
1579 Emit32(imm32);
1580 }
1581 }
1582 Pop(imm32);
1583}
1584
1585VOID StubLinkerCPU::X86EmitAddReg(X86Reg reg, INT32 imm32)
1586{
1587 CONTRACTL
1588 {
1589 STANDARD_VM_CHECK;
1590 PRECONDITION((int) reg < NumX86Regs);
1591 }
1592 CONTRACTL_END;
1593
1594 if (imm32 == 0)
1595 return;
1596
1597#ifdef _TARGET_AMD64_
1598 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1599
1600 if (reg >= kR8)
1601 {
1602 rex |= REX_OPCODE_REG_EXT;
1603 reg = X86RegFromAMD64Reg(reg);
1604 }
1605 Emit8(rex);
1606#endif
1607
1608 if (FitsInI1(imm32)) {
1609 Emit8(0x83);
1610 Emit8(static_cast<UINT8>(0xC0 | reg));
1611 Emit8(static_cast<UINT8>(imm32));
1612 } else {
1613 Emit8(0x81);
1614 Emit8(static_cast<UINT8>(0xC0 | reg));
1615 Emit32(imm32);
1616 }
1617}
1618
1619//---------------------------------------------------------------
1620// Emits: add destReg, srcReg
1621//---------------------------------------------------------------
1622
1623VOID StubLinkerCPU::X86EmitAddRegReg(X86Reg destReg, X86Reg srcReg)
1624{
1625 STANDARD_VM_CONTRACT;
1626
1627 X86EmitR2ROp(0x01, srcReg, destReg);
1628}
1629
1630
1631
1632
1633VOID StubLinkerCPU::X86EmitSubReg(X86Reg reg, INT32 imm32)
1634{
1635 CONTRACTL
1636 {
1637 STANDARD_VM_CHECK;
1638 PRECONDITION((int) reg < NumX86Regs);
1639 }
1640 CONTRACTL_END;
1641
1642#ifdef _TARGET_AMD64_
1643 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1644
1645 if (reg >= kR8)
1646 {
1647 rex |= REX_OPCODE_REG_EXT;
1648 reg = X86RegFromAMD64Reg(reg);
1649 }
1650 Emit8(rex);
1651#endif
1652
1653 if (FitsInI1(imm32)) {
1654 Emit8(0x83);
1655 Emit8(static_cast<UINT8>(0xE8 | reg));
1656 Emit8(static_cast<UINT8>(imm32));
1657 } else {
1658 Emit8(0x81);
1659 Emit8(static_cast<UINT8>(0xE8 | reg));
1660 Emit32(imm32);
1661 }
1662}
1663
1664//---------------------------------------------------------------
1665// Emits: sub destReg, srcReg
1666//---------------------------------------------------------------
1667
1668VOID StubLinkerCPU::X86EmitSubRegReg(X86Reg destReg, X86Reg srcReg)
1669{
1670 STANDARD_VM_CONTRACT;
1671
1672 X86EmitR2ROp(0x29, srcReg, destReg);
1673}
1674
1675#if defined(_TARGET_AMD64_)
1676
1677//---------------------------------------------------------------
// movaps destXmmreg, srcXmmReg
1679//---------------------------------------------------------------
1680VOID StubLinkerCPU::X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg)
1681{
1682 STANDARD_VM_CONTRACT;
    // There are several instructions that could be used to move XMM registers; MOVAPS is
    // what the C++ compiler uses, so let's use it here too.
1685 X86EmitR2ROp(X86_INSTR_MOVAPS_R_RM, destXmmreg, srcXmmReg, k32BitOp);
1686}
1687
1688//---------------------------------------------------------------
1689// movdqa XmmN, [baseReg + offset]
1690//---------------------------------------------------------------
1691VOID StubLinkerCPU::X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1692{
1693 STANDARD_VM_CONTRACT;
1694 X64EmitMovXmmWorker(0x66, 0x6F, Xmmreg, baseReg, ofs);
1695}
1696
1697//---------------------------------------------------------------
1698// movdqa [baseReg + offset], XmmN
1699//---------------------------------------------------------------
1700VOID StubLinkerCPU::X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1701{
1702 STANDARD_VM_CONTRACT;
1703 X64EmitMovXmmWorker(0x66, 0x7F, Xmmreg, baseReg, ofs);
1704}
1705
1706//---------------------------------------------------------------
1707// movsd XmmN, [baseReg + offset]
1708//---------------------------------------------------------------
1709VOID StubLinkerCPU::X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1710{
1711 STANDARD_VM_CONTRACT;
1712 X64EmitMovXmmWorker(0xF2, 0x10, Xmmreg, baseReg, ofs);
1713}
1714
1715//---------------------------------------------------------------
1716// movsd [baseReg + offset], XmmN
1717//---------------------------------------------------------------
1718VOID StubLinkerCPU::X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1719{
1720 STANDARD_VM_CONTRACT;
1721 X64EmitMovXmmWorker(0xF2, 0x11, Xmmreg, baseReg, ofs);
1722}
1723
1724//---------------------------------------------------------------
1725// movss XmmN, [baseReg + offset]
1726//---------------------------------------------------------------
1727VOID StubLinkerCPU::X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1728{
1729 STANDARD_VM_CONTRACT;
1730 X64EmitMovXmmWorker(0xF3, 0x10, Xmmreg, baseReg, ofs);
1731}
1732
1733//---------------------------------------------------------------
1734// movss [baseReg + offset], XmmN
1735//---------------------------------------------------------------
1736VOID StubLinkerCPU::X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1737{
1738 STANDARD_VM_CONTRACT;
1739 X64EmitMovXmmWorker(0xF3, 0x11, Xmmreg, baseReg, ofs);
1740}
1741
1742//---------------------------------------------------------------
1743// Helper method for emitting of XMM from/to memory moves
1744//---------------------------------------------------------------
1745VOID StubLinkerCPU::X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1746{
1747 STANDARD_VM_CONTRACT;
1748
1749 BYTE codeBuffer[10];
1750 unsigned int nBytes = 0;
1751
    // Set up the mandatory legacy prefix (0x66 for movdqa, 0xF2 for movsd, 0xF3 for movss)
    codeBuffer[nBytes++] = prefix;

    // By default, assume we don't have to emit the REX byte.
1756 bool fEmitRex = false;
1757
1758 BYTE rex = REX_PREFIX_BASE;
1759
1760 if (baseReg >= kR8)
1761 {
1762 rex |= REX_MODRM_RM_EXT;
1763 baseReg = X86RegFromAMD64Reg(baseReg);
1764 fEmitRex = true;
1765 }
1766 if (Xmmreg >= kXMM8)
1767 {
1768 rex |= REX_MODRM_REG_EXT;
1769 Xmmreg = X86RegFromAMD64Reg(Xmmreg);
1770 fEmitRex = true;
1771 }
1772
1773 if (fEmitRex == true)
1774 {
1775 codeBuffer[nBytes++] = rex;
1776 }
1777
1778 // Next, specify the two byte opcode - first byte is always 0x0F.
1779 codeBuffer[nBytes++] = 0x0F;
1780 codeBuffer[nBytes++] = opcode;
1781
1782 BYTE modrm = static_cast<BYTE>((Xmmreg << 3) | baseReg);
1783 bool fOffsetFitsInSignedByte = FitsInI1(ofs)?true:false;
1784
1785 if (fOffsetFitsInSignedByte)
1786 codeBuffer[nBytes++] = 0x40|modrm;
1787 else
1788 codeBuffer[nBytes++] = 0x80|modrm;
1789
1790 // If we are dealing with RSP or R12 as the baseReg, we need to emit the SIB byte.
1791 if ((baseReg == (X86Reg)4 /*kRSP*/) || (baseReg == kR12))
1792 {
1793 codeBuffer[nBytes++] = 0x24;
1794 }
1795
1796 // Finally, specify the offset
1797 if (fOffsetFitsInSignedByte)
1798 {
1799 codeBuffer[nBytes++] = (BYTE)ofs;
1800 }
1801 else
1802 {
1803 *((__int32*)(codeBuffer+nBytes)) = ofs;
1804 nBytes += 4;
1805 }
1806
1807 _ASSERTE(nBytes <= _countof(codeBuffer));
1808
1809 // Lastly, emit the encoded bytes
1810 EmitBytes(codeBuffer, nBytes);
1811}
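// Example (illustrative): X64EmitMovSDToMem(kXMM8, kR12, 0x20) goes through this worker and
// emits F2 45 0F 11 44 24 20 (movsd qword ptr [r12+20h], xmm8): the 0xF2 prefix, a REX byte
// carrying REX.R/REX.B for the extended registers, the 0F 11 opcode, a ModRM with a disp8,
// the mandatory SIB byte for an rsp/r12 base, and finally the offset.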
1812
1813#endif // defined(_TARGET_AMD64_)
1814
1815//---------------------------------------------------------------
1816// Emits a MOD/RM for accessing a dword at [<indexreg> + ofs32]
1817//---------------------------------------------------------------
1818VOID StubLinkerCPU::X86EmitOffsetModRM(BYTE opcode, X86Reg opcodereg, X86Reg indexreg, __int32 ofs)
1819{
1820 STANDARD_VM_CONTRACT;
1821
1822 BYTE codeBuffer[7];
1823 BYTE* code = codeBuffer;
1824 int nBytes = 0;
1825#ifdef _TARGET_AMD64_
1826 code++;
1827 //
1828 // code points to base X86 instruction,
1829 // codeBuffer points to full AMD64 instruction
1830 //
1831 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1832
1833 if (indexreg >= kR8)
1834 {
1835 rex |= REX_MODRM_RM_EXT;
1836 indexreg = X86RegFromAMD64Reg(indexreg);
1837 }
1838 if (opcodereg >= kR8)
1839 {
1840 rex |= REX_MODRM_REG_EXT;
1841 opcodereg = X86RegFromAMD64Reg(opcodereg);
1842 }
1843
1844 nBytes++;
1845 code[-1] = rex;
1846#endif
1847 code[0] = opcode;
1848 nBytes++;
1849 BYTE modrm = static_cast<BYTE>((opcodereg << 3) | indexreg);
1850 if (ofs == 0 && indexreg != kEBP)
1851 {
1852 code[1] = modrm;
1853 nBytes++;
1854 EmitBytes(codeBuffer, nBytes);
1855 }
1856 else if (FitsInI1(ofs))
1857 {
1858 code[1] = 0x40|modrm;
1859 code[2] = (BYTE)ofs;
1860 nBytes += 2;
1861 EmitBytes(codeBuffer, nBytes);
1862 }
1863 else
1864 {
1865 code[1] = 0x80|modrm;
1866 *((__int32*)(2+code)) = ofs;
1867 nBytes += 5;
1868 EmitBytes(codeBuffer, nBytes);
1869 }
1870}
1871
1872//---------------------------------------------------------------
1873// Emits a MOD/RM for accessing a dword at [<baseReg> + <indexReg>*<scale> + ofs32]
1874//---------------------------------------------------------------
1875VOID StubLinkerCPU::X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs)
1876{
1877 CONTRACTL
1878 {
1879 STANDARD_VM_CHECK;
1880 PRECONDITION(scale == 1 || scale == 2 || scale == 4 || scale == 8);
1881 PRECONDITION(indexReg != kESP_Unsafe);
1882 }
1883 CONTRACTL_END;
1884
1885 BYTE codeBuffer[8];
1886 BYTE* code = codeBuffer;
1887 int nBytes = 0;
1888
1889#ifdef _TARGET_AMD64_
1890 _ASSERTE(!"NYI");
1891#endif
1892 code[0] = opcode;
1893 nBytes++;
1894
1895 BYTE scaleEnc = 0;
1896 switch(scale)
1897 {
1898 case 1: scaleEnc = 0; break;
1899 case 2: scaleEnc = 1; break;
1900 case 4: scaleEnc = 2; break;
1901 case 8: scaleEnc = 3; break;
1902 default: _ASSERTE(!"Unexpected");
1903 }
1904
1905 BYTE sib = static_cast<BYTE>((scaleEnc << 6) | (indexReg << 3) | baseReg);
1906
1907 if (FitsInI1(ofs))
1908 {
1909 code[1] = static_cast<BYTE>(0x44 | (opcodeOrReg << 3));
1910 code[2] = sib;
1911 code[3] = (BYTE)ofs;
1912 nBytes += 3;
1913 EmitBytes(codeBuffer, nBytes);
1914 }
1915 else
1916 {
1917 code[1] = static_cast<BYTE>(0x84 | (opcodeOrReg << 3));
1918 code[2] = sib;
1919 *(__int32*)(&code[3]) = ofs;
1920 nBytes += 6;
1921 EmitBytes(codeBuffer, nBytes);
1922 }
1923}
1924
1925
1926
1927VOID StubLinkerCPU::X86EmitRegLoad(X86Reg reg, UINT_PTR imm)
1928{
1929 STANDARD_VM_CONTRACT;
1930
1931 if (!imm)
1932 {
1933 X86EmitZeroOutReg(reg);
1934 return;
1935 }
1936
1937 UINT cbimm = sizeof(void*);
1938
1939#ifdef _TARGET_AMD64_
1940 // amd64 zero-extends all 32-bit operations. If the immediate will fit in
1941 // 32 bits, use the smaller encoding.
1942
1943 if (reg >= kR8 || !FitsInU4(imm))
1944 {
1945 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1946 if (reg >= kR8)
1947 {
1948 rex |= REX_MODRM_RM_EXT;
1949 reg = X86RegFromAMD64Reg(reg);
1950 }
1951 Emit8(rex);
1952 }
1953 else
1954 {
1955 // amd64 is little endian, so the &imm below will correctly read off
1956 // the low 4 bytes.
1957 cbimm = sizeof(UINT32);
1958 }
1959#endif // _TARGET_AMD64_
1960 Emit8(0xB8 | (BYTE)reg);
1961 EmitBytes((BYTE*)&imm, cbimm);
1962}
1963
1964
1965//---------------------------------------------------------------
1966// Emits the most efficient form of the operation:
1967//
1968// opcode altreg, [basereg + scaledreg*scale + ofs]
1969//
1970// or
1971//
1972// opcode [basereg + scaledreg*scale + ofs], altreg
1973//
1974// (the opcode determines which comes first.)
1975//
1976//
1977// Limitations:
1978//
1979// scale must be 0,1,2,4 or 8.
1980// if scale == 0, scaledreg is ignored.
1981// basereg and altreg may be equal to 4 (ESP) but scaledreg cannot
1982// for some opcodes, "altreg" may actually select an operation
1983// rather than a second register argument.
1984// if basereg is EBP, scale must be 0.
1985//
1986//---------------------------------------------------------------
1987VOID StubLinkerCPU::X86EmitOp(WORD opcode,
1988 X86Reg altreg,
1989 X86Reg basereg,
1990 __int32 ofs /*=0*/,
1991 X86Reg scaledreg /*=0*/,
1992 BYTE scale /*=0*/
1993 AMD64_ARG(X86OperandSize OperandSize /*= k32BitOp*/))
1994{
1995 CONTRACTL
1996 {
1997 STANDARD_VM_CHECK;
1998
1999 // All 2-byte opcodes start with 0x0f.
2000 PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
2001
2002 PRECONDITION(scale == 0 || scale == 1 || scale == 2 || scale == 4 || scale == 8);
2003 PRECONDITION(scaledreg != (X86Reg)4);
2004 PRECONDITION(!(basereg == kEBP && scale != 0));
2005
2006 PRECONDITION( ((UINT)basereg) < NumX86Regs );
2007 PRECONDITION( ((UINT)scaledreg) < NumX86Regs );
2008 PRECONDITION( ((UINT)altreg) < NumX86Regs );
2009 }
2010 CONTRACTL_END;
2011
2012#ifdef _TARGET_AMD64_
2013 if ( k64BitOp == OperandSize
2014 || altreg >= kR8
2015 || basereg >= kR8
2016 || scaledreg >= kR8)
2017 {
2018 BYTE rex = REX_PREFIX_BASE;
2019
2020 if (k64BitOp == OperandSize)
2021 rex |= REX_OPERAND_SIZE_64BIT;
2022
2023 if (altreg >= kR8)
2024 {
2025 rex |= REX_MODRM_REG_EXT;
2026 altreg = X86RegFromAMD64Reg(altreg);
2027 }
2028
2029 if (basereg >= kR8)
2030 {
2031 // basereg might be in the modrm or sib fields. This will be
2032 // decided below, but the encodings are the same either way.
2033 _ASSERTE(REX_SIB_BASE_EXT == REX_MODRM_RM_EXT);
2034 rex |= REX_SIB_BASE_EXT;
2035 basereg = X86RegFromAMD64Reg(basereg);
2036 }
2037
2038 if (scaledreg >= kR8)
2039 {
2040 rex |= REX_SIB_INDEX_EXT;
2041 scaledreg = X86RegFromAMD64Reg(scaledreg);
2042 }
2043
2044 Emit8(rex);
2045 }
2046#endif // _TARGET_AMD64_
2047
2048 BYTE modrmbyte = static_cast<BYTE>(altreg << 3);
2049 BOOL fNeedSIB = FALSE;
2050 BYTE SIBbyte = 0;
2051 BYTE ofssize;
2052 BYTE scaleselect= 0;
2053
2054 if (ofs == 0 && basereg != kEBP)
2055 {
2056 ofssize = 0; // Don't change this constant!
2057 }
2058 else if (FitsInI1(ofs))
2059 {
2060 ofssize = 1; // Don't change this constant!
2061 }
2062 else
2063 {
2064 ofssize = 2; // Don't change this constant!
2065 }
2066
2067 switch (scale)
2068 {
2069 case 1: scaleselect = 0; break;
2070 case 2: scaleselect = 1; break;
2071 case 4: scaleselect = 2; break;
2072 case 8: scaleselect = 3; break;
2073 }
2074
2075 if (scale == 0 && basereg != (X86Reg)4 /*ESP*/)
2076 {
2077 // [basereg + ofs]
2078 modrmbyte |= basereg | (ofssize << 6);
2079 }
2080 else if (scale == 0)
2081 {
2082 // [esp + ofs]
2083 _ASSERTE(basereg == (X86Reg)4);
2084 fNeedSIB = TRUE;
2085 SIBbyte = 0044;
2086
2087 modrmbyte |= 4 | (ofssize << 6);
2088 }
2089 else
2090 {
2091
2092 //[basereg + scaledreg*scale + ofs]
2093
2094 modrmbyte |= 0004 | (ofssize << 6);
2095 fNeedSIB = TRUE;
2096 SIBbyte = static_cast<BYTE>((scaleselect << 6) | (scaledreg << 3) | basereg);
2097
2098 }
2099
2100 //Some sanity checks:
2101 _ASSERTE(!(fNeedSIB && basereg == kEBP)); // EBP not valid as a SIB base register.
2102 _ASSERTE(!( (!fNeedSIB) && basereg == (X86Reg)4 )) ; // ESP addressing requires SIB byte
2103
2104 Emit8((BYTE)opcode);
2105
2106 if (opcode >> 8)
2107 Emit8(opcode >> 8);
2108
2109 Emit8(modrmbyte);
2110 if (fNeedSIB)
2111 {
2112 Emit8(SIBbyte);
2113 }
2114 switch (ofssize)
2115 {
2116 case 0: break;
2117 case 1: Emit8( (__int8)ofs ); break;
2118 case 2: Emit32( ofs ); break;
2119 default: _ASSERTE(!"Can't get here.");
2120 }
2121}
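// Example (illustrative): on x86, X86EmitOp(0x8b, kEAX, kESI, 8, kECX, 4) emits
// 8B 44 8E 08 (mov eax, [esi + ecx*4 + 8]): the opcode, ModRM 0x44 (mod=01, reg=eax, rm=SIB),
// SIB 0x8E (scale=4, index=ecx, base=esi), then the 8-bit displacement.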
2122
2123
2124// Emits
2125//
2126// opcode altreg, modrmreg
2127//
2128// or
2129//
2130// opcode modrmreg, altreg
2131//
2132// (the opcode determines which one comes first)
2133//
2134// For single-operand opcodes, "altreg" actually selects
2135// an operation rather than a register.
2136
2137VOID StubLinkerCPU::X86EmitR2ROp (WORD opcode,
2138 X86Reg altreg,
2139 X86Reg modrmreg
2140 AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
2141 )
2142{
2143 CONTRACTL
2144 {
2145 STANDARD_VM_CHECK;
2146
2147 // All 2-byte opcodes start with 0x0f.
2148 PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
2149
2150 PRECONDITION( ((UINT)altreg) < NumX86Regs );
2151 PRECONDITION( ((UINT)modrmreg) < NumX86Regs );
2152 }
2153 CONTRACTL_END;
2154
2155#ifdef _TARGET_AMD64_
2156 BYTE rex = 0;
2157
2158 if (modrmreg >= kR8)
2159 {
2160 rex |= REX_MODRM_RM_EXT;
2161 modrmreg = X86RegFromAMD64Reg(modrmreg);
2162 }
2163
2164 if (altreg >= kR8)
2165 {
2166 rex |= REX_MODRM_REG_EXT;
2167 altreg = X86RegFromAMD64Reg(altreg);
2168 }
2169
2170 if (k64BitOp == OperandSize)
2171 rex |= REX_OPERAND_SIZE_64BIT;
2172
2173 if (rex)
2174 Emit8(REX_PREFIX_BASE | rex);
2175#endif // _TARGET_AMD64_
2176
2177 Emit8((BYTE)opcode);
2178
2179 if (opcode >> 8)
2180 Emit8(opcode >> 8);
2181
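    // ModRM with mod=11 (register-direct): 0300 | (reg = altreg) | (rm = modrmreg)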
2182 Emit8(static_cast<UINT8>(0300 | (altreg << 3) | modrmreg));
2183}
2184
2185
2186//---------------------------------------------------------------
2187// Emits:
2188// op altreg, [esp+ofs]
2189//---------------------------------------------------------------
2190VOID StubLinkerCPU::X86EmitEspOffset(BYTE opcode,
2191 X86Reg altreg,
2192 __int32 ofs
2193 AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
2194 )
2195{
2196 STANDARD_VM_CONTRACT;
2197
2198 BYTE codeBuffer[8];
2199 BYTE *code = codeBuffer;
2200 int nBytes;
2201
2202#ifdef _TARGET_AMD64_
2203 BYTE rex = 0;
2204
2205 if (k64BitOp == OperandSize)
2206 rex |= REX_OPERAND_SIZE_64BIT;
2207
2208 if (altreg >= kR8)
2209 {
2210 rex |= REX_MODRM_REG_EXT;
2211 altreg = X86RegFromAMD64Reg(altreg);
2212 }
2213
2214 if (rex)
2215 {
2216 *code = (REX_PREFIX_BASE | rex);
2217 code++;
2218 nBytes = 1;
2219 }
2220 else
2221#endif // _TARGET_AMD64_
2222 {
2223 nBytes = 0;
2224 }
2225
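    // ModRM rm=100 (the 004 below) selects a SIB byte; SIB 0044 (octal) encodes
    // [ESP] with no index. Mod bits 0x40/0x80 add an 8-bit/32-bit displacement.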
2226 code[0] = opcode;
2227 BYTE modrm = static_cast<BYTE>((altreg << 3) | 004);
2228 if (ofs == 0)
2229 {
2230 code[1] = modrm;
2231 code[2] = 0044;
2232 EmitBytes(codeBuffer, 3 + nBytes);
2233 }
2234 else if (FitsInI1(ofs))
2235 {
2236 code[1] = 0x40|modrm;
2237 code[2] = 0044;
2238 code[3] = (BYTE)ofs;
2239 EmitBytes(codeBuffer, 4 + nBytes);
2240 }
2241 else
2242 {
2243 code[1] = 0x80|modrm;
2244 code[2] = 0044;
2245 *((__int32*)(3+code)) = ofs;
2246 EmitBytes(codeBuffer, 7 + nBytes);
2247 }
2248
2249}
2250
2251//---------------------------------------------------------------
2252
2253VOID StubLinkerCPU::X86EmitPushEBPframe()
2254{
2255 STANDARD_VM_CONTRACT;
2256
2257 // push ebp
2258 X86EmitPushReg(kEBP);
2259 // mov ebp,esp
2260 X86EmitMovRegSP(kEBP);
2261}
2262
2263#ifdef _DEBUG
2264//---------------------------------------------------------------
2265// Emits:
2266// mov <reg32>,0xcccccccc
2267//---------------------------------------------------------------
2268VOID StubLinkerCPU::X86EmitDebugTrashReg(X86Reg reg)
2269{
2270 STANDARD_VM_CONTRACT;
2271
2272#ifdef _TARGET_AMD64_
2273 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
2274
2275 if (reg >= kR8)
2276 {
2277 rex |= REX_OPCODE_REG_EXT;
2278 reg = X86RegFromAMD64Reg(reg);
2279 }
2280 Emit8(rex);
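    // REX.W + (0xB8 + reg): mov reg64, imm64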
2281 Emit8(0xb8|reg);
2282 Emit64(0xcccccccccccccccc);
2283#else
2284 Emit8(static_cast<UINT8>(0xb8 | reg));
2285 Emit32(0xcccccccc);
2286#endif
2287}
2288#endif //_DEBUG
2289
2290
// Get the X86Reg index of an argument register based on its offset into ArgumentRegisters
2292X86Reg GetX86ArgumentRegisterFromOffset(size_t ofs)
2293{
2294 CONTRACT(X86Reg)
2295 {
2296 NOTHROW;
2297 GC_NOTRIGGER;
2298
2299 }
2300 CONTRACT_END;
2301
2302 #define ARGUMENT_REGISTER(reg) if (ofs == offsetof(ArgumentRegisters, reg)) RETURN k##reg ;
2303 ENUM_ARGUMENT_REGISTERS();
2304 #undef ARGUMENT_REGISTER
2305
    _ASSERTE(0); // Can't get here.
2307 RETURN kEBP;
2308}
2309
2310
2311#ifdef _TARGET_AMD64_
2312static const X86Reg c_argRegs[] = {
2313 #define ARGUMENT_REGISTER(regname) k##regname,
2314 ENUM_ARGUMENT_REGISTERS()
2315 #undef ARGUMENT_REGISTER
2316};
2317#endif
2318
2319
2320#ifndef CROSSGEN_COMPILE
2321
2322#if defined(_DEBUG) && !defined(FEATURE_PAL)
2323void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount)
2324{
2325 STANDARD_VM_CONTRACT;
2326
2327 VMHELPCOUNTDEF* pHelperFuncCount = (VMHELPCOUNTDEF*)helperFuncCount;
2328/*
2329 push rcx
2330 mov rcx, &(pHelperFuncCount->count)
2331 lock inc [rcx]
2332 pop rcx
2333#ifdef _TARGET_AMD64_
2334 mov rax, <pJitHelper>
2335 jmp rax
2336#else
2337 jmp <pJitHelper>
2338#endif
2339*/
2340
2341 // push rcx
2342 // mov rcx, &(pHelperFuncCount->count)
2343 X86EmitPushReg(kECX);
2344 X86EmitRegLoad(kECX, (UINT_PTR)(&(pHelperFuncCount->count)));
2345
2346 // lock inc [rcx]
2347 BYTE lock_inc_RCX[] = { 0xf0, 0xff, 0x01 };
2348 EmitBytes(lock_inc_RCX, sizeof(lock_inc_RCX));
2349
2350#if defined(_TARGET_AMD64_)
2351 // mov rax, <pJitHelper>
2352 // pop rcx
2353 // jmp rax
2354#else
2355 // pop rcx
2356 // jmp <pJitHelper>
2357#endif
2358 X86EmitTailcallWithSinglePop(NewExternalCodeLabel(pJitHelper), kECX);
2359}
2360#endif // _DEBUG && !FEATURE_PAL
2361
2362VOID StubLinkerCPU::X86EmitCurrentThreadFetch(X86Reg dstreg, unsigned preservedRegSet)
2363{
2364 CONTRACTL
2365 {
2366 STANDARD_VM_CHECK;
2367
2368 // It doesn't make sense to have the destination register be preserved
2369 PRECONDITION((preservedRegSet & (1 << dstreg)) == 0);
2370 AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
2371 }
2372 CONTRACTL_END;
2373
2374#ifdef FEATURE_PAL
2375
2376 X86EmitPushRegs(preservedRegSet & ((1 << kEAX) | (1 << kEDX) | (1 << kECX)));
2377
2378 // call GetThread
2379 X86EmitCall(NewExternalCodeLabel((LPVOID)GetThread), sizeof(void*));
2380
2381 // mov dstreg, eax
2382 X86EmitMovRegReg(dstreg, kEAX);
2383
2384 X86EmitPopRegs(preservedRegSet & ((1 << kEAX) | (1 << kEDX) | (1 << kECX)));
2385
2386#ifdef _DEBUG
    // Trash caller-saved regs that we were not told to preserve and that aren't the dstreg.
2388 preservedRegSet |= 1 << dstreg;
2389 if (!(preservedRegSet & (1 << kEAX)))
2390 X86EmitDebugTrashReg(kEAX);
2391 if (!(preservedRegSet & (1 << kEDX)))
2392 X86EmitDebugTrashReg(kEDX);
2393 if (!(preservedRegSet & (1 << kECX)))
2394 X86EmitDebugTrashReg(kECX);
2395#endif // _DEBUG
2396
2397#else // FEATURE_PAL
2398
2399#ifdef _TARGET_AMD64_
2400 BYTE code[] = { 0x65,0x48,0x8b,0x04,0x25 }; // mov dstreg, qword ptr gs:[IMM32]
2401 static const int regByteIndex = 3;
2402#elif defined(_TARGET_X86_)
2403 BYTE code[] = { 0x64,0x8b,0x05 }; // mov dstreg, dword ptr fs:[IMM32]
2404 static const int regByteIndex = 2;
2405#endif
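    // Patch the destination register into the ModRM reg field (bits 5-3)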
2406 code[regByteIndex] |= (dstreg << 3);
2407
2408 EmitBytes(code, sizeof(code));
2409 Emit32(offsetof(TEB, ThreadLocalStoragePointer));
2410
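    // dstreg now holds TEB->ThreadLocalStoragePointer. The low 16 bits of g_TlsIndex
    // select the TLS slot to dereference, and the upper bits give the byte offset of
    // the Thread* within the block that slot points to (see the two loads below).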
2411 X86EmitIndexRegLoad(dstreg, dstreg, sizeof(void *) * (g_TlsIndex & 0xFFFF));
2412
2413 X86EmitIndexRegLoad(dstreg, dstreg, (g_TlsIndex & 0x7FFF0000) >> 16);
2414
2415#endif // FEATURE_PAL
2416}
2417
2418#if defined(_TARGET_X86_)
2419
2420#if defined(PROFILING_SUPPORTED) && !defined(FEATURE_STUBS_AS_IL)
2421VOID StubLinkerCPU::EmitProfilerComCallProlog(TADDR pFrameVptr, X86Reg regFrame)
2422{
2423 STANDARD_VM_CONTRACT;
2424
2425 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2426 {
2427 // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
2428 X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
2429 X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
2430
2431 // Push arguments and notify profiler
2432 X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
2433 X86EmitPushReg(kECX); // MethodDesc*
2434 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
2435 }
2436
2437#ifdef FEATURE_COMINTEROP
2438 else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
2439 {
2440 // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
2441 X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
2442 X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
2443
2444 // Push arguments and notify profiler
2445 X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
2446 X86EmitPushReg(kECX); // MethodDesc*
2447 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
2448 }
2449#endif // FEATURE_COMINTEROP
2450
2451 // Unrecognized frame vtbl
2452 else
2453 {
        _ASSERTE(!"Unrecognized vtable passed to EmitComMethodStubProlog with profiling turned on.");
2455 }
2456}
2457
2458
2459VOID StubLinkerCPU::EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame)
2460{
2461 CONTRACTL
2462 {
2463 STANDARD_VM_CHECK;
2464#ifdef FEATURE_COMINTEROP
2465 PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr() || pFrameVptr == ComMethodFrame::GetMethodFrameVPtr());
2466#else
2467 PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr());
2468#endif // FEATURE_COMINTEROP
2469 }
2470 CONTRACTL_END;
2471
2472 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2473 {
2474 // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
2475 X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
2476 X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
2477
2478 // Push arguments and notify profiler
2479 X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
2480 X86EmitPushReg(kECX); // MethodDesc*
2481 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
2482 }
2483
2484#ifdef FEATURE_COMINTEROP
2485 else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
2486 {
2487 // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
2488 X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
2489 X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
2490
2491 // Push arguments and notify profiler
2492 X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
2493 X86EmitPushReg(kECX); // MethodDesc*
2494 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
2495 }
2496#endif // FEATURE_COMINTEROP
2497
2498 // Unrecognized frame vtbl
2499 else
2500 {
        _ASSERTE(!"Unrecognized vtable passed to EmitComMethodStubEpilog with profiling turned on.");
2502 }
2503}
2504#endif // PROFILING_SUPPORTED && !FEATURE_STUBS_AS_IL
2505
2506
2507#ifndef FEATURE_STUBS_AS_IL
2508//========================================================================
2509// Prolog for entering managed code from COM
// pushes the appropriate Frame ptr and sets up the Thread; the rare-path
// labels passed in are referenced here and must be emitted later by the caller
2512// At the end:
2513// ESI will hold the pointer to the ComMethodFrame or UMThkCallFrame
2514// EBX will hold the result of GetThread()
2515// EDI will hold the previous Frame ptr
2516
2517void StubLinkerCPU::EmitComMethodStubProlog(TADDR pFrameVptr,
2518 CodeLabel** rgRareLabels,
2519 CodeLabel** rgRejoinLabels,
2520 BOOL bShouldProfile)
2521{
2522 CONTRACTL
2523 {
2524 STANDARD_VM_CHECK;
2525
2526 PRECONDITION(rgRareLabels != NULL);
2527 PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
2528 PRECONDITION(rgRejoinLabels != NULL);
2529 PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
2530 }
2531 CONTRACTL_END;
2532
2533 // push ebp ;; save callee-saved register
2534 // push ebx ;; save callee-saved register
2535 // push esi ;; save callee-saved register
2536 // push edi ;; save callee-saved register
2537 X86EmitPushEBPframe();
2538
2539 X86EmitPushReg(kEBX);
2540 X86EmitPushReg(kESI);
2541 X86EmitPushReg(kEDI);
2542
2543 // push eax ; datum
2544 X86EmitPushReg(kEAX);
2545
2546 // push edx ;leave room for m_next (edx is an arbitrary choice)
2547 X86EmitPushReg(kEDX);
2548
2549 // push IMM32 ; push Frame vptr
2550 X86EmitPushImmPtr((LPVOID) pFrameVptr);
2551
2552 X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
2553
2554 // lea esi, [esp+4] ;; set ESI -> new frame
2555 X86EmitEspOffset(0x8d, kESI, 4); // lea ESI, [ESP+4]
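    // (ESP currently points at the GSCookie just pushed; the Frame vptr is one slot above it)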
2556
2557 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2558 {
2559 // Preserve argument registers for thiscall/fastcall
2560 X86EmitPushReg(kECX);
2561 X86EmitPushReg(kEDX);
2562 }
2563
2564 // Emit Setup thread
2565 EmitSetup(rgRareLabels[0]); // rareLabel for rare setup
2566 EmitLabel(rgRejoinLabels[0]); // rejoin label for rare setup
2567
2568#ifdef PROFILING_SUPPORTED
2569 // If profiling is active, emit code to notify profiler of transition
2570 // Must do this before preemptive GC is disabled, so no problem if the
2571 // profiler blocks.
2572 if (CORProfilerTrackTransitions() && bShouldProfile)
2573 {
2574 EmitProfilerComCallProlog(pFrameVptr, /*Frame*/ kESI);
2575 }
2576#endif // PROFILING_SUPPORTED
2577
2578 //-----------------------------------------------------------------------
2579 // Generate the inline part of disabling preemptive GC. It is critical
2580 // that this part happen before we link in the frame. That's because
2581 // we won't be able to unlink the frame from preemptive mode. And during
2582 // shutdown, we cannot switch to cooperative mode under some circumstances
2583 //-----------------------------------------------------------------------
2584 EmitDisable(rgRareLabels[1], /*fCallIn=*/TRUE, kEBX); // rare disable gc
2585 EmitLabel(rgRejoinLabels[1]); // rejoin for rare disable gc
2586
    // If we take an SO after installing the new frame but before getting the exception
    // handlers in place, we will have a corrupt frame stack. So probe-by-touch first for
    // sufficient stack space to erect the handler. Because we know we will be touching
    // that stack right away when installing the handler, this probe-by-touch will not
    // incur unnecessary cache misses. And it allows us to do the probe with one instruction.
2592
2593 // Note that for Win64, the personality routine will handle unlinking the frame, so
2594 // we don't need to probe in the Win64 stubs. The exception is ComToCLRWorker
    // where we don't set up a personality routine. However, we push the frame inside
2596 // that function and it is probe-protected with an entry point probe first, so we are
2597 // OK there too.
2598
    // We push two registers to set up the EH handler and none to set up the frame,
    // so probe for double that to give ourselves a small margin for error.
    // mov eax, [esp+n] ;; probe for sufficient stack to set up EH
2602 X86EmitEspOffset(0x8B, kEAX, -0x20);
2603 // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
2604 X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
2605
2606 // mov [esi + Frame.m_next], edi
2607 X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
2608
2609 // mov [ebx + Thread.GetFrame()], esi
2610 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
2611
2612 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2613 {
2614 // push UnmanagedToManagedExceptHandler
2615 X86EmitPushImmPtr((LPVOID)UMThunkPrestubHandler);
2616
2617 // mov eax, fs:[0]
2618 static const BYTE codeSEH1[] = { 0x64, 0xA1, 0x0, 0x0, 0x0, 0x0};
2619 EmitBytes(codeSEH1, sizeof(codeSEH1));
2620
2621 // push eax
2622 X86EmitPushReg(kEAX);
2623
2624 // mov dword ptr fs:[0], esp
2625 static const BYTE codeSEH2[] = { 0x64, 0x89, 0x25, 0x0, 0x0, 0x0, 0x0};
2626 EmitBytes(codeSEH2, sizeof(codeSEH2));
2627 }
2628
2629#if _DEBUG
2630 if (Frame::ShouldLogTransitions())
2631 {
2632 // call LogTransition
2633 X86EmitPushReg(kESI);
2634 X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
2635 }
2636#endif
2637}
2638
2639//========================================================================
2640// Epilog for stubs that enter managed code from COM
2641//
2642// At this point of the stub, the state should be as follows:
2643// ESI holds the ComMethodFrame or UMThkCallFrame ptr
2644// EBX holds the result of GetThread()
2645// EDI holds the previous Frame ptr
2646//
2647void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr,
2648 CodeLabel** rgRareLabels,
2649 CodeLabel** rgRejoinLabels,
2650 BOOL bShouldProfile)
2651{
2652 CONTRACTL
2653 {
2654 STANDARD_VM_CHECK;
2655
2656 PRECONDITION(rgRareLabels != NULL);
2657 PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
2658 PRECONDITION(rgRejoinLabels != NULL);
2659 PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
2660 }
2661 CONTRACTL_END;
2662
2663 EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
2664
2665 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2666 {
2667 // if we are using exceptions, unlink the SEH
2668 // mov ecx,[esp] ;;pointer to the next exception record
2669 X86EmitEspOffset(0x8b, kECX, 0);
2670
2671 // mov dword ptr fs:[0], ecx
2672 static const BYTE codeSEH[] = { 0x64, 0x89, 0x0D, 0x0, 0x0, 0x0, 0x0 };
2673 EmitBytes(codeSEH, sizeof(codeSEH));
2674
2675 X86EmitAddEsp(sizeof(EXCEPTION_REGISTRATION_RECORD));
2676 }
2677
2678 // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
2679 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
2680
2681 //-----------------------------------------------------------------------
    // Generate the inline part of enabling preemptive GC
    //-----------------------------------------------------------------------
    EmitEnable(rgRareLabels[2]); // rare enable gc
    EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc
2686
2687 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2688 {
2689 // Restore argument registers for thiscall/fastcall
2690 X86EmitPopReg(kEDX);
2691 X86EmitPopReg(kECX);
2692 }
2693
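    // Deallocate the GSCookie and the Frame fields up to (but not including)
    // the callee-saved register area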
2694 // add esp, popstack
2695 X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfCalleeSavedRegisters());
2696
2697 // pop edi ; restore callee-saved registers
2698 // pop esi
2699 // pop ebx
2700 // pop ebp
2701 X86EmitPopReg(kEDI);
2702 X86EmitPopReg(kESI);
2703 X86EmitPopReg(kEBX);
2704 X86EmitPopReg(kEBP);
2705
2706 // jmp eax //reexecute!
2707 X86EmitR2ROp(0xff, (X86Reg)4, kEAX);
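    // (opcode 0xFF with ModRM reg field 4, i.e. the /4 opcode extension, encodes jmp r/m)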
2708
2709 // ret
2710 // This will never be executed. It is just to help out stack-walking logic
2711 // which disassembles the epilog to unwind the stack. A "ret" instruction
2712 // indicates that no more code needs to be disassembled, if the stack-walker
2713 // keeps on going past the previous "jmp eax".
2714 X86EmitReturn(0);
2715
2716 //-----------------------------------------------------------------------
2717 // The out-of-line portion of enabling preemptive GC - rarely executed
2718 //-----------------------------------------------------------------------
2719 EmitLabel(rgRareLabels[2]); // label for rare enable gc
2720 EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
2721
2722 //-----------------------------------------------------------------------
2723 // The out-of-line portion of disabling preemptive GC - rarely executed
2724 //-----------------------------------------------------------------------
2725 EmitLabel(rgRareLabels[1]); // label for rare disable gc
2726 EmitRareDisable(rgRejoinLabels[1]); // emit rare disable gc
2727
2728 //-----------------------------------------------------------------------
2729 // The out-of-line portion of setup thread - rarely executed
2730 //-----------------------------------------------------------------------
2731 EmitLabel(rgRareLabels[0]); // label for rare setup thread
2732 EmitRareSetup(rgRejoinLabels[0], /*fThrow*/ TRUE); // emit rare setup thread
2733}
2734#endif // !FEATURE_STUBS_AS_IL
2735
2736//---------------------------------------------------------------
// Emit code to fetch the current Thread structure into EBX, jumping to the
// rare setup path if no Thread has been created for this thread yet.
// TRASHES eax, ecx & edx.
// RESULT: ebx = current Thread
2740//---------------------------------------------------------------
2741VOID StubLinkerCPU::EmitSetup(CodeLabel *pForwardRef)
2742{
2743 STANDARD_VM_CONTRACT;
2744
2745 X86EmitCurrentThreadFetch(kEBX, 0);
2746
2747 // cmp ebx, 0
2748 static const BYTE b[] = { 0x83, 0xFB, 0x0};
2749
2750 EmitBytes(b, sizeof(b));
2751
2752 // jz RarePath
2753 X86EmitCondJump(pForwardRef, X86CondCode::kJZ);
2754
2755#ifdef _DEBUG
2756 X86EmitDebugTrashReg(kECX);
2757 X86EmitDebugTrashReg(kEDX);
2758#endif
2759}
2760
2761VOID StubLinkerCPU::EmitRareSetup(CodeLabel *pRejoinPoint, BOOL fThrow)
2762{
2763 STANDARD_VM_CONTRACT;
2764
2765#ifndef FEATURE_COMINTEROP
2766 _ASSERTE(fThrow);
2767#else // !FEATURE_COMINTEROP
2768 if (!fThrow)
2769 {
2770 X86EmitPushReg(kESI);
2771 X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockReturnHr), sizeof(void*));
2772 }
2773 else
2774#endif // !FEATURE_COMINTEROP
2775 {
2776 X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockThrow), 0);
2777 }
2778
2779 // mov ebx,eax
2780 Emit16(0xc389);
2781 X86EmitNearJump(pRejoinPoint);
2782}
2783
2784//========================================================================
2785#endif // _TARGET_X86_
2786//========================================================================
2787#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
2788//========================================================================
2789// Epilog for stubs that enter managed code from COM
2790//
2791// On entry, ESI points to the Frame
2792// ESP points to below FramedMethodFrame::m_vc5Frame
// EBX holds the result of GetThread()
2794// EDI holds the previous Frame
2795
2796void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr,
2797 CodeLabel** rgRareLabels,
2798 CodeLabel** rgRejoinLabels,
2799 unsigned offsetRetThunk,
2800 BOOL bShouldProfile)
2801{
2802 CONTRACTL
2803 {
2804 STANDARD_VM_CHECK;
2805
2806 PRECONDITION(rgRareLabels != NULL);
2807 PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
2808 PRECONDITION(rgRejoinLabels != NULL);
2809 PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
2810 }
2811 CONTRACTL_END;
2812
2813 CodeLabel *NoEntryLabel;
2814 NoEntryLabel = NewCodeLabel();
2815
2816 EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
2817
2818 // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
2819 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
2820
2821 //-----------------------------------------------------------------------
2822 // Generate the inline part of enabling preemptive GC
2823 //-----------------------------------------------------------------------
    EmitLabel(NoEntryLabel); // need to enable preemptive mode even when we fail the disable, since the rare disable path returns in cooperative mode
2825
2826 EmitEnable(rgRareLabels[2]); // rare enable gc
2827 EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc
2828
2829#ifdef PROFILING_SUPPORTED
2830 // If profiling is active, emit code to notify profiler of transition
2831 if (CORProfilerTrackTransitions() && bShouldProfile)
2832 {
2833 // Save return value
2834 X86EmitPushReg(kEAX);
2835 X86EmitPushReg(kEDX);
2836
2837 EmitProfilerComCallEpilog(pFrameVptr, kESI);
2838
2839 // Restore return value
2840 X86EmitPopReg(kEDX);
2841 X86EmitPopReg(kEAX);
2842 }
2843#endif // PROFILING_SUPPORTED
2844
2845 X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfDatum());
2846
2847 // pop ecx
2848 X86EmitPopReg(kECX); // pop the MethodDesc*
2849
2850 // pop edi ; restore callee-saved registers
2851 // pop esi
2852 // pop ebx
2853 // pop ebp
2854 X86EmitPopReg(kEDI);
2855 X86EmitPopReg(kESI);
2856 X86EmitPopReg(kEBX);
2857 X86EmitPopReg(kEBP);
2858
2859 // add ecx, offsetRetThunk
2860 X86EmitAddReg(kECX, offsetRetThunk);
2861
2862 // jmp ecx
2863 // This will jump to the "ret cbStackArgs" instruction in COMMETHOD_PREPAD.
2864 static const BYTE bjmpecx[] = { 0xff, 0xe1 };
2865 EmitBytes(bjmpecx, sizeof(bjmpecx));
2866
2867 // ret
2868 // This will never be executed. It is just to help out stack-walking logic
2869 // which disassembles the epilog to unwind the stack. A "ret" instruction
2870 // indicates that no more code needs to be disassembled, if the stack-walker
2871 // keeps on going past the previous "jmp ecx".
2872 X86EmitReturn(0);
2873
2874 //-----------------------------------------------------------------------
2875 // The out-of-line portion of enabling preemptive GC - rarely executed
2876 //-----------------------------------------------------------------------
2877 EmitLabel(rgRareLabels[2]); // label for rare enable gc
2878 EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
2879
2880 //-----------------------------------------------------------------------
2881 // The out-of-line portion of disabling preemptive GC - rarely executed
2882 //-----------------------------------------------------------------------
2883 EmitLabel(rgRareLabels[1]); // label for rare disable gc
2884 EmitRareDisableHRESULT(rgRejoinLabels[1], NoEntryLabel);
2885
2886 //-----------------------------------------------------------------------
2887 // The out-of-line portion of setup thread - rarely executed
2888 //-----------------------------------------------------------------------
2889 EmitLabel(rgRareLabels[0]); // label for rare setup thread
2890 EmitRareSetup(rgRejoinLabels[0],/*fThrow*/ FALSE); // emit rare setup thread
2891}
2892
2893//========================================================================
2894#endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
2895
2896#ifndef FEATURE_STUBS_AS_IL
2897/*==============================================================================
2898 Pushes a TransitionFrame on the stack
2899 If you make any changes to the prolog instruction sequence, be sure
2900 to update UpdateRegdisplay, too!! This service should only be called from
    within the runtime. It should not be used for any unmanaged -> managed call-ins.
2902
2903 At the end of the generated prolog stub code:
2904 pFrame is in ESI/RSI.
2905 the previous pFrame is in EDI/RDI
2906 The current Thread* is in EBX/RBX.
2907 For x86, ESP points to TransitionFrame
2908 For amd64, ESP points to the space reserved for the outgoing argument registers
2909*/
2910
2911VOID StubLinkerCPU::EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset)
2912{
2913 STANDARD_VM_CONTRACT;
2914
2915#ifdef _TARGET_AMD64_
2916 X86EmitPushReg(kR15); // CalleeSavedRegisters
2917 X86EmitPushReg(kR14);
2918 X86EmitPushReg(kR13);
2919 X86EmitPushReg(kR12);
2920 X86EmitPushReg(kRBP);
2921 X86EmitPushReg(kRBX);
2922 X86EmitPushReg(kRSI);
2923 X86EmitPushReg(kRDI);
2924
2925 // Push m_datum
2926 X86EmitPushReg(SCRATCH_REGISTER_X86REG);
2927
2928 // push edx ;leave room for m_next (edx is an arbitrary choice)
2929 X86EmitPushReg(kEDX);
2930
2931 // push Frame vptr
2932 X86EmitPushImmPtr((LPVOID) pFrameVptr);
2933
2934 // mov rsi, rsp
2935 X86EmitR2ROp(0x8b, kRSI, (X86Reg)4 /*kESP*/);
2936 UnwindSetFramePointer(kRSI);
2937
2938 // Save ArgumentRegisters
2939 #define ARGUMENT_REGISTER(regname) X86EmitRegSave(k##regname, SecureDelegateFrame::GetOffsetOfTransitionBlock() + \
2940 sizeof(TransitionBlock) + offsetof(ArgumentRegisters, regname));
2941 ENUM_ARGUMENT_REGISTERS();
2942 #undef ARGUMENT_REGISTER
2943
2944 _ASSERTE(((Frame*)&pFrameVptr)->GetGSCookiePtr() == PTR_GSCookie(PBYTE(&pFrameVptr) - sizeof(GSCookie)));
2945 X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
2946
2947 // sub rsp, 4*sizeof(void*) ;; allocate callee scratch area and ensure rsp is 16-byte-aligned
2948 const INT32 padding = sizeof(ArgumentRegisters) + ((sizeof(FramedMethodFrame) % (2 * sizeof(LPVOID))) ? 0 : sizeof(LPVOID));
2949 X86EmitSubEsp(padding);
2950#endif // _TARGET_AMD64_
2951
2952#ifdef _TARGET_X86_
2953 // push ebp ;; save callee-saved register
2954 // mov ebp,esp
2955 // push ebx ;; save callee-saved register
2956 // push esi ;; save callee-saved register
2957 // push edi ;; save callee-saved register
2958 X86EmitPushEBPframe();
2959
2960 X86EmitPushReg(kEBX);
2961 X86EmitPushReg(kESI);
2962 X86EmitPushReg(kEDI);
2963
2964 // Push & initialize ArgumentRegisters
2965 #define ARGUMENT_REGISTER(regname) X86EmitPushReg(k##regname);
2966 ENUM_ARGUMENT_REGISTERS();
2967 #undef ARGUMENT_REGISTER
2968
2969 // Push m_datum
2970 X86EmitPushReg(kEAX);
2971
2972 // push edx ;leave room for m_next (edx is an arbitrary choice)
2973 X86EmitPushReg(kEDX);
2974
2975 // push Frame vptr
2976 X86EmitPushImmPtr((LPVOID) pFrameVptr);
2977
2978 // mov esi,esp
2979 X86EmitMovRegSP(kESI);
2980
2981 X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
2982#endif // _TARGET_X86_
2983
2984 // ebx <-- GetThread()
2985 X86EmitCurrentThreadFetch(kEBX, 0);
2986
2987#if _DEBUG
2988
2989 // call ObjectRefFlush
2990#ifdef _TARGET_AMD64_
2991
2992 // mov rcx, rbx
2993 X86EmitR2ROp(0x8b, kECX, kEBX); // arg in reg
2994
2995#else // !_TARGET_AMD64_
2996 X86EmitPushReg(kEBX); // arg on stack
2997#endif // _TARGET_AMD64_
2998
2999 // Make the call
3000 X86EmitCall(NewExternalCodeLabel((LPVOID) Thread::ObjectRefFlush), sizeof(void*));
3001
3002#endif // _DEBUG
3003
3004 // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
3005 X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
3006
3007 // mov [esi + Frame.m_next], edi
3008 X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
3009
3010 // mov [ebx + Thread.GetFrame()], esi
3011 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
3012
3013#if _DEBUG
3014
3015 if (Frame::ShouldLogTransitions())
3016 {
3017 // call LogTransition
3018#ifdef _TARGET_AMD64_
3019
3020 // mov rcx, rsi
3021 X86EmitR2ROp(0x8b, kECX, kESI); // arg in reg
3022
3023#else // !_TARGET_AMD64_
3024 X86EmitPushReg(kESI); // arg on stack
3025#endif // _TARGET_AMD64_
3026
3027 X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
3028
3029#ifdef _TARGET_AMD64_
3030 // Reload parameter registers
3031 // mov r, [esp+offs]
3032 #define ARGUMENT_REGISTER(regname) X86EmitEspOffset(0x8b, k##regname, sizeof(ArgumentRegisters) + \
3033 sizeof(TransitionFrame) + offsetof(ArgumentRegisters, regname));
3034 ENUM_ARGUMENT_REGISTERS();
3035 #undef ARGUMENT_REGISTER
3036
3037#endif // _TARGET_AMD64_
3038 }
3039
3040#endif // _DEBUG
3041
3042
3043#ifdef _TARGET_AMD64_
3044 // OK for the debugger to examine the new frame now
3045 // (Note that if it's not OK yet for some stub, another patch label
3046 // can be emitted later which will override this one.)
3047 EmitPatchLabel();
3048#else
3049 // For x86, the patch label can be specified only after the GSCookie is pushed
3050 // Otherwise the debugger will see a Frame without a valid GSCookie
3051#endif
3052}
3053
3054/*==============================================================================
3055 EmitMethodStubEpilog generates the part of the stub that will pop off the
3056 Frame
3057
3058 restoreArgRegs - indicates whether the argument registers need to be
3059 restored from m_argumentRegisters
3060
3061 At this point of the stub:
3062 pFrame is in ESI/RSI.
3063 the previous pFrame is in EDI/RDI
3064 The current Thread* is in EBX/RBX.
3065 For x86, ESP points to the FramedMethodFrame::NegInfo
3066*/
3067
3068VOID StubLinkerCPU::EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset)
3069{
3070 STANDARD_VM_CONTRACT;
3071
3072 // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
3073 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
3074
3075#ifdef _TARGET_X86_
3076 // deallocate Frame
3077 X86EmitAddEsp(sizeof(GSCookie) + transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
3078
3079#elif defined(_TARGET_AMD64_)
3080 // lea rsp, [rsi + <offset of preserved registers>]
3081 X86EmitOffsetModRM(0x8d, (X86Reg)4 /*kRSP*/, kRSI, transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
3082#endif // _TARGET_AMD64_
3083
3084 // pop edi ; restore callee-saved registers
3085 // pop esi
3086 // pop ebx
3087 // pop ebp
3088 X86EmitPopReg(kEDI);
3089 X86EmitPopReg(kESI);
3090 X86EmitPopReg(kEBX);
3091 X86EmitPopReg(kEBP);
3092
3093#ifdef _TARGET_AMD64_
3094 X86EmitPopReg(kR12);
3095 X86EmitPopReg(kR13);
3096 X86EmitPopReg(kR14);
3097 X86EmitPopReg(kR15);
3098#endif
3099
3100#if defined(_TARGET_AMD64_) || defined(UNIX_X86_ABI)
3101 // Caller deallocates argument space. (Bypasses ASSERT in
3102 // X86EmitReturn.)
3103 numArgBytes = 0;
3104#endif
3105
3106 X86EmitReturn(numArgBytes);
3107}
3108
3109
3110// On entry, ESI should be pointing to the Frame
3111
3112VOID StubLinkerCPU::EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset)
3113{
3114 STANDARD_VM_CONTRACT;
3115
3116#ifdef _DEBUG
3117 // cmp dword ptr[frameReg-gsCookieOffset], gsCookie
3118#ifdef _TARGET_X86_
3119 X86EmitCmpRegIndexImm32(frameReg, gsCookieOffset, GetProcessGSCookie());
3120#else
3121 X64EmitCmp32RegIndexImm32(frameReg, gsCookieOffset, (INT32)GetProcessGSCookie());
3122#endif
3123
3124 CodeLabel * pLabel = NewCodeLabel();
3125 X86EmitCondJump(pLabel, X86CondCode::kJE);
3126
3127 X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_FailFast), 0);
3128
3129 EmitLabel(pLabel);
3130#endif
3131}
3132#endif // !FEATURE_STUBS_AS_IL
3133
3134
// This method unboxes the THIS pointer and then jumps to pUnboxMD's code
3136// If it's shared code for a method in a generic value class, then also extract the vtable pointer
3137// and pass it as an extra argument. Thus this stub generator really covers both
3138// - Unboxing, non-instantiating stubs
3139// - Unboxing, method-table-instantiating stubs
3140VOID StubLinkerCPU::EmitUnboxMethodStub(MethodDesc* pUnboxMD)
3141{
3142 CONTRACTL
3143 {
3144 STANDARD_VM_CHECK;
3145 PRECONDITION(!pUnboxMD->IsStatic());
3146 }
3147 CONTRACTL_END;
3148
3149#ifdef FEATURE_STUBS_AS_IL
3150 _ASSERTE(!pUnboxMD->RequiresInstMethodTableArg());
3151#else
3152 if (pUnboxMD->RequiresInstMethodTableArg())
3153 {
3154 EmitInstantiatingMethodStub(pUnboxMD, NULL);
3155 return;
3156 }
3157#endif
3158
3159 //
3160 // unboxing a value class simply means adding sizeof(void*) to the THIS pointer
3161 //
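    // (the boxed object starts with its MethodTable pointer, so the value's data
    //  begins at [this + sizeof(void*)])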
3162#ifdef _TARGET_AMD64_
3163 X86EmitAddReg(THIS_kREG, sizeof(void*));
3164
3165 // Use direct call if possible
3166 if (pUnboxMD->HasStableEntryPoint())
3167 {
3168 X86EmitRegLoad(kRAX, pUnboxMD->GetStableEntryPoint());// MOV RAX, DWORD
3169 }
3170 else
3171 {
3172 X86EmitRegLoad(kRAX, (UINT_PTR)pUnboxMD->GetAddrOfSlot()); // MOV RAX, DWORD
3173
3174 X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
3175 }
3176
3177 Emit16(X86_INSTR_JMP_EAX); // JMP EAX
3178#else // _TARGET_AMD64_
3179 X86EmitAddReg(THIS_kREG, sizeof(void*));
3180
3181 // Use direct call if possible
3182 if (pUnboxMD->HasStableEntryPoint())
3183 {
3184 X86EmitNearJump(NewExternalCodeLabel((LPVOID) pUnboxMD->GetStableEntryPoint()));
3185 }
3186 else
3187 {
3188 // jmp [slot]
3189 Emit16(0x25ff);
3190 Emit32((DWORD)(size_t)pUnboxMD->GetAddrOfSlot());
3191 }
3192#endif //_TARGET_AMD64_
3193}
3194
3195
3196#if defined(FEATURE_SHARE_GENERIC_CODE) && !defined(FEATURE_STUBS_AS_IL)
3197// The stub generated by this method passes an extra dictionary argument before jumping to
3198// shared-instantiation generic code.
3199//
3200// pMD is either
3201// * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations.
3202// In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself.
3203// or * A MethodDesc for a static method in a generic class whose code is shared across instantiations.
3204// In this case, the extra argument is the MethodTable pointer of the instantiated type.
// or * A MethodDesc for an unboxing stub. In this case, the extra argument is null.
3206VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pMD, void* extra)
3207{
3208 CONTRACTL
3209 {
3210 STANDARD_VM_CHECK;
3211 PRECONDITION(pMD->RequiresInstArg());
3212 }
3213 CONTRACTL_END;
3214
3215 MetaSig msig(pMD);
3216 ArgIterator argit(&msig);
3217
3218#ifdef _TARGET_AMD64_
3219 int paramTypeArgOffset = argit.GetParamTypeArgOffset();
3220 int paramTypeArgIndex = TransitionBlock::GetArgumentIndexFromOffset(paramTypeArgOffset);
3221
3222 CorElementType argTypes[5];
3223
3224 int firstRealArg = paramTypeArgIndex + 1;
3225 int argNum = firstRealArg;
3226
3227 //
3228 // Compute types of the 4 register args and first stack arg
3229 //
3230
3231 CorElementType sigType;
3232 while ((sigType = msig.NextArgNormalized()) != ELEMENT_TYPE_END)
3233 {
3234 argTypes[argNum++] = sigType;
3235 if (argNum > 4)
3236 break;
3237 }
3238 msig.Reset();
3239
3240 BOOL fUseInstantiatingMethodStubWorker = FALSE;
3241
3242 if (argNum > 4)
3243 {
3244 //
3245 // We will need to go through assembly helper.
3246 //
3247 fUseInstantiatingMethodStubWorker = TRUE;
3248
3249 // Allocate space for frame before pushing the arguments for the assembly helper
3250 X86EmitSubEsp((INT32)(AlignUp(sizeof(void *) /* extra stack param */ + sizeof(GSCookie) + sizeof(StubHelperFrame), 16) - sizeof(void *) /* return address */));
3251
3252 //
3253 // Store extra arg stack arg param for the helper.
3254 //
3255 CorElementType argType = argTypes[--argNum];
3256 switch (argType)
3257 {
3258 case ELEMENT_TYPE_R4:
3259 // movss dword ptr [rsp], xmm?
3260 X64EmitMovSSToMem(kXMM3, (X86Reg)4 /*kRSP*/);
3261 break;
3262 case ELEMENT_TYPE_R8:
3263 // movsd qword ptr [rsp], xmm?
3264 X64EmitMovSDToMem(kXMM3, (X86Reg)4 /*kRSP*/);
3265 break;
3266 default:
3267 X86EmitIndexRegStoreRSP(0, kR9);
3268 break;
3269 }
3270 }
3271
3272 //
3273 // Shuffle the register arguments
3274 //
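    // Each incoming register argument from the hidden-arg position onward moves one
    // slot later, freeing c_argRegs[paramTypeArgIndex] for the instantiation argument.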
3275 while (argNum > firstRealArg)
3276 {
3277 CorElementType argType = argTypes[--argNum];
3278
3279 switch (argType)
3280 {
3281 case ELEMENT_TYPE_R4:
3282 case ELEMENT_TYPE_R8:
3283 // mov xmm#, xmm#-1
3284 X64EmitMovXmmXmm((X86Reg)argNum, (X86Reg)(argNum - 1));
3285 break;
3286 default:
3287 //mov reg#, reg#-1
3288 X86EmitMovRegReg(c_argRegs[argNum], c_argRegs[argNum-1]);
3289 break;
3290 }
3291 }
3292
3293 //
3294 // Setup the hidden instantiation argument
3295 //
3296 if (extra != NULL)
3297 {
3298 X86EmitRegLoad(c_argRegs[paramTypeArgIndex], (UINT_PTR)extra);
3299 }
3300 else
3301 {
3302 X86EmitIndexRegLoad(c_argRegs[paramTypeArgIndex], THIS_kREG);
3303
3304 X86EmitAddReg(THIS_kREG, sizeof(void*));
3305 }
3306
3307 // Use direct call if possible
3308 if (pMD->HasStableEntryPoint())
3309 {
3310 X86EmitRegLoad(kRAX, pMD->GetStableEntryPoint());// MOV RAX, DWORD
3311 }
3312 else
3313 {
3314 X86EmitRegLoad(kRAX, (UINT_PTR)pMD->GetAddrOfSlot()); // MOV RAX, DWORD
3315
3316 X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
3317 }
3318
3319 if (fUseInstantiatingMethodStubWorker)
3320 {
3321 X86EmitPushReg(kRAX);
3322
3323 UINT cbStack = argit.SizeOfArgStack();
3324 _ASSERTE(cbStack > 0);
3325
3326 X86EmitPushImm32((AlignUp(cbStack, 16) / sizeof(void*)) - 1); // -1 for extra stack arg
3327
3328 X86EmitRegLoad(kRAX, GetEEFuncEntryPoint(InstantiatingMethodStubWorker));// MOV RAX, DWORD
3329 }
3330 else
3331 {
3332 _ASSERTE(argit.SizeOfArgStack() == 0);
3333 }
3334
3335 Emit16(X86_INSTR_JMP_EAX);
3336
3337#else
3338 int paramTypeArgOffset = argit.GetParamTypeArgOffset();
3339
3340 // It's on the stack
3341 if (TransitionBlock::IsStackArgumentOffset(paramTypeArgOffset))
3342 {
3343 // Pop return address into AX
3344 X86EmitPopReg(kEAX);
3345
3346 if (extra != NULL)
3347 {
3348 // Push extra dictionary argument
3349 X86EmitPushImmPtr(extra);
3350 }
3351 else
3352 {
3353 // Push the vtable pointer from "this"
3354 X86EmitIndexPush(THIS_kREG, 0);
3355 }
3356
3357 // Put return address back
3358 X86EmitPushReg(kEAX);
3359 }
3360 // It's in a register
3361 else
3362 {
3363 X86Reg paramReg = GetX86ArgumentRegisterFromOffset(paramTypeArgOffset - TransitionBlock::GetOffsetOfArgumentRegisters());
3364
3365 if (extra != NULL)
3366 {
3367 X86EmitRegLoad(paramReg, (UINT_PTR)extra);
3368 }
3369 else
3370 {
3371 // Just extract the vtable pointer from "this"
3372 X86EmitIndexRegLoad(paramReg, THIS_kREG);
3373 }
3374 }
3375
3376 if (extra == NULL)
3377 {
3378 // Unboxing stub case.
3379 X86EmitAddReg(THIS_kREG, sizeof(void*));
3380 }
3381
3382 // Use direct call if possible
3383 if (pMD->HasStableEntryPoint())
3384 {
3385 X86EmitNearJump(NewExternalCodeLabel((LPVOID) pMD->GetStableEntryPoint()));
3386 }
3387 else
3388 {
3389 // jmp [slot]
3390 Emit16(0x25ff);
3391 Emit32((DWORD)(size_t)pMD->GetAddrOfSlot());
3392 }
#endif // _TARGET_AMD64_
3394}
#endif // FEATURE_SHARE_GENERIC_CODE && !FEATURE_STUBS_AS_IL
3396
3397
3398#if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
3399
3400typedef BOOL GetModuleInformationProc(
3401 HANDLE hProcess,
3402 HMODULE hModule,
3403 LPMODULEINFO lpmodinfo,
3404 DWORD cb
3405);
3406
3407GetModuleInformationProc *g_pfnGetModuleInformation = NULL;
3408
3409extern "C" VOID __cdecl DebugCheckStubUnwindInfoWorker (CONTEXT *pStubContext)
3410{
3411 BEGIN_ENTRYPOINT_VOIDRET;
3412
3413 LOG((LF_STUBS, LL_INFO1000000, "checking stub unwind info:\n"));
3414
3415 //
3416 // Make a copy of the CONTEXT. RtlVirtualUnwind will modify this copy.
3417 // DebugCheckStubUnwindInfo will need to restore registers from the
3418 // original CONTEXT.
3419 //
3420 CONTEXT ctx = *pStubContext;
3421 ctx.ContextFlags = (CONTEXT_CONTROL | CONTEXT_INTEGER);
3422
3423 //
3424 // Find the upper bound of the stack and address range of KERNEL32. This
3425 // is where we expect the unwind to stop.
3426 //
3427 void *pvStackTop = GetThread()->GetCachedStackBase();
3428
3429 if (!g_pfnGetModuleInformation)
3430 {
3431 HMODULE hmodPSAPI = WszGetModuleHandle(W("PSAPI.DLL"));
3432
3433 if (!hmodPSAPI)
3434 {
3435 hmodPSAPI = WszLoadLibrary(W("PSAPI.DLL"));
3436 if (!hmodPSAPI)
3437 {
3438 _ASSERTE(!"unable to load PSAPI.DLL");
3439 goto ErrExit;
3440 }
3441 }
3442
3443 g_pfnGetModuleInformation = (GetModuleInformationProc*)GetProcAddress(hmodPSAPI, "GetModuleInformation");
3444 if (!g_pfnGetModuleInformation)
3445 {
3446 _ASSERTE(!"can't find PSAPI!GetModuleInformation");
3447 goto ErrExit;
3448 }
3449
3450 // Intentionally leak hmodPSAPI. We don't want to
3451 // LoadLibrary/FreeLibrary every time, this is slow + produces lots of
3452 // debugger spew. This is just debugging code after all...
3453 }
3454
3455 HMODULE hmodKERNEL32 = WszGetModuleHandle(W("KERNEL32"));
3456 _ASSERTE(hmodKERNEL32);
3457
3458 MODULEINFO modinfoKERNEL32;
3459 if (!g_pfnGetModuleInformation(GetCurrentProcess(), hmodKERNEL32, &modinfoKERNEL32, sizeof(modinfoKERNEL32)))
3460 {
3461 _ASSERTE(!"unable to get bounds of KERNEL32");
3462 goto ErrExit;
3463 }
3464
3465 //
3466 // Unwind until IP is 0, sp is at the stack top, and callee IP is in kernel32.
3467 //
3468
3469 for (;;)
3470 {
3471 ULONG64 ControlPc = (ULONG64)GetIP(&ctx);
3472
3473 LOG((LF_STUBS, LL_INFO1000000, "pc %p, sp %p\n", ControlPc, GetSP(&ctx)));
3474
3475 ULONG64 ImageBase;
3476 T_RUNTIME_FUNCTION *pFunctionEntry = RtlLookupFunctionEntry(
3477 ControlPc,
3478 &ImageBase,
3479 NULL);
3480 if (pFunctionEntry)
3481 {
3482 PVOID HandlerData;
3483 ULONG64 EstablisherFrame;
3484
3485 RtlVirtualUnwind(
3486 0,
3487 ImageBase,
3488 ControlPc,
3489 pFunctionEntry,
3490 &ctx,
3491 &HandlerData,
3492 &EstablisherFrame,
3493 NULL);
3494
3495 ULONG64 NewControlPc = (ULONG64)GetIP(&ctx);
3496
3497 LOG((LF_STUBS, LL_INFO1000000, "function %p, image %p, new pc %p, new sp %p\n", pFunctionEntry, ImageBase, NewControlPc, GetSP(&ctx)));
3498
3499 if (!NewControlPc)
3500 {
3501 if (dac_cast<PTR_BYTE>(GetSP(&ctx)) < (BYTE*)pvStackTop - 0x100)
3502 {
3503 _ASSERTE(!"SP did not end up at top of stack");
3504 goto ErrExit;
3505 }
3506
3507 if (!( ControlPc > (ULONG64)modinfoKERNEL32.lpBaseOfDll
3508 && ControlPc < (ULONG64)modinfoKERNEL32.lpBaseOfDll + modinfoKERNEL32.SizeOfImage))
3509 {
3510 _ASSERTE(!"PC did not end up in KERNEL32");
3511 goto ErrExit;
3512 }
3513
3514 break;
3515 }
3516 }
3517 else
3518 {
            // Leaf functions that do not use any stack space or nonvolatile
            // registers are not required to have unwind info (ex.
            // USER32!ZwUserCreateWindowEx).
3522 ctx.Rip = *(ULONG64*)(ctx.Rsp);
3523 ctx.Rsp += sizeof(ULONG64);
3524 }
3525 }
3526ErrExit:
3527
3528 END_ENTRYPOINT_VOIDRET;
3529 return;
3530}
3531
3532//virtual
3533VOID StubLinkerCPU::EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel)
3534{
3535 STANDARD_VM_CONTRACT;
3536 X86EmitCall(pCheckLabel, 0);
3537}
3538
3539//virtual
3540VOID StubLinkerCPU::EmitUnwindInfoCheckSubfunction()
3541{
3542 STANDARD_VM_CONTRACT;
3543
3544#ifdef _TARGET_AMD64_
3545 // X86EmitCall will generate "mov rax, target/jmp rax", so we have to save
3546 // rax on the stack. DO NOT use X86EmitPushReg. That will induce infinite
3547 // recursion, since the push may require more unwind info. This "push rax"
3548 // will be accounted for by DebugCheckStubUnwindInfo's unwind info
3549 // (considered part of its locals), so there doesn't have to be unwind
3550 // info for it.
3551 Emit8(0x50);
3552#endif
3553
3554 X86EmitNearJump(NewExternalCodeLabel(DebugCheckStubUnwindInfo));
3555}
3556
3557#endif // defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
3558
3559
3560#ifdef _TARGET_X86_
3561
3562//-----------------------------------------------------------------------
3563// Generates the inline portion of the code to enable preemptive GC. Hopefully,
3564// the inline code is all that will execute most of the time. If this code
3565// path is entered at certain times, however, it will need to jump out to
3566// a separate out-of-line path which is more expensive. The "pForwardRef"
3567// label indicates the start of the out-of-line path.
3568//
3569// Assumptions:
3570// ebx = Thread
3571// Preserves
3572// all registers except ecx.
3573//
3574//-----------------------------------------------------------------------
3575VOID StubLinkerCPU::EmitEnable(CodeLabel *pForwardRef)
3576{
3577 CONTRACTL
3578 {
3579 STANDARD_VM_CHECK;
3580
3581 PRECONDITION(4 == sizeof( ((Thread*)0)->m_State ));
3582 PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
3583 }
3584 CONTRACTL_END;
3585
3586 // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0
3587 X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag());
3588 Emit8(0);
3589
3590 _ASSERTE(FitsInI1(Thread::TS_CatchAtSafePoint));
3591
3592 // test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint
3593 X86EmitOffsetModRM(0xf6, (X86Reg)0, kEBX, Thread::GetOffsetOfState());
3594 Emit8(Thread::TS_CatchAtSafePoint);
3595
3596 // jnz RarePath
3597 X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
3598
3599#ifdef _DEBUG
3600 X86EmitDebugTrashReg(kECX);
3601#endif
3602
3603}
3604
3605
3606//-----------------------------------------------------------------------
3607// Generates the out-of-line portion of the code to enable preemptive GC.
3608// After the work is done, the code jumps back to the "pRejoinPoint"
3609// which should be emitted right after the inline part is generated.
3610//
3611// Assumptions:
3612// ebx = Thread
3613// Preserves
3614// all registers except ecx.
3615//
3616//-----------------------------------------------------------------------
3617VOID StubLinkerCPU::EmitRareEnable(CodeLabel *pRejoinPoint)
3618{
3619 STANDARD_VM_CONTRACT;
3620
3621 X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareEnable), 0);
3622#ifdef _DEBUG
3623 X86EmitDebugTrashReg(kECX);
3624#endif
3625 if (pRejoinPoint)
3626 {
3627 X86EmitNearJump(pRejoinPoint);
3628 }
3629
3630}
3631
3632
3633//-----------------------------------------------------------------------
3634// Generates the inline portion of the code to disable preemptive GC. Hopefully,
3635// the inline code is all that will execute most of the time. If this code
3636// path is entered at certain times, however, it will need to jump out to
3637// a separate out-of-line path which is more expensive. The "pForwardRef"
3638// label indicates the start of the out-of-line path.
3639//
3640// Assumptions:
3641// ebx = Thread
3642// Preserves
3643// all registers except ecx.
3644//
3645//-----------------------------------------------------------------------
3646VOID StubLinkerCPU::EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg)
3647{
3648 CONTRACTL
3649 {
3650 STANDARD_VM_CHECK;
3651
3652 PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
3653 PRECONDITION(4 == sizeof(g_TrapReturningThreads));
3654 }
3655 CONTRACTL_END;
3656
3657#if defined(FEATURE_COMINTEROP) && defined(MDA_SUPPORTED)
    // If we are checking whether the current thread already holds the loader lock, vector
    // such cases to the rare disable pathway, where we can check again.
3660 if (fCallIn && (NULL != MDA_GET_ASSISTANT(Reentrancy)))
3661 {
3662 CodeLabel *pNotReentrantLabel = NewCodeLabel();
3663
3664 // test byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
3665 X86EmitOffsetModRM(0xf6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
3666 Emit8(1);
3667
3668 // jz NotReentrant
3669 X86EmitCondJump(pNotReentrantLabel, X86CondCode::kJZ);
3670
3671 X86EmitPushReg(kEAX);
3672 X86EmitPushReg(kEDX);
3673 X86EmitPushReg(kECX);
3674
3675 X86EmitCall(NewExternalCodeLabel((LPVOID) HasIllegalReentrancy), 0);
3676
3677 // If the probe fires, we go ahead and allow the call anyway. At this point, there could be
3678 // GC heap corruptions. So the probe detects the illegal case, but doesn't prevent it.
3679
3680 X86EmitPopReg(kECX);
3681 X86EmitPopReg(kEDX);
3682 X86EmitPopReg(kEAX);
3683
3684 EmitLabel(pNotReentrantLabel);
3685 }
3686#endif
3687
3688 // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
3689 X86EmitOffsetModRM(0xc6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
3690 Emit8(1);
3691
3692 // cmp dword ptr g_TrapReturningThreads, 0
3693 Emit16(0x3d83);
3694 EmitPtr((void *)&g_TrapReturningThreads);
3695 Emit8(0);
3696
3697 // jnz RarePath
3698 X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
3699
3700#if defined(FEATURE_COMINTEROP) && !defined(FEATURE_CORESYSTEM)
3701 // If we are checking whether the current thread holds the loader lock, vector
3702 // such cases to the rare disable pathway, where we can check again.
3703 if (fCallIn && ShouldCheckLoaderLock())
3704 {
3705 X86EmitPushReg(kEAX);
3706 X86EmitPushReg(kEDX);
3707
3708 if (ThreadReg == kECX)
3709 X86EmitPushReg(kECX);
3710
3711 // BOOL AuxUlibIsDLLSynchronizationHeld(BOOL *IsHeld)
3712 //
        // So we need to be sure that the return value and the passed BOOL are both TRUE.
3714 // If either is FALSE, then the call failed or the lock is not held. Either way, the
3715 // probe should not fire.
3716
3717 X86EmitPushReg(kEDX); // BOOL temp
3718 Emit8(0x54); // push ESP because arg is &temp
3719 X86EmitCall(NewExternalCodeLabel((LPVOID) AuxUlibIsDLLSynchronizationHeld), 0);
3720
3721 // callee has popped.
3722 X86EmitPopReg(kEDX); // recover temp
3723
3724 CodeLabel *pPopLabel = NewCodeLabel();
3725
3726 Emit16(0xc085); // test eax, eax
3727 X86EmitCondJump(pPopLabel, X86CondCode::kJZ);
3728
3729 Emit16(0xd285); // test edx, edx
3730
3731 EmitLabel(pPopLabel); // retain the conditional flags across the pops
3732
3733 if (ThreadReg == kECX)
3734 X86EmitPopReg(kECX);
3735
3736 X86EmitPopReg(kEDX);
3737 X86EmitPopReg(kEAX);
3738
3739 X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
3740 }
3741#endif
3742
3743#ifdef _DEBUG
3744 if (ThreadReg != kECX)
3745 X86EmitDebugTrashReg(kECX);
3746#endif
3747
3748}
3749
3750
3751//-----------------------------------------------------------------------
3752// Generates the out-of-line portion of the code to disable preemptive GC.
3753// After the work is done, the code jumps back to the "pRejoinPoint"
3754// which should be emitted right after the inline part is generated. However,
3755// if we cannot execute managed code at this time, an exception is thrown
3756// which cannot be caught by managed code.
3757//
3758// Assumptions:
3759// ebx = Thread
3760// Preserves
3761// all registers except ecx, eax.
3762//
3763//-----------------------------------------------------------------------
3764VOID StubLinkerCPU::EmitRareDisable(CodeLabel *pRejoinPoint)
3765{
3766 STANDARD_VM_CONTRACT;
3767
3768 X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableTHROW), 0);
3769
3770#ifdef _DEBUG
3771 X86EmitDebugTrashReg(kECX);
3772#endif
3773 X86EmitNearJump(pRejoinPoint);
3774}
3775
3776#ifdef FEATURE_COMINTEROP
3777//-----------------------------------------------------------------------
3778// Generates the out-of-line portion of the code to disable preemptive GC.
3779// After the work is done, the code normally jumps back to the "pRejoinPoint"
3780// which should be emitted right after the inline part is generated. However,
3781// if we cannot execute managed code at this time, an HRESULT is returned
3782// via the ExitPoint.
3783//
3784// Assumptions:
3785// ebx = Thread
3786// Preserves
3787// all registers except ecx, eax.
3788//
3789//-----------------------------------------------------------------------
3790VOID StubLinkerCPU::EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint)
3791{
3792 STANDARD_VM_CONTRACT;
3793
3794 X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableHR), 0);
3795
3796#ifdef _DEBUG
3797 X86EmitDebugTrashReg(kECX);
3798#endif
3799
3800 // test eax, eax ;; test the result of StubRareDisableHR
3801 Emit16(0xc085);
3802
3803 // JZ pRejoinPoint
3804 X86EmitCondJump(pRejoinPoint, X86CondCode::kJZ);
3805
3806 X86EmitNearJump(pExitPoint);
3807}
3808#endif // FEATURE_COMINTEROP
3809
3810#endif // _TARGET_X86_
3811
3812#endif // CROSSGEN_COMPILE
3813
3814
3815VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
3816{
3817 STANDARD_VM_CONTRACT;
3818
3819#ifdef _TARGET_AMD64_
3820
3821 // mov SCRATCHREG,rsp
3822 X86_64BitOperands();
3823 Emit8(0x8b);
3824 Emit8(0304 | (SCRATCH_REGISTER_X86REG << 3));
3825
    // Save the real target in r11; we will jump to it later. r10 is used below.
3827 // Windows: mov r11, rcx
3828 // Unix: mov r11, rdi
3829 X86EmitMovRegReg(kR11, THIS_kREG);
3830
3831#ifdef UNIX_AMD64_ABI
3832 for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++)
3833 {
3834 if (pEntry->srcofs & ShuffleEntry::REGMASK)
3835 {
3836 // Source in a general purpose or float register, destination in the same kind of a register or on stack
3837 int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
3838
3839 if (pEntry->dstofs & ShuffleEntry::REGMASK)
3840 {
3841 // Source in register, destination in register
3842
3843 // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
3844 _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
3845 int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
3846
3847 if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
3848 {
3849 // movdqa dstReg, srcReg
3850 X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
3851 }
3852 else
3853 {
3854 // mov dstReg, srcReg
3855 X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
3856 }
3857 }
3858 else
3859 {
3860 // Source in register, destination on stack
3861 int dstOffset = (pEntry->dstofs + 1) * sizeof(void*);
3862
3863 if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
3864 {
3865 if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
3866 {
3867 // movss [rax + dst], srcReg
3868 X64EmitMovSSToMem((X86Reg)(kXMM0 + srcRegIndex), SCRATCH_REGISTER_X86REG, dstOffset);
3869 }
3870 else
3871 {
3872 // movsd [rax + dst], srcReg
3873 X64EmitMovSDToMem((X86Reg)(kXMM0 + srcRegIndex), SCRATCH_REGISTER_X86REG, dstOffset);
3874 }
3875 }
3876 else
3877 {
3878 // mov [rax + dst], srcReg
3879 X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, dstOffset, c_argRegs[srcRegIndex]);
3880 }
3881 }
3882 }
3883 else if (pEntry->dstofs & ShuffleEntry::REGMASK)
3884 {
3885 // Source on stack, destination in register
3886 _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
3887
3888 int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
3889 int srcOffset = (pEntry->srcofs + 1) * sizeof(void*);
3890
3891 if (pEntry->dstofs & ShuffleEntry::FPREGMASK)
3892 {
3893 if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
3894 {
3895 // movss dstReg, [rax + src]
3896 X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
3897 }
3898 else
3899 {
3900 // movsd dstReg, [rax + src]
3901 X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
3902 }
3903 }
3904 else
3905 {
3906 // mov dstreg, [rax + src]
3907 X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset);
3908 }
3909 }
3910 else
3911 {
3912 // Source on stack, destination on stack
3913 _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
3914 _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK));
3915
3916 // mov r10, [rax + src]
3917 X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*));
3918
3919 // mov [rax + dst], r10
3920 X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, (pEntry->dstofs + 1) * sizeof(void*), kR10);
3921 }
3922 }
3923#else // UNIX_AMD64_ABI
3924 UINT step = 1;
3925
3926 if (pShuffleEntryArray->argtype == ELEMENT_TYPE_END)
3927 {
3928 // Special handling of open instance methods with return buffer. Move "this"
3929 // by two slots, and leave the "retbufptr" between the two slots intact.
3930
3931 // mov rcx, r8
3932 X86EmitMovRegReg(kRCX, kR8);
3933
3934 // Skip this entry
3935 pShuffleEntryArray++;
3936
3937 // Skip this entry and leave retbufptr intact
3938 step += 2;
3939 }
3940
3941 // Now shuffle the args by one position:
3942 // steps 1-3 : reg args (rcx, rdx, r8)
3943 // step 4 : stack->reg arg (r9)
3944 // step >4 : stack args
3945
3946 for(;
3947 pShuffleEntryArray->srcofs != ShuffleEntry::SENTINEL;
3948 step++, pShuffleEntryArray++)
3949 {
3950 switch (step)
3951 {
3952 case 1:
3953 case 2:
3954 case 3:
3955 switch (pShuffleEntryArray->argtype)
3956 {
3957 case ELEMENT_TYPE_R4:
3958 case ELEMENT_TYPE_R8:
3959 // movdqa xmm(step - 1), xmm(step)
3960 X64EmitMovXmmXmm((X86Reg)(step - 1), (X86Reg)(step));
3961 break;
3962 default:
3963 // mov argRegs[step-1], argRegs[step]
3964 X86EmitMovRegReg(c_argRegs[step-1], c_argRegs[step]);
3965 break;
3966 }
3967 break;
3968
3969 case 4:
3970 {
3971 switch (pShuffleEntryArray->argtype)
3972 {
3973 case ELEMENT_TYPE_R4:
3974 X64EmitMovSSFromMem(kXMM3, kRAX, 0x28);
3975 break;
3976
3977 case ELEMENT_TYPE_R8:
3978 X64EmitMovSDFromMem(kXMM3, kRAX, 0x28);
3979 break;
3980
3981 default:
3982 // mov r9, [rax + 28h]
3983 X86EmitIndexRegLoad (kR9, SCRATCH_REGISTER_X86REG, 5*sizeof(void*));
3984 }
3985 break;
3986 }
3987 default:
3988
3989 // mov r10, [rax + (step+1)*sizeof(void*)]
3990 X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (step+1)*sizeof(void*));
3991
3992 // mov [rax + step*sizeof(void*)], r10
3993 X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, step*sizeof(void*), kR10);
3994 }
3995 }
3996#endif // UNIX_AMD64_ABI
3997
3998 // mov r10, [r11 + Delegate._methodptraux]
3999 X86EmitIndexRegLoad(kR10, kR11, DelegateObject::GetOffsetOfMethodPtrAux());
4000 // add r11, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into r11
4001 X86EmitAddReg(kR11, DelegateObject::GetOffsetOfMethodPtrAux());
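// R11 now points at the _methodPtrAux field, which serves as the indirection cell;
// a stub reached through the cell (for example the prestub) can use R11 to locate
// and backpatch it.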
4002 // Now jump to real target
4003 // jmp r10
4004 X86EmitR2ROp(0xff, (X86Reg)4, kR10);
4005
4006#else // _TARGET_AMD64_
4007
4008 UINT espadjust = 0;
4009 BOOL haveMemMemMove = FALSE;
4010
4011 ShuffleEntry *pWalk = NULL;
4012 for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
4013 {
4014 if (!(pWalk->dstofs & ShuffleEntry::REGMASK) &&
4015 !(pWalk->srcofs & ShuffleEntry::REGMASK) &&
4016 pWalk->srcofs != pWalk->dstofs)
4017 {
4018 haveMemMemMove = TRUE;
4019 espadjust = sizeof(void*);
4020 break;
4021 }
4022 }
4023
4024 if (haveMemMemMove)
4025 {
4026 // push ecx
4027 X86EmitPushReg(THIS_kREG);
4028 }
4029 else
4030 {
4031 // mov eax, ecx
4032 Emit8(0x8b);
4033 Emit8(0300 | SCRATCH_REGISTER_X86REG << 3 | THIS_kREG);
4034 }
4035
4036 UINT16 emptySpot = 0x4 | ShuffleEntry::REGMASK;
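// The delegate "this" register (ecx) was spilled just above, so its argument slot is
// the initial free location: each pass finds the entry whose destination is the free
// slot, emits that move, and the entry's source becomes the new free slot. This
// resolves the shuffle as a chain without needing an extra temporary register.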
4037
4038 while (true)
4039 {
4040 for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
4041 if (pWalk->dstofs == emptySpot)
4042 break;
4043
4044 if (pWalk->srcofs == ShuffleEntry::SENTINEL)
4045 break;
4046
4047 if ((pWalk->dstofs & ShuffleEntry::REGMASK))
4048 {
4049 if (pWalk->srcofs & ShuffleEntry::REGMASK)
4050 {
4051 // mov <dstReg>,<srcReg>
4052 Emit8(0x8b);
4053 Emit8(static_cast<UINT8>(0300 |
4054 (GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ) << 3) |
4055 (GetX86ArgumentRegisterFromOffset( pWalk->srcofs & ShuffleEntry::OFSMASK ))));
4056 }
4057 else
4058 {
4059 X86EmitEspOffset(0x8b, GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ), pWalk->srcofs+espadjust);
4060 }
4061 }
4062 else
4063 {
4064 // if the destination is not a register, the source shouldn't be either.
4065 _ASSERTE(!(pWalk->srcofs & ShuffleEntry::REGMASK));
4066 if (pWalk->srcofs != pWalk->dstofs)
4067 {
4068 X86EmitEspOffset(0x8b, kEAX, pWalk->srcofs+espadjust);
4069 X86EmitEspOffset(0x89, kEAX, pWalk->dstofs+espadjust);
4070 }
4071 }
4072 emptySpot = pWalk->srcofs;
4073 }
4074
4075 // Capture the stacksizedelta while we're at the end of the list.
4076 _ASSERTE(pWalk->srcofs == ShuffleEntry::SENTINEL);
4077
4078 if (haveMemMemMove)
4079 X86EmitPopReg(SCRATCH_REGISTER_X86REG);
4080
4081#ifdef UNIX_X86_ABI
4082 _ASSERTE(pWalk->stacksizedelta == 0);
4083#endif
4084
4085 if (pWalk->stacksizedelta)
4086 X86EmitAddEsp(pWalk->stacksizedelta);
4087
4088 // Now jump to real target
4089 // JMP [SCRATCHREG]
4090 // we need to jump indirect so that for virtual delegates eax contains a pointer to the indirection cell
4091 X86EmitAddReg(SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtrAux());
4092 static const BYTE bjmpeax[] = { 0xff, 0x20 };
4093 EmitBytes(bjmpeax, sizeof(bjmpeax));
4094
4095#endif // _TARGET_AMD64_
4096}
4097
4098
4099#if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
4100
4101//===========================================================================
4102// Computes hash code for MulticastDelegate.Invoke()
4103UINT_PTR StubLinkerCPU::HashMulticastInvoke(MetaSig* pSig)
4104{
4105 CONTRACTL
4106 {
4107 THROWS;
4108 GC_TRIGGERS;
4109 }
4110 CONTRACTL_END;
4111
4112 ArgIterator argit(pSig);
4113
4114 UINT numStackBytes = argit.SizeOfArgStack();
4115
4116 if (numStackBytes > 0x7FFF)
4117 COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs"));
4118
4119#ifdef _TARGET_AMD64_
4120 // Generate a hash key as follows:
4121 // UINT Arg0Type:2; // R4 (1), R8 (2), other (3)
4122 // UINT Arg1Type:2; // R4 (1), R8 (2), other (3)
4123 // UINT Arg2Type:2; // R4 (1), R8 (2), other (3)
4124 // UINT Arg3Type:2; // R4 (1), R8 (2), other (3)
4125 // UINT NumArgs:24; // number of arguments
4126 // (This should cover all the prestub variations)
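//
// Illustration: a delegate whose Invoke takes (float, double), with no return buffer
// and no stack arguments, hashes to 3 ("this") | (1 << 2) | (2 << 4) = 0x27.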
4127
4128 _ASSERTE(!(numStackBytes & 7));
4129 UINT hash = (numStackBytes / sizeof(void*)) << 8;
4130
4131 UINT argNum = 0;
4132
4133 // NextArg() doesn't take into account the "this" pointer.
4134 // That's why we have to special case it here.
4135 if (argit.HasThis())
4136 {
4137 hash |= 3 << (2*argNum);
4138 argNum++;
4139 }
4140
4141 if (argit.HasRetBuffArg())
4142 {
4143 hash |= 3 << (2*argNum);
4144 argNum++;
4145 }
4146
4147 for (; argNum < 4; argNum++)
4148 {
4149 switch (pSig->NextArgNormalized())
4150 {
4151 case ELEMENT_TYPE_END:
4152 argNum = 4;
4153 break;
4154 case ELEMENT_TYPE_R4:
4155 hash |= 1 << (2*argNum);
4156 break;
4157 case ELEMENT_TYPE_R8:
4158 hash |= 2 << (2*argNum);
4159 break;
4160 default:
4161 hash |= 3 << (2*argNum);
4162 break;
4163 }
4164 }
4165
4166#else // _TARGET_AMD64_
4167
4168 // check if the function is returning a float, in which case the stub has to take
4169 // care of popping the floating point stack except for the last invocation
4170
4171 _ASSERTE(!(numStackBytes & 3));
4172
4173 UINT hash = numStackBytes;
4174
4175 if (CorTypeInfo::IsFloat(pSig->GetReturnType()))
4176 {
4177 hash |= 2;
4178 }
4179#endif // _TARGET_AMD64_
4180
4181 return hash;
4182}
4183
4184#ifdef _TARGET_X86_
4185//===========================================================================
4186// Emits code for MulticastDelegate.Invoke()
4187VOID StubLinkerCPU::EmitDelegateInvoke()
4188{
4189 STANDARD_VM_CONTRACT;
4190
4191 CodeLabel *pNullLabel = NewCodeLabel();
4192
4193 // test THISREG, THISREG
4194 X86EmitR2ROp(0x85, THIS_kREG, THIS_kREG);
4195
4196 // jz null
4197 X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
4198
4199 // mov SCRATCHREG, [THISREG + Delegate.FP] ; Save target stub in register
4200 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtr());
4201
4202 // mov THISREG, [THISREG + Delegate.OR] ; replace "this" pointer
4203 X86EmitIndexRegLoad(THIS_kREG, THIS_kREG, DelegateObject::GetOffsetOfTarget());
4204
4205 // jmp SCRATCHREG
4206 Emit16(0xe0ff | (SCRATCH_REGISTER_X86REG<<8));
4207
4208 // Do a null throw
4209 EmitLabel(pNullLabel);
4210
4211 // mov ECX, CORINFO_NullReferenceException
4212 Emit8(0xb8+kECX);
4213 Emit32(CORINFO_NullReferenceException);
4214
4215 X86EmitCall(NewExternalCodeLabel(GetEEFuncEntryPoint(JIT_InternalThrowFromHelper)), 0);
4216
4217 X86EmitReturn(0);
4218}
4219#endif // _TARGET_X86_
4220
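//===========================================================================
// Emits code for MulticastDelegate.Invoke(): walks the delegate's invocation
// list and calls each subscriber in turn, re-loading the argument registers
// (and re-pushing any stack arguments) before every call.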
4221VOID StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash)
4222{
4223 STANDARD_VM_CONTRACT;
4224
4225 int thisRegOffset = MulticastFrame::GetOffsetOfTransitionBlock() +
4226 TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
4227
4228 // push the methoddesc on the stack
4229 // mov eax, [ecx + offsetof(_methodPtrAux)]
4230 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtrAux());
4231
4232 // Push a MulticastFrame on the stack.
4233 EmitMethodStubProlog(MulticastFrame::GetMethodFrameVPtr(), MulticastFrame::GetOffsetOfTransitionBlock());
4234
4235#ifdef _TARGET_X86_
4236 // Frame is ready to be inspected by debugger for patch location
4237 EmitPatchLabel();
4238#else // _TARGET_AMD64_
4239
4240 // Save register arguments in their home locations.
4241 // Non-FP registers are already saved by EmitMethodStubProlog.
4242 // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
4243
4244 int argNum = 0;
4245 __int32 argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4246 CorElementType argTypes[4];
4247 CorElementType argType;
4248
4249 // 'this'
4250 argOfs += sizeof(void*);
4251 argTypes[argNum] = ELEMENT_TYPE_I8;
4252 argNum++;
4253
4254 do
4255 {
4256 argType = ELEMENT_TYPE_END;
4257
4258 switch ((hash >> (2 * argNum)) & 3)
4259 {
4260 case 0:
4261 argType = ELEMENT_TYPE_END;
4262 break;
4263 case 1:
4264 argType = ELEMENT_TYPE_R4;
4265
4266 // movss dword ptr [rsp + argOfs], xmm?
4267 X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
4268 break;
4269 case 2:
4270 argType = ELEMENT_TYPE_R8;
4271
4272 // movsd qword ptr [rsp + argOfs], xmm?
4273 X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
4274 break;
4275 default:
4276 argType = ELEMENT_TYPE_I;
4277 break;
4278 }
4279
4280 argOfs += sizeof(void*);
4281 argTypes[argNum] = argType;
4282 argNum++;
4283 }
4284 while (argNum < 4 && ELEMENT_TYPE_END != argType);
4285
4286 _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
4287
4288#endif // _TARGET_AMD64_
4289
4290 // TODO: on AMD64, pick different regs for locals so we don't need the pushes
4291
4292 // push edi ;; Save EDI (want to use it as loop index)
4293 X86EmitPushReg(kEDI);
4294
4295 // xor edi,edi ;; Loop counter: EDI=0,1,2...
4296 X86EmitZeroOutReg(kEDI);
4297
4298 CodeLabel *pLoopLabel = NewCodeLabel();
4299 CodeLabel *pEndLoopLabel = NewCodeLabel();
4300
4301 EmitLabel(pLoopLabel);
4302
4303 // Entry:
4304 // EDI == iteration counter
4305
4306 // mov ecx, [esi + this] ;; get delegate
4307 X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
4308
4309 // cmp edi,[ecx]._invocationCount
4310 X86EmitOp(0x3b, kEDI, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
4311
4312 // je ENDLOOP
4313 X86EmitCondJump(pEndLoopLabel, X86CondCode::kJZ);
4314
4315#ifdef _TARGET_AMD64_
4316
4317 INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
4318
4319 INT32 stackUsed, numStackArgs, ofs;
4320
4321 // Push any stack args, plus an extra location
4322 // for rsp alignment if needed
4323
4324 numStackArgs = numStackBytes / sizeof(void*);
4325
4326 // 1 push above, so stack is currently misaligned
4327 const unsigned STACK_ALIGN_ADJUST = 8;
4328
4329 if (!numStackArgs)
4330 {
4331 // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment
4332 stackUsed = 0x20 + STACK_ALIGN_ADJUST;
4333 X86EmitSubEsp(stackUsed);
4334 }
4335 else
4336 {
4337 stackUsed = numStackArgs * sizeof(void*);
4338
4339 // If the stack is misaligned, then an odd number of arguments
4340 // will naturally align the stack.
4341 if ( ((numStackArgs & 1) == 0)
4342 != (STACK_ALIGN_ADJUST == 0))
4343 {
4344 X86EmitPushReg(kRAX);
4345 stackUsed += sizeof(void*);
4346 }
4347
4348 ofs = MulticastFrame::GetOffsetOfTransitionBlock() +
4349 TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
4350
4351 while (numStackArgs--)
4352 {
4353 ofs -= sizeof(void*);
4354
4355 // push [rsi + ofs] ;; Push stack args
4356 X86EmitIndexPush(kESI, ofs);
4357 }
4358
4359 // sub rsp, 20h ;; Create 4 reg arg home locations
4360 X86EmitSubEsp(0x20);
4361
4362 stackUsed += 0x20;
4363 }
4364
4365 for(
4366 argNum = 0, argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4367 argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
4368 argNum++, argOfs += sizeof(void*)
4369 )
4370 {
4371 switch (argTypes[argNum])
4372 {
4373 case ELEMENT_TYPE_R4:
4374 // movss xmm?, dword ptr [rsi + argOfs]
4375 X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
4376 break;
4377 case ELEMENT_TYPE_R8:
4378 // movsd xmm?, qword ptr [rsi + argOfs]
4379 X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
4380 break;
4381 default:
4382 if (c_argRegs[argNum] != THIS_kREG)
4383 {
4384 // mov r*, [rsi + dstOfs]
4385 X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
4386 }
4387 break;
4388 } // switch
4389 }
4390
4391 // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch invocation list
4392 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4393
4394 // mov SCRATCHREG, [SCRATCHREG+m_Array+rdi*8] ;; index into invocation list
4395 X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, static_cast<int>(PtrArray::GetDataOffset()), kEDI, sizeof(void*), k64BitOp);
4396
4397 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4398 X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4399
4400 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4401 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4402
4403 // add rsp, stackUsed ;; Clean up stack
4404 X86EmitAddEsp(stackUsed);
4405
4406 // inc edi
4407 Emit16(0xC7FF);
4408
4409#else // _TARGET_AMD64_
4410
4411 UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
4412
4413 // ..repush & reenregister args..
4414 INT32 ofs = numStackBytes + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4415 while (ofs != MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
4416 {
4417 ofs -= sizeof(void*);
4418 X86EmitIndexPush(kESI, ofs);
4419 }
4420
4421 #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
4422 offsetof(ArgumentRegisters, regname) + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
4423
4424 ENUM_ARGUMENT_REGISTERS_BACKWARD();
4425
4426 #undef ARGUMENT_REGISTER
4427
4428 // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch invocation list
4429 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4430
4431 // mov SCRATCHREG, [SCRATCHREG+m_Array+edi*4] ;; index into invocation list
4432 X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, PtrArray::GetDataOffset(), kEDI, sizeof(void*));
4433
4434 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4435 X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4436
4437 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4438 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4439 INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
4440 // we know that this is a call that can directly call
4441 // managed code
4442
4443 // inc edi
4444 Emit8(0x47);
4445
4446 if (hash & 2) // CorTypeInfo::IsFloat(pSig->GetReturnType())
4447 {
4448 // if the return value is a float/double check if we just did the last call - if not,
4449 // emit the pop of the float stack
4450
4451 // mov SCRATCHREG, [esi + this] ;; get delegate
4452 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, kESI, thisRegOffset);
4453
4454 // cmp edi,[SCRATCHREG]._invocationCount
4455 X86EmitOffsetModRM(0x3b, kEDI, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfInvocationCount());
4456
4457 CodeLabel *pNoFloatStackPopLabel = NewCodeLabel();
4458
4459 // je NOFLOATSTACKPOP
4460 X86EmitCondJump(pNoFloatStackPopLabel, X86CondCode::kJZ);
4461
4462 // fstp 0
4463 Emit16(0xd8dd);
4464
4465 // NoFloatStackPopLabel:
4466 EmitLabel(pNoFloatStackPopLabel);
4467 }
4468
4469#endif // _TARGET_AMD64_
4470
4471 // The debugger may need to stop here, so grab the offset of this code.
4472 EmitPatchLabel();
4473
4474 // jmp LOOP
4475 X86EmitNearJump(pLoopLabel);
4476
4477 //ENDLOOP:
4478 EmitLabel(pEndLoopLabel);
4479
4480 // pop edi ;; Restore edi
4481 X86EmitPopReg(kEDI);
4482
4483 EmitCheckGSCookie(kESI, MulticastFrame::GetOffsetOfGSCookie());
4484
4485 // Epilog
4486 EmitMethodStubEpilog(numStackBytes, MulticastFrame::GetOffsetOfTransitionBlock());
4487}
4488
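//===========================================================================
// Emits code for the secure delegate stub: loads the wrapped ("inner") delegate
// from _invocationList, substitutes its target object for "this", and calls the
// inner delegate's method pointer.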
4489VOID StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash)
4490{
4491 STANDARD_VM_CONTRACT;
4492
4493 int thisRegOffset = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
4494 TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
4495
4496 // push the methoddesc on the stack
4497 // mov eax, [ecx + offsetof(_invocationCount)]
4498 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
4499
4500 // Push a SecureDelegateFrame on the stack.
4501 EmitMethodStubProlog(SecureDelegateFrame::GetMethodFrameVPtr(), SecureDelegateFrame::GetOffsetOfTransitionBlock());
4502
4503#ifdef _TARGET_X86_
4504 // Frame is ready to be inspected by debugger for patch location
4505 EmitPatchLabel();
4506#else // _TARGET_AMD64_
4507
4508 // Save register arguments in their home locations.
4509 // Non-FP registers are already saved by EmitMethodStubProlog.
4510 // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
4511
4512 int argNum = 0;
4513 __int32 argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4514 CorElementType argTypes[4];
4515 CorElementType argType;
4516
4517 // 'this'
4518 argOfs += sizeof(void*);
4519 argTypes[argNum] = ELEMENT_TYPE_I8;
4520 argNum++;
4521
4522 do
4523 {
4524 argType = ELEMENT_TYPE_END;
4525
4526 switch ((hash >> (2 * argNum)) & 3)
4527 {
4528 case 0:
4529 argType = ELEMENT_TYPE_END;
4530 break;
4531 case 1:
4532 argType = ELEMENT_TYPE_R4;
4533
4534 // movss dword ptr [rsp + argOfs], xmm?
4535 X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
4536 break;
4537 case 2:
4538 argType = ELEMENT_TYPE_R8;
4539
4540 // movsd qword ptr [rsp + argOfs], xmm?
4541 X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
4542 break;
4543 default:
4544 argType = ELEMENT_TYPE_I;
4545 break;
4546 }
4547
4548 argOfs += sizeof(void*);
4549 argTypes[argNum] = argType;
4550 argNum++;
4551 }
4552 while (argNum < 4 && ELEMENT_TYPE_END != argType);
4553
4554 _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
4555
4556#endif // _TARGET_AMD64_
4557
4558 // mov ecx, [esi + this] ;; get delegate
4559 X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
4560
4561#ifdef _TARGET_AMD64_
4562
4563 INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
4564
4565 INT32 stackUsed, numStackArgs, ofs;
4566
4567 // Push any stack args, plus an extra location
4568 // for rsp alignment if needed
4569
4570 numStackArgs = numStackBytes / sizeof(void*);
4571
4572 // Unlike the multicast stub there is no extra push above, so the stack is still aligned
4573 const unsigned STACK_ALIGN_ADJUST = 0;
4574
4575 if (!numStackArgs)
4576 {
4577 // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment
4578 stackUsed = 0x20 + STACK_ALIGN_ADJUST;
4579 X86EmitSubEsp(stackUsed);
4580 }
4581 else
4582 {
4583 stackUsed = numStackArgs * sizeof(void*);
4584
4585 // If the stack is misaligned, then an odd number of arguments
4586 // will naturally align the stack.
4587 if ( ((numStackArgs & 1) == 0)
4588 != (STACK_ALIGN_ADJUST == 0))
4589 {
4590 X86EmitPushReg(kRAX);
4591 stackUsed += sizeof(void*);
4592 }
4593
4594 ofs = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
4595 TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
4596
4597 while (numStackArgs--)
4598 {
4599 ofs -= sizeof(void*);
4600
4601 // push [rsi + ofs] ;; Push stack args
4602 X86EmitIndexPush(kESI, ofs);
4603 }
4604
4605 // sub rsp, 20h ;; Create 4 reg arg home locations
4606 X86EmitSubEsp(0x20);
4607
4608 stackUsed += 0x20;
4609 }
4610
4611 int thisArgNum = 0;
4612
4613 for(
4614 argNum = 0, argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4615 argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
4616 argNum++, argOfs += sizeof(void*)
4617 )
4618 {
4619 switch (argTypes[argNum])
4620 {
4621 case ELEMENT_TYPE_R4:
4622 // movss xmm?, dword ptr [rsi + argOfs]
4623 X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
4624 break;
4625 case ELEMENT_TYPE_R8:
4626 // movsd xmm?, qword ptr [rsi + argOfs]
4627 X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
4628 break;
4629 default:
4630 if (c_argRegs[argNum] != THIS_kREG)
4631 {
4632 // mov r*, [rsi + dstOfs]
4633 X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
4634 }
4635 break;
4636 } // switch
4637 }
4638
4639 // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch the inner delegate
4640 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4641
4642 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4643 X86EmitIndexRegLoad(c_argRegs[thisArgNum], SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4644
4645 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4646 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4647
4648 // add rsp, stackUsed ;; Clean up stack
4649 X86EmitAddEsp(stackUsed);
4650
4651#else // _TARGET_AMD64_
4652
4653 UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
4654
4655 // ..repush & reenregister args..
4656 INT32 ofs = numStackBytes + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4657 while (ofs != SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
4658 {
4659 ofs -= sizeof(void*);
4660 X86EmitIndexPush(kESI, ofs);
4661 }
4662
4663 #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
4664 offsetof(ArgumentRegisters, regname) + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
4665
4666 ENUM_ARGUMENT_REGISTERS_BACKWARD();
4667
4668 #undef ARGUMENT_REGISTER
4669
4670 // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch the inner delegate
4671 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4672
4673 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4674 X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4675
4676 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4677 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4678 INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
4679 // we know that this is a call that can directly call
4680 // managed code
4681
4682#endif // _TARGET_AMD64_
4683
4684 // The debugger may need to stop here, so grab the offset of this code.
4685 EmitPatchLabel();
4686
4687 EmitCheckGSCookie(kESI, SecureDelegateFrame::GetOffsetOfGSCookie());
4688
4689 // Epilog
4690 EmitMethodStubEpilog(numStackBytes, SecureDelegateFrame::GetOffsetOfTransitionBlock());
4691}
4692#endif // !CROSSGEN_COMPILE && !FEATURE_STUBS_AS_IL
4693
4694#if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_ARRAYSTUB_AS_IL)
4695
4696// Little helper to generate code to move nbytes bytes of non Ref memory
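// For example: a pointer-aligned size of up to three pointers is emitted as that many
// inline pointer-sized string moves; larger aligned sizes use "mov ecx, n; rep movs";
// any other size falls back to "mov ecx, nbytes; rep movsb".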
4697
4698void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl)
4699{
4700 CONTRACTL
4701 {
4702 THROWS;
4703 GC_NOTRIGGER;
4704 INJECT_FAULT(COMPlusThrowOM(););
4705 }
4706 CONTRACTL_END;
4707
4708 // If the size is pointer-aligned, we'll use movsd
4709 if (IS_ALIGNED(nbytes, sizeof(void*)))
4710 {
4711 // If there are fewer than 4 pointers to copy, "unroll" the "rep movsd"
4712 if (nbytes <= 3*sizeof(void*))
4713 {
4714 while (nbytes > 0)
4715 {
4716 // movsd
4717 sl->X86_64BitOperands();
4718 sl->Emit8(0xa5);
4719
4720 nbytes -= sizeof(void*);
4721 }
4722 }
4723 else
4724 {
4725 // mov ECX, size / 4
4726 sl->Emit8(0xb8+kECX);
4727 sl->Emit32(nbytes / sizeof(void*));
4728
4729 // rep movsd
4730 sl->Emit8(0xf3);
4731 sl->X86_64BitOperands();
4732 sl->Emit8(0xa5);
4733 }
4734 }
4735 else
4736 {
4737 // mov ECX, size
4738 sl->Emit8(0xb8+kECX);
4739 sl->Emit32(nbytes);
4740
4741 // rep movsb
4742 sl->Emit16(0xa4f3);
4743 }
4744}
4745
4746
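// Loads one argument of an array op stub: if the TransitionBlock offset maps to an
// argument register, that register is returned directly; otherwise the value is loaded
// from the stack at [esp + idxloc + ofsadjust] into kRegIfFromMem, which is returned.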
4747X86Reg LoadArrayOpArg (
4748 UINT32 idxloc,
4749 StubLinkerCPU *psl,
4750 X86Reg kRegIfFromMem,
4751 UINT ofsadjust
4752 AMD64_ARG(StubLinkerCPU::X86OperandSize OperandSize = StubLinkerCPU::k64BitOp)
4753 )
4754{
4755 STANDARD_VM_CONTRACT;
4756
4757 if (!TransitionBlock::IsStackArgumentOffset(idxloc))
4758 return GetX86ArgumentRegisterFromOffset(idxloc - TransitionBlock::GetOffsetOfArgumentRegisters());
4759
4760 psl->X86EmitEspOffset(0x8b, kRegIfFromMem, idxloc + ofsadjust AMD64_ARG(OperandSize));
4761 return kRegIfFromMem;
4762}
4763
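// Emits the out-of-line throw path for an array op stub: restores the saved esi/edi,
// loads the exception constant into ecx, pops the return address, removes the
// callee-popped argument bytes, re-pushes the return address, and jumps to
// JIT_InternalThrow.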
4764VOID StubLinkerCPU::EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg)
4765{
4766 STANDARD_VM_CONTRACT;
4767
4768 //ArrayOpStub*Exception
4769 X86EmitPopReg(kESI);
4770 X86EmitPopReg(kEDI);
4771
4772 //mov CORINFO_NullReferenceException_ASM, %ecx
4773 Emit8(0xb8 | kECX);
4774 Emit32(exConst);
4775 //InternalExceptionWorker
4776
4777 X86EmitPopReg(kEDX);
4778 // add pArrayOpScript->m_cbretpop, %esp (was add %eax, %esp)
4779 Emit8(0x81);
4780 Emit8(0xc0 | 0x4);
4781 Emit32(cbRetArg);
4782 X86EmitPushReg(kEDX);
4783 X86EmitNearJump(NewExternalCodeLabel((PVOID)JIT_InternalThrow));
4784}
4785
4786//===========================================================================
4787// Emits code to do an array operation.
4788#ifdef _PREFAST_
4789#pragma warning(push)
4790#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
4791#endif
4792VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript)
4793{
4794 STANDARD_VM_CONTRACT;
4795
4796 // This is the offset to the parameters/what's already pushed on the stack:
4797 // return address.
4798 const INT locsize = sizeof(void*);
4799
4800 // ArrayOpScript's stack offsets are built using ArgIterator, which
4801 // assumes a TransitionBlock has been pushed, which is not the case
4802 // here. rsp + ofsadjust should point at the first argument. Any further
4803 // stack modifications below need to adjust ofsadjust appropriately.
4804 // baseofsadjust needs to be the stack adjustment at the entry point -
4805 // this is used further below to compute how much stack space was used.
4806
4807 INT ofsadjust = locsize - (INT)sizeof(TransitionBlock);
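// With that adjustment, an ArgIterator stack offset X is addressed here simply as
// [esp/rsp + X + ofsadjust] (ofsadjust grows below as more registers are pushed).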
4808
4809 // Register usage
4810 //
4811 // x86 AMD64
4812 // Inputs:
4813 // managed array THIS_kREG (ecx) THIS_kREG (rcx)
4814 // index 0 edx rdx
4815 // index 1/value <stack> r8
4816 // index 2/value <stack> r9
4817 // expected element type for LOADADDR eax rax rdx
4818 // Working registers:
4819 // total (accumulates unscaled offset) edi r10
4820 // factor (accumulates the slice factor) esi r11
4821 X86Reg kArrayRefReg = THIS_kREG;
4822#ifdef _TARGET_AMD64_
4823 const X86Reg kArrayMTReg = kR10;
4824 const X86Reg kTotalReg = kR10;
4825 const X86Reg kFactorReg = kR11;
4826#else
4827 const X86Reg kArrayMTReg = kESI;
4828 const X86Reg kTotalReg = kEDI;
4829 const X86Reg kFactorReg = kESI;
4830#endif
4831
4832#ifdef _TARGET_AMD64_
4833 // Simplifying assumption for fNeedPrologue.
4834 _ASSERTE(!pArrayOpScript->m_gcDesc || (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER));
4835 // Simplifying assumption for saving rsi and rdi.
4836 _ASSERTE(!(pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER) || ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize));
4837
4838 // Cases where we need to make calls
4839 BOOL fNeedScratchArea = ( (pArrayOpScript->m_flags & (ArrayOpScript::NEEDSTYPECHECK | ArrayOpScript::NEEDSWRITEBARRIER))
4840 && ( pArrayOpScript->m_op == ArrayOpScript::STORE
4841 || ( pArrayOpScript->m_op == ArrayOpScript::LOAD
4842 && (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER))));
4843
4844 // Cases where we need to copy large values
4845 BOOL fNeedRSIRDI = ( ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize)
4846 && ArrayOpScript::LOADADDR != pArrayOpScript->m_op);
4847
4848 BOOL fNeedPrologue = ( fNeedScratchArea
4849 || fNeedRSIRDI);
4850#endif
4851
4852 X86Reg kValueReg;
4853
4854 CodeLabel *Epilog = NewCodeLabel();
4855 CodeLabel *Inner_nullexception = NewCodeLabel();
4856 CodeLabel *Inner_rangeexception = NewCodeLabel();
4857 CodeLabel *Inner_typeMismatchexception = NULL;
4858
4859 //
4860 // Set up the stack frame.
4861 //
4862 //
4863 // x86:
4864 // value
4865 // <index n-1>
4866 // ...
4867 // <index 1>
4868 // return address
4869 // saved edi
4870 // esp -> saved esi
4871 //
4872 //
4873 // AMD64:
4874 // value, if rank > 2
4875 // ...
4876 // + 0x48 more indices
4877 // + 0x40 r9 home
4878 // + 0x38 r8 home
4879 // + 0x30 rdx home
4880 // + 0x28 rcx home
4881 // + 0x20 return address
4882 // + 0x18 scratch area (callee's r9)
4883 // + 0x10 scratch area (callee's r8)
4884 // + 8 scratch area (callee's rdx)
4885 // rsp -> scratch area (callee's rcx)
4886 //
4887 // If the element type is a value class w/ object references, then rsi
4888 // and rdi will also be saved above the scratch area:
4889 //
4890 // ...
4891 // + 0x28 saved rsi
4892 // + 0x20 saved rdi
4893 // + 0x18 scratch area (callee's r9)
4894 // + 0x10 scratch area (callee's r8)
4895 // + 8 scratch area (callee's rdx)
4896 // rsp -> scratch area (callee's rcx)
4897 //
4898 // And if no call or movsb is necessary, then the scratch area sits
4899 // directly under the MethodDesc*.
4900
4901 BOOL fSavedESI = FALSE;
4902 BOOL fSavedEDI = FALSE;
4903
4904#ifdef _TARGET_AMD64_
4905 if (fNeedPrologue)
4906 {
4907 // Save argument registers if we'll be making a call before using
4908 // them. Note that in this case the element value will always be an
4909 // object type, and never be in an xmm register.
4910
4911 if ( (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
4912 && ArrayOpScript::STORE == pArrayOpScript->m_op)
4913 {
4914 // mov [rsp+0x08], rcx
4915 X86EmitEspOffset(0x89, kRCX, 0x08);
4916 X86EmitEspOffset(0x89, kRDX, 0x10);
4917 X86EmitEspOffset(0x89, kR8, 0x18);
4918
4919 if (pArrayOpScript->m_rank >= 2)
4920 X86EmitEspOffset(0x89, kR9, 0x20);
4921 }
4922
4923 if (fNeedRSIRDI)
4924 {
4925 X86EmitPushReg(kRSI);
4926 X86EmitPushReg(kRDI);
4927
4928 fSavedESI = fSavedEDI = TRUE;
4929
4930 ofsadjust += 0x10;
4931 }
4932
4933 if (fNeedScratchArea)
4934 {
4935 // Callee scratch area (0x8 for aligned esp)
4936 X86EmitSubEsp(sizeof(ArgumentRegisters) + 0x8);
4937 ofsadjust += sizeof(ArgumentRegisters) + 0x8;
4938 }
4939 }
4940#else
4941 // Preserve the callee-saved registers
4942 // NOTE: if you change the sequence of these pushes, you must also update:
4943 // ArrayOpStubNullException
4944 // ArrayOpStubRangeException
4945 // ArrayOpStubTypeMismatchException
4946 _ASSERTE( kTotalReg == kEDI);
4947 X86EmitPushReg(kTotalReg);
4948 _ASSERTE( kFactorReg == kESI);
4949 X86EmitPushReg(kFactorReg);
4950
4951 fSavedESI = fSavedEDI = TRUE;
4952
4953 ofsadjust += 2*sizeof(void*);
4954#endif
4955
4956 // Check for null.
4957 X86EmitR2ROp(0x85, kArrayRefReg, kArrayRefReg); // TEST ECX, ECX
4958 X86EmitCondJump(Inner_nullexception, X86CondCode::kJZ); // jz Inner_nullexception
4959
4960 // Do Type Check if needed
4961 if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
4962 {
4963 if (pArrayOpScript->m_op == ArrayOpScript::STORE)
4964 {
4965 // Get the value to be stored.
4966 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kEAX, ofsadjust);
4967
4968 X86EmitR2ROp(0x85, kValueReg, kValueReg); // TEST kValueReg, kValueReg
4969 CodeLabel *CheckPassed = NewCodeLabel();
4970 X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // storing NULL is OK
4971
4972 // mov EAX, element type ; possibly trashes kValueReg
4973 X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp)); // mov ESI/R10, [kArrayRefReg]
4974
4975 X86EmitOp(0x8b, kEAX, kValueReg, 0 AMD64_ARG(k64BitOp)); // mov EAX, [kValueReg] ; possibly trashes kValueReg
4976 // cmp EAX, [ESI/R10+m_ElementType]
4977
4978 X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
4979 X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Exact match is OK
4980
4981 X86EmitRegLoad(kEAX, (UINT_PTR)g_pObjectClass); // mov EAX, g_pObjectMethodTable
4982 // cmp EAX, [ESI/R10+m_ElementType]
4983
4984 X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
4985 X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Assigning to array of object is OK
4986
4987 // Try to call the fast helper first ( ObjIsInstanceOfNoGC ).
4988 // If that fails we will fall back to calling the slow helper ( ArrayStoreCheck ) that erects a frame.
4989 // See also JitInterfaceX86::JIT_Stelem_Ref
4990
4991#ifdef _TARGET_AMD64_
4992 // RCX contains pointer to object to check (Object*)
4993 // RDX contains array type handle
4994
4995 // mov RCX, [rsp+offsetToObject] ; RCX = Object*
4996 X86EmitEspOffset(0x8b, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
4997
4998 // get Array TypeHandle
4999 // mov RDX, [RSP+offsetOfTypeHandle]
5000
5001 X86EmitEspOffset(0x8b, kRDX, ofsadjust
5002 + TransitionBlock::GetOffsetOfArgumentRegisters()
5003 + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
5004
5005 // mov RDX, [kArrayMTReg+offsetof(MethodTable, m_ElementType)]
5006 X86EmitIndexRegLoad(kRDX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle());
5007
5008#else
5009 X86EmitPushReg(kEDX); // Save EDX
5010 X86EmitPushReg(kECX); // Pass array object
5011
5012 X86EmitIndexPush(kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle()); // push [kArrayMTReg + m_ElementType] ; Array element type handle
5013
5014 // get address of value to store
5015 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
5016 X86EmitSPIndexPush(pArrayOpScript->m_fValLoc + ofsadjust + 3*sizeof(void*)); // push [ESP+offset] ; the object pointer
5017
5018#endif // _TARGET_AMD64_
5019
5020
5021 // Emit a call to the fast helper.
5022 // A side effect of this is that we also generate a "jnz Epilog" that the
5023 // fast path does not need; however, emitting it there is harmless, and it
5024 // keeps the cleanup code much simpler:
5025 // there is only one place where the stack is cleaned up and
5026 // restored to its original state.
5027 X86EmitCall(NewExternalCodeLabel((LPVOID)ObjIsInstanceOfNoGC), 0);
5028 X86EmitCmpRegImm32( kEAX, TypeHandle::CanCast); // CMP EAX, CanCast ; if ObjIsInstanceOfNoGC returns CanCast, we will go the fast path
5029 CodeLabel * Cleanup = NewCodeLabel();
5030 X86EmitCondJump(Cleanup, X86CondCode::kJZ);
5031
5032#ifdef _TARGET_AMD64_
5033 // get address of value to store
5034 // lea rcx, [rsp+offs]
5035 X86EmitEspOffset(0x8d, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
5036
5037 // get address of 'this'/rcx
5038 // lea rdx, [rsp+offs]
5039 X86EmitEspOffset(0x8d, kRDX, ofsadjust
5040 + TransitionBlock::GetOffsetOfArgumentRegisters()
5041 + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
5042
5043#else
5044 // The stack is already setup correctly for the slow helper.
5045 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
5046 X86EmitEspOffset(0x8d, kECX, pArrayOpScript->m_fValLoc + ofsadjust + 2*sizeof(void*)); // lea ECX, [ESP+offset]
5047
5048 // get address of 'this'
5049 X86EmitEspOffset(0x8d, kEDX, 0); // lea EDX, [ESP] ; (address of ECX)
5050
5051
5052#endif
5053 AMD64_ONLY(_ASSERTE(fNeedScratchArea));
5054 X86EmitCall(NewExternalCodeLabel((LPVOID)ArrayStoreCheck), 0);
5055
5056 EmitLabel(Cleanup);
5057#ifdef _TARGET_AMD64_
5058 X86EmitEspOffset(0x8b, kRCX, 0x00 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5059 X86EmitEspOffset(0x8b, kRDX, 0x08 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5060 X86EmitEspOffset(0x8b, kR8, 0x10 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5061
5062 if (pArrayOpScript->m_rank >= 2)
5063 X86EmitEspOffset(0x8b, kR9, 0x18 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5064#else
5065 X86EmitPopReg(kECX); // restore regs
5066 X86EmitPopReg(kEDX);
5067
5068
5069 X86EmitR2ROp(0x3B, kEAX, kEAX); // CMP EAX, EAX
5070 X86EmitCondJump(Epilog, X86CondCode::kJNZ); // This branch never taken, but epilog walker uses it
5071#endif
5072
5073 EmitLabel(CheckPassed);
5074 }
5075 else
5076 {
5077 _ASSERTE(pArrayOpScript->m_op == ArrayOpScript::LOADADDR);
5078
5079 // Load up the hidden type parameter into 'typeReg'
5080 X86Reg typeReg = LoadArrayOpArg(pArrayOpScript->m_typeParamOffs, this, kEAX, ofsadjust);
5081
5082 // 'typeReg' holds the typeHandle for the ARRAY. This must be an ArrayTypeDesc*, so
5083 // mask off the low two bits to get the TypeDesc*
5084 X86EmitR2ROp(0x83, (X86Reg)4, typeReg); // AND typeReg, 0xFFFFFFFC
5085 Emit8(0xFC);
5086
5087 // If 'typeReg' is NULL then we're executing the readonly ::Address and no type check is
5088 // needed.
5089 CodeLabel *Inner_passedTypeCheck = NewCodeLabel();
5090
5091 X86EmitCondJump(Inner_passedTypeCheck, X86CondCode::kJZ);
5092
5093 // Get the parameter of the parameterize type
5094 // mov typeReg, [typeReg.m_Arg]
5095 X86EmitOp(0x8b, typeReg, typeReg, offsetof(ParamTypeDesc, m_Arg) AMD64_ARG(k64BitOp));
5096
5097 // Compare this against the element type of the array.
5098 // mov ESI/R10, [kArrayRefReg]
5099 X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp));
5100 // cmp typeReg, [ESI/R10+m_ElementType];
5101 X86EmitOp(0x3b, typeReg, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
5102
5103 // Throw error if not equal
5104 Inner_typeMismatchexception = NewCodeLabel();
5105 X86EmitCondJump(Inner_typeMismatchexception, X86CondCode::kJNZ);
5106 EmitLabel(Inner_passedTypeCheck);
5107 }
5108 }
5109
5110 CodeLabel* DoneCheckLabel = 0;
5111 if (pArrayOpScript->m_rank == 1 && pArrayOpScript->m_fHasLowerBounds)
5112 {
5113 DoneCheckLabel = NewCodeLabel();
5114 CodeLabel* NotSZArrayLabel = NewCodeLabel();
5115
5116 // For rank-1 arrays we might actually have two different layouts, depending on
5117 // whether the array is ELEMENT_TYPE_ARRAY or ELEMENT_TYPE_SZARRAY.
5118
5119 // mov EAX, [ARRAY] // EAX holds the method table
5120 X86_64BitOperands();
5121 X86EmitOp(0x8b, kEAX, kArrayRefReg);
5122
5123 // test [EAX + m_dwFlags], enum_flag_Category_IfArrayThenSzArray
5124 X86_64BitOperands();
5125 X86EmitOffsetModRM(0xf7, (X86Reg)0, kEAX, MethodTable::GetOffsetOfFlags());
5126 Emit32(MethodTable::GetIfArrayThenSzArrayFlag());
5127
5128 // jz NotSZArrayLabel
5129 X86EmitCondJump(NotSZArrayLabel, X86CondCode::kJZ);
5130
5131 //Load the passed-in index into the scratch register.
5132 const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs();
5133 X86Reg idxReg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5134
5135 // cmp idxReg, [kArrayRefReg + LENGTH]
5136 X86EmitOp(0x3b, idxReg, kArrayRefReg, ArrayBase::GetOffsetOfNumComponents());
5137
5138 // jae Inner_rangeexception
5139 X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
5140
5141 // <TODO> if we cared efficiency of this, this move can be optimized</TODO>
5142 X86EmitR2ROp(0x8b, kTotalReg, idxReg AMD64_ARG(k32BitOp));
5143
5144 // sub ARRAY, 8 ; the 8 accounts for the lower bound and dim count in the ARRAY
5145 X86EmitSubReg(kArrayRefReg, 8); // adjust this pointer so that indexing works out for SZARRAY
5146
5147 X86EmitNearJump(DoneCheckLabel);
5148 EmitLabel(NotSZArrayLabel);
5149 }
5150
5151 // For each index, range-check and mix into accumulated total.
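// TOTAL accumulates sum_k (index_k - lowerBound_k) * FACTOR_k, where FACTOR_k is the
// product of the lengths of the dimensions handled by earlier iterations (so the
// first iteration effectively uses FACTOR == 1 and skips the multiply).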
5152 UINT idx = pArrayOpScript->m_rank;
5153 BOOL firstTime = TRUE;
5154 while (idx--)
5155 {
5156 const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs() + idx;
5157
5158 //Load the passed-in index into the scratch register.
5159 X86Reg srcreg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
5160 if (SCRATCH_REGISTER_X86REG != srcreg)
5161 X86EmitR2ROp(0x8b, SCRATCH_REGISTER_X86REG, srcreg AMD64_ARG(k32BitOp));
5162
5163 // sub SCRATCH, dword ptr [kArrayRefReg + LOWERBOUND]
5164 if (pArrayOpScript->m_fHasLowerBounds)
5165 {
5166 X86EmitOp(0x2b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lboundofs);
5167 }
5168
5169 // cmp SCRATCH, dword ptr [kArrayRefReg + LENGTH]
5170 X86EmitOp(0x3b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lengthofs);
5171
5172 // jae Inner_rangeexception
5173 X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
5174
5175
5176 // SCRATCH == idx - LOWERBOUND
5177 //
5178 // imul SCRATCH, FACTOR
5179 if (!firstTime)
5180 {
5181 //Can skip the first time since FACTOR==1
5182 X86EmitR2ROp(0xaf0f, SCRATCH_REGISTER_X86REG, kFactorReg AMD64_ARG(k32BitOp));
5183 }
5184
5185 // TOTAL += SCRATCH
5186 if (firstTime)
5187 {
5188 // First time, we must zero-init TOTAL. Since
5189 // zero-initing and then adding is just equivalent to a
5190 // "mov", emit a "mov"
5191 // mov TOTAL, SCRATCH
5192 X86EmitR2ROp(0x8b, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
5193 }
5194 else
5195 {
5196 // add TOTAL, SCRATCH
5197 X86EmitR2ROp(0x03, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
5198 }
5199
5200 // FACTOR *= [kArrayRefReg + LENGTH]
5201 if (idx != 0)
5202 {
5203 // No need to update FACTOR on the last iteration
5204 // since we won't use it again
5205
5206 if (firstTime)
5207 {
5208 // must init FACTOR to 1 first: hence,
5209 // the "imul" becomes a "mov"
5210 // mov FACTOR, [kArrayRefReg + LENGTH]
5211 X86EmitOp(0x8b, kFactorReg, kArrayRefReg, pai->m_lengthofs);
5212 }
5213 else
5214 {
5215 // imul FACTOR, [kArrayRefReg + LENGTH]
5216 X86EmitOp(0xaf0f, kFactorReg, kArrayRefReg, pai->m_lengthofs);
5217 }
5218 }
5219
5220 firstTime = FALSE;
5221 }
5222
5223 if (DoneCheckLabel != 0)
5224 EmitLabel(DoneCheckLabel);
5225
5226 // Pass these values to X86EmitArrayOp() to generate the element address.
5227 X86Reg elemBaseReg = kArrayRefReg;
5228 X86Reg elemScaledReg = kTotalReg;
5229 UINT32 elemSize = pArrayOpScript->m_elemsize;
5230 UINT32 elemOfs = pArrayOpScript->m_ofsoffirst;
5231
5232 if (!(elemSize == 1 || elemSize == 2 || elemSize == 4 || elemSize == 8))
5233 {
5234 switch (elemSize)
5235 {
5236 // No way to express this as a SIB byte. Fold the scale
5237 // into TOTAL.
5238
5239 case 16:
5240 // shl TOTAL,4
5241 X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
5242 Emit8(4);
5243 break;
5244
5245 case 32:
5246 // shl TOTAL,5
5247 X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
5248 Emit8(5);
5249 break;
5250
5251 case 64:
5252 // shl TOTAL,6
5253 X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
5254 Emit8(6);
5255 break;
5256
5257 default:
5258 // imul TOTAL, elemScale
5259 X86EmitR2ROp(0x69, kTotalReg, kTotalReg AMD64_ARG(k32BitOp));
5260 Emit32(elemSize);
5261 break;
5262 }
5263 elemSize = 1;
5264 }
5265
5266 _ASSERTE(FitsInU1(elemSize));
5267 BYTE elemScale = static_cast<BYTE>(elemSize);
5268
5269 // Now, do the operation:
5270
5271 switch (pArrayOpScript->m_op)
5272 {
5273 case ArrayOpScript::LOADADDR:
5274 // lea eax, ELEMADDR
5275 X86EmitOp(0x8d, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5276 break;
5277
5278 case ArrayOpScript::LOAD:
5279 if (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER)
5280 {
5281 // Ensure that these registers have been saved!
5282 _ASSERTE(fSavedESI && fSavedEDI);
5283
5284 //lea esi, ELEMADDR
5285 X86EmitOp(0x8d, kESI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5286
5287 _ASSERTE(!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fRetBufLoc));
5288 // mov edi, retbufptr
5289 X86EmitR2ROp(0x8b, kEDI, GetX86ArgumentRegisterFromOffset(pArrayOpScript->m_fRetBufLoc - TransitionBlock::GetOffsetOfArgumentRegisters()));
5290
5291COPY_VALUE_CLASS:
5292 {
5293 size_t size = pArrayOpScript->m_elemsize;
5294 size_t total = 0;
5295 if(pArrayOpScript->m_gcDesc)
5296 {
5297 CGCDescSeries* cur = pArrayOpScript->m_gcDesc->GetHighestSeries();
5298 if ((cur->startoffset-elemOfs) > 0)
5299 generate_noref_copy ((unsigned) (cur->startoffset - elemOfs), this);
5300 total += cur->startoffset - elemOfs;
5301
5302 SSIZE_T cnt = (SSIZE_T) pArrayOpScript->m_gcDesc->GetNumSeries();
5303 // special array encoding
5304 _ASSERTE(cnt < 0);
5305
5306 for (SSIZE_T __i = 0; __i > cnt; __i--)
5307 {
5308 HALF_SIZE_T skip = cur->val_serie[__i].skip;
5309 HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs;
5310 total += nptrs*sizeof (DWORD*);
5311 do
5312 {
5313 AMD64_ONLY(_ASSERTE(fNeedScratchArea));
5314
5315 X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_ByRefWriteBarrier), 0);
5316 } while (--nptrs);
5317 if (skip > 0)
5318 {
5319 //check if we are at the end of the series
5320 if (__i == (cnt + 1))
5321 skip = skip - (HALF_SIZE_T)(cur->startoffset - elemOfs);
5322 if (skip > 0)
5323 generate_noref_copy (skip, this);
5324 }
5325 total += skip;
5326 }
5327
5328 _ASSERTE (size == total);
5329 }
5330 else
5331 {
5332 // no ref anywhere, just copy the bytes.
5333 _ASSERTE (size);
5334 generate_noref_copy ((unsigned)size, this);
5335 }
5336 }
5337 }
5338 else
5339 {
5340 switch (pArrayOpScript->m_elemsize)
5341 {
5342 case 1:
5343 // mov[zs]x eax, byte ptr ELEMADDR
5344 X86EmitOp(pArrayOpScript->m_signed ? 0xbe0f : 0xb60f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5345 break;
5346
5347 case 2:
5348 // mov[zs]x eax, word ptr ELEMADDR
5349 X86EmitOp(pArrayOpScript->m_signed ? 0xbf0f : 0xb70f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5350 break;
5351
5352 case 4:
5353 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5354 {
5355#ifdef _TARGET_AMD64_
5356 // movss xmm0, dword ptr ELEMADDR
5357 Emit8(0xf3);
5358 X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5359#else // !_TARGET_AMD64_
5360 // fld dword ptr ELEMADDR
5361 X86EmitOp(0xd9, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5362#endif // !_TARGET_AMD64_
5363 }
5364 else
5365 {
5366 // mov eax, ELEMADDR
5367 X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5368 }
5369 break;
5370
5371 case 8:
5372 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5373 {
5374#ifdef _TARGET_AMD64_
5375 // movsd xmm0, qword ptr ELEMADDR
5376 Emit8(0xf2);
5377 X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5378#else // !_TARGET_AMD64_
5379 // fld qword ptr ELEMADDR
5380 X86EmitOp(0xdd, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5381#endif // !_TARGET_AMD64_
5382 }
5383 else
5384 {
5385 // mov eax, ELEMADDR
5386 X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5387#ifdef _TARGET_X86_
5388 // mov edx, ELEMADDR + 4
5389 X86EmitOp(0x8b, kEDX, elemBaseReg, elemOfs + 4, elemScaledReg, elemScale);
5390#endif
5391 }
5392 break;
5393
5394 default:
5395 _ASSERTE(0);
5396 }
5397 }
5398
5399 break;
5400
5401 case ArrayOpScript::STORE:
5402
5403 switch (pArrayOpScript->m_elemsize)
5404 {
5405 case 1:
5406 // mov SCRATCH, [esp + valoffset]
5407 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5408 // mov byte ptr ELEMADDR, SCRATCH.b
5409 X86EmitOp(0x88, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5410 break;
5411 case 2:
5412 // mov SCRATCH, [esp + valoffset]
5413 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5414 // mov word ptr ELEMADDR, SCRATCH.w
5415 Emit8(0x66);
5416 X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5417 break;
5418 case 4:
5419#ifndef _TARGET_AMD64_
5420 if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER)
5421 {
5422 // mov SCRATCH, [esp + valoffset]
5423 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5424
5425 _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
5426 // lea edx, ELEMADDR
5427 X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5428
5429 // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX)
5430 X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0);
5431 }
5432 else
5433#else // _TARGET_AMD64_
5434 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5435 {
5436 if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
5437 {
5438 kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
5439 }
5440 else
5441 {
5442 kValueReg = (X86Reg)0; // xmm0
5443
5444 // movss xmm0, dword ptr [rsp+??]
5445 Emit8(0xf3);
5446 X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
5447 }
5448
5449 // movss dword ptr ELEMADDR, xmm?
5450 Emit8(0xf3);
5451 X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5452 }
5453 else
5454#endif // _TARGET_AMD64_
5455 {
5456 // mov SCRATCH, [esp + valoffset]
5457 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
5458
5459 // mov ELEMADDR, SCRATCH
5460 X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5461 }
5462 break;
5463
5464 case 8:
5465
5466 if (!(pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER))
5467 {
5468#ifdef _TARGET_AMD64_
5469 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5470 {
5471 if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
5472 {
5473 kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
5474 }
5475 else
5476 {
5477 kValueReg = (X86Reg)0; // xmm0
5478
5479 // movsd xmm0, qword ptr [rsp+??]
5480 Emit8(0xf2);
5481 X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
5482 }
5483
5484 // movsd qword ptr ELEMADDR, xmm?
5485 Emit8(0xf2);
5486 X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5487 }
5488 else
5489 {
5490 // mov SCRATCH, [esp + valoffset]
5491 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5492
5493 // mov ELEMADDR, SCRATCH
5494 X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
5495 }
5496#else // !_TARGET_AMD64_
5497 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case
5498 // mov SCRATCH, [esp + valoffset]
5499 X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust);
5500 // mov ELEMADDR, SCRATCH
5501 X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5502
5503 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case
5504 // mov SCRATCH, [esp + valoffset + 4]
5505 X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust + 4);
5506 // mov ELEMADDR+4, SCRATCH
5507 X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs+4, elemScaledReg, elemScale);
5508#endif // !_TARGET_AMD64_
5509 break;
5510 }
5511#ifdef _TARGET_AMD64_
5512 else
5513 {
5514 _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
5515 // lea rcx, ELEMADDR
5516 X86EmitOp(0x8d, kRCX, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
5517
5518 // mov rdx, [rsp + valoffset]
5519 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRDX, ofsadjust);
5520 _ASSERT(kRCX != kValueReg);
5521 if (kRDX != kValueReg)
5522 X86EmitR2ROp(0x8b, kRDX, kValueReg);
5523
5524 _ASSERTE(fNeedScratchArea);
5525 X86EmitCall(NewExternalCodeLabel((PVOID)JIT_WriteBarrier), 0);
5526 break;
5527 }
5528#endif // _TARGET_AMD64_
5529 // FALL THROUGH (on x86)
5530 default:
5531 // Ensure that these registers have been saved!
5532 _ASSERTE(fSavedESI && fSavedEDI);
5533
5534#ifdef _TARGET_AMD64_
5535 // mov rsi, [rsp + valoffset]
5536 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRSI, ofsadjust);
5537 if (kRSI != kValueReg)
5538 X86EmitR2ROp(0x8b, kRSI, kValueReg);
5539#else // !_TARGET_AMD64_
5540 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc));
5541 // lea esi, [esp + valoffset]
5542 X86EmitEspOffset(0x8d, kESI, pArrayOpScript->m_fValLoc + ofsadjust);
5543#endif // !_TARGET_AMD64_
5544
5545 // lea edi, ELEMADDR
5546 X86EmitOp(0x8d, kEDI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5547 goto COPY_VALUE_CLASS;
5548 }
5549 break;
5550
5551 default:
5552 _ASSERTE(0);
5553 }
5554
5555 EmitLabel(Epilog);
5556
5557#ifdef _TARGET_AMD64_
5558 if (fNeedPrologue)
5559 {
5560 if (fNeedScratchArea)
5561 {
5562 // Throw away scratch area
5563 X86EmitAddEsp(sizeof(ArgumentRegisters) + 0x8);
5564 }
5565
5566 if (fSavedEDI)
5567 X86EmitPopReg(kRDI);
5568
5569 if (fSavedESI)
5570 X86EmitPopReg(kRSI);
5571 }
5572
5573 X86EmitReturn(0);
5574#else // !_TARGET_AMD64_
5575 // Restore the callee-saved registers
5576 X86EmitPopReg(kFactorReg);
5577 X86EmitPopReg(kTotalReg);
5578
5579#ifndef UNIX_X86_ABI
5580 // ret N
5581 X86EmitReturn(pArrayOpScript->m_cbretpop);
5582#else
5583 X86EmitReturn(0);
5584#endif
5585#endif // !_TARGET_AMD64_
5586
5587 // Exception points must clean up the stack for all those extra args.
5588 // kFactorReg and kTotalReg will be popped by the jump targets.
5589
5590 void *pvExceptionThrowFn;
5591
5592#if defined(_TARGET_AMD64_)
5593#define ARRAYOP_EXCEPTION_HELPERS(base) { (PVOID)base, (PVOID)base##_RSIRDI, (PVOID)base##_ScratchArea, (PVOID)base##_RSIRDI_ScratchArea }
5594 static void *rgNullExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubNullException);
5595 static void *rgRangeExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubRangeException);
5596 static void *rgTypeMismatchExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
5597#undef ARRAYOP_EXCEPTION_HELPERS
5598
5599 UINT iExceptionHelper = (fNeedRSIRDI ? 1 : 0) + (fNeedScratchArea ? 2 : 0);
5600#endif // defined(_TARGET_AMD64_)
5601
5602 EmitLabel(Inner_nullexception);
5603
5604#ifndef _TARGET_AMD64_
5605 pvExceptionThrowFn = (LPVOID)ArrayOpStubNullException;
5606
5607 Emit8(0xb8); // mov EAX, <stack cleanup>
5608 Emit32(pArrayOpScript->m_cbretpop);
5609#else //_TARGET_AMD64_
5610 pvExceptionThrowFn = rgNullExceptionHelpers[iExceptionHelper];
5611#endif //!_TARGET_AMD64_
5612 X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
5613
5614 EmitLabel(Inner_rangeexception);
5615#ifndef _TARGET_AMD64_
5616 pvExceptionThrowFn = (LPVOID)ArrayOpStubRangeException;
5617 Emit8(0xb8); // mov EAX, <stack cleanup>
5618 Emit32(pArrayOpScript->m_cbretpop);
5619#else //_TARGET_AMD64_
5620 pvExceptionThrowFn = rgRangeExceptionHelpers[iExceptionHelper];
5621#endif //!_TARGET_AMD64_
5622 X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
5623
5624 if (Inner_typeMismatchexception != NULL)
5625 {
5626 EmitLabel(Inner_typeMismatchexception);
5627#ifndef _TARGET_AMD64_
5628 pvExceptionThrowFn = (LPVOID)ArrayOpStubTypeMismatchException;
5629 Emit8(0xb8); // mov EAX, <stack cleanup>
5630 Emit32(pArrayOpScript->m_cbretpop);
5631#else //_TARGET_AMD64_
5632 pvExceptionThrowFn = rgTypeMismatchExceptionHelpers[iExceptionHelper];
5633#endif //!_TARGET_AMD64_
5634 X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
5635 }
5636}
5637#ifdef _PREFAST_
5638#pragma warning(pop)
5639#endif
5640
5641#endif // !CROSSGEN_COMPILE && !FEATURE_ARRAYSTUB_AS_IL
5642
5643#if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
5644//===========================================================================
5645// Emits code to break into debugger
5646VOID StubLinkerCPU::EmitDebugBreak()
5647{
5648 STANDARD_VM_CONTRACT;
5649
5650 // int3
5651 Emit8(0xCC);
5652}
5653
5654#if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
5655
5656#ifdef _MSC_VER
5657#pragma warning(push)
5658#pragma warning (disable : 4740) // There is inline asm code in this function, which disables
5659 // global optimizations.
5660#pragma warning (disable : 4731)
5661#endif // _MSC_VER
5662Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame)
5663{
5664
5665 WRAPPER_NO_CONTRACT;
5666
5667 Thread *pThread = NULL;
5668
5669 HRESULT hr = S_OK;
5670
5671 // This means that a thread is FIRST coming in from outside the EE.
5672 BEGIN_ENTRYPOINT_THROWS;
5673 pThread = SetupThreadNoThrow(&hr);
5674 END_ENTRYPOINT_THROWS;
5675
5676 if (pThread == NULL) {
5677 // Unwind stack, and return hr
5678 // NOTE: assumes __stdcall
5679 // Note that this code does not handle the rare COM signatures that do not return HRESULT
5680 // compute the callee pop stack bytes
5681 UINT numArgStackBytes = pFrame->GetNumCallerStackBytes();
5682 unsigned frameSize = sizeof(Frame) + sizeof(LPVOID);
5683 LPBYTE iEsp = ((LPBYTE)pFrame) + ComMethodFrame::GetOffsetOfCalleeSavedRegisters();
5684 __asm
5685 {
5686 mov eax, hr
5687 mov edx, numArgStackBytes
5688 //*****************************************
5689 // reset the stack pointer
5690 // none of the locals above can be used in the asm below
            // if we whack the stack pointer
5692 mov esp, iEsp
5693 // pop callee saved registers
5694 pop edi
5695 pop esi
5696 pop ebx
5697 pop ebp
5698 pop ecx ; //return address
5699 // pop the callee cleanup stack args
5700 add esp, edx ;// callee cleanup of args
5701 jmp ecx; // jump to the address to continue execution
5702
5703 // We will never get here. This "ret" is just so that code-disassembling
5704 // profilers know to stop disassembling any further
5705 ret
5706 }
5707 }
5708
5709 return pThread;
5710}
5711#if defined(_MSC_VER)
5712#pragma warning(pop)
5713#endif
5714
5715#endif // FEATURE_COMINTEROP && _TARGET_X86_
5716
5717#endif // !CROSSGEN_COMPILE && !FEATURE_STUBS_AS_IL
5718
5719#endif // !DACCESS_COMPILE
5720
5721
5722#ifdef _TARGET_AMD64_
5723
5724//
5725// TailCallFrame Object Scanning
5726//
5727// This handles scanning/promotion of GC objects that were
// protected by the TailCallHelper routine. Note that the set of objects
// being protected is somewhat dynamic and depends upon the callee...
5731//
5732
5733void TailCallFrame::GcScanRoots(promote_func *fn, ScanContext* sc)
5734{
5735 WRAPPER_NO_CONTRACT;
5736
5737 if (m_pGCLayout != NULL)
5738 {
5739 struct FrameOffsetDecoder {
5740 private:
5741 TADDR prevOffset;
5742 TADDR rangeEnd;
5743 BOOL maybeInterior;
5744 BOOL atEnd;
5745 PTR_SBYTE pbOffsets;
5746
5747 DWORD ReadNumber() {
5748 signed char i;
5749 DWORD offset = 0;
5750 while ((i = *pbOffsets++) >= 0)
5751 {
5752 offset = (offset << 7) | i;
5753 }
5754 offset = (offset << 7) | (i & 0x7F);
5755 return offset;
5756 }
5757
5758 public:
5759 FrameOffsetDecoder(PTR_GSCookie _base, TADDR offsets)
5760 : prevOffset(dac_cast<TADDR>(_base)), rangeEnd(~0LL), atEnd(FALSE), pbOffsets(dac_cast<PTR_SBYTE>(offsets)) { maybeInterior = FALSE;}
5761
5762 bool MoveNext() {
5763 LIMITED_METHOD_CONTRACT;
5764
5765 if (rangeEnd < prevOffset)
5766 {
5767 prevOffset -= sizeof(void*);
5768 return true;
5769 }
5770 if (atEnd) return false;
5771 DWORD offset = ReadNumber();
5772 atEnd = (offset & 1);
5773 BOOL range = (offset & 2);
5774 maybeInterior = (offset & 0x80000000);
5775
5776 offset &= 0x7FFFFFFC;
5777
5778#ifdef _WIN64
5779 offset <<= 1;
5780#endif
5781 offset += sizeof(void*);
5782 _ASSERTE(prevOffset > offset);
5783 prevOffset -= offset;
5784
5785 if (range)
5786 {
5787 _ASSERTE(!atEnd);
5788 _ASSERTE(!maybeInterior);
5789 DWORD offsetEnd = ReadNumber();
5790 atEnd = (offsetEnd & 1);
5791 offsetEnd = (offsetEnd & ~1) << 1;
                    // range encoding starts at a run of 3 (a run of 2 is better encoded as
                    // two separate offsets), so an encoded 0 means the last offset in the
                    // range is 2 slots past the first
5794 offsetEnd += sizeof(void*) * 2;
5795 rangeEnd = prevOffset - offsetEnd;
5796 }
5797
5798 return true;
5799 }
5800
5801 BOOL MaybeInterior() const { return maybeInterior; }
5802
5803 PTR_PTR_Object Current() const { return PTR_PTR_Object(prevOffset); }
5804
5805 } decoder(GetGSCookiePtr(), m_pGCLayout);
5806
5807 while (decoder.MoveNext())
5808 {
5809 PTR_PTR_Object ppRef = decoder.Current();
5810
5811 LOG((LF_GC, INFO3, "Tail Call Frame Promoting" FMT_ADDR "to",
5812 DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
5813 if (decoder.MaybeInterior())
5814 PromoteCarefully(fn, ppRef, sc, GC_CALL_INTERIOR|CHECK_APP_DOMAIN);
5815 else
5816 (*fn)(ppRef, sc, 0);
5817 LOG((LF_GC, INFO3, FMT_ADDR "\n", DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
5818 }
5819 }
5820}
5821
5822#ifndef DACCESS_COMPILE
5823static void EncodeOneGCOffset(CPUSTUBLINKER *pSl, ULONG delta, BOOL maybeInterior, BOOL range, BOOL last)
5824{
5825 CONTRACTL
5826 {
5827 THROWS; // From the stublinker
5828 MODE_ANY;
5829 GC_NOTRIGGER;
5830 }
5831 CONTRACTL_END;
5832
    // Everything should be pointer aligned,
    // but we use the high bit for maybe-interior, bit 0 to denote the end of the list,
    // and bit 1 to denote a range
5836 _ASSERTE((delta % sizeof(void*)) == 0);
5837
5838#if defined(_WIN64)
5839 // For 64-bit, we have 3 bits of alignment, so we allow larger frames
5840 // by shifting and gaining a free high-bit.
5841 ULONG encodedDelta = delta >> 1;
5842#else
5843 // For 32-bit, we just limit our frame size to <2GB. (I know, such a bummer!)
5844 ULONG encodedDelta = delta;
5845#endif
5846 _ASSERTE((encodedDelta & 0x80000003) == 0);
5847 if (last)
5848 {
5849 encodedDelta |= 1;
5850 }
5851
5852 if (range)
5853 {
5854 encodedDelta |= 2;
5855 }
5856 else if (maybeInterior)
5857 {
5858 _ASSERTE(!range);
5859 encodedDelta |= 0x80000000;
5860 }
5861
5862 BYTE bytes[5];
5863 UINT index = 5;
5864 bytes[--index] = (BYTE)((encodedDelta & 0x7F) | 0x80);
5865 encodedDelta >>= 7;
5866 while (encodedDelta > 0)
5867 {
5868 bytes[--index] = (BYTE)(encodedDelta & 0x7F);
5869 encodedDelta >>= 7;
5870 }
5871 pSl->EmitBytes(&bytes[index], 5 - index);
5872}
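
// A minimal standalone sketch (not compiled) of the variable-length number format
// emitted above and consumed by FrameOffsetDecoder::ReadNumber: 7-bit groups are
// written most-significant first with bit 7 clear, and the final (least significant)
// group carries bit 7 set as the terminator. The helper names below are made up for
// illustration only.
#if 0
static UINT EmitVarNumber(BYTE *pOut, ULONG value)
{
    BYTE bytes[5];
    UINT index = 5;
    bytes[--index] = (BYTE)((value & 0x7F) | 0x80);   // terminating group (bit 7 set)
    value >>= 7;
    while (value > 0)
    {
        bytes[--index] = (BYTE)(value & 0x7F);        // continuation groups (bit 7 clear)
        value >>= 7;
    }
    UINT count = 5 - index;
    memcpy(pOut, &bytes[index], count);
    return count;
}

static ULONG ReadVarNumber(const signed char **ppIn)
{
    signed char i;
    ULONG value = 0;
    while ((i = *(*ppIn)++) >= 0)                     // bit 7 clear => keep accumulating
        value = (value << 7) | i;
    return (value << 7) | (i & 0x7F);                 // bit 7 set => last group
}
#endif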
5873
5874static void EncodeGCOffsets(CPUSTUBLINKER *pSl, /* const */ ULONGARRAY & gcOffsets)
5875{
5876 CONTRACTL
5877 {
5878 THROWS;
5879 MODE_ANY;
5880 GC_NOTRIGGER;
5881 }
5882 CONTRACTL_END;
5883
5884 _ASSERTE(gcOffsets.Count() > 0);
5885
5886 ULONG prevOffset = 0;
5887 int i = 0;
5888 BOOL last = FALSE;
5889 do {
5890 ULONG offset = gcOffsets[i];
5891 // Everything should be pointer aligned
5892 // but we use the 0-bit to mean maybeInterior, for byrefs.
5893 _ASSERTE(((offset % sizeof(void*)) == 0) || ((offset % sizeof(void*)) == 1));
5894 BOOL maybeInterior = (offset & 1);
5895 offset &= ~1;
5896
5897 // Encode just deltas because they're smaller (and the list should be sorted)
5898 _ASSERTE(offset >= (prevOffset + sizeof(void*)));
5899 ULONG delta = offset - (prevOffset + sizeof(void*));
5900 if (!maybeInterior && gcOffsets.Count() > i + 2)
5901 {
5902 // Check for a potential range.
5903 // Only do it if we have 3 or more pointers in a row
5904 ULONG rangeOffset = offset;
5905 int j = i + 1;
5906 do {
5907 ULONG nextOffset = gcOffsets[j];
5908 // interior pointers can't be in ranges
5909 if (nextOffset & 1)
5910 break;
5911 // ranges must be saturated
5912 if (nextOffset != (rangeOffset + sizeof(void*)))
5913 break;
5914 j++;
5915 rangeOffset = nextOffset;
5916 } while(j < gcOffsets.Count());
5917
5918 if (j > (i + 2))
5919 {
5920 EncodeOneGCOffset(pSl, delta, FALSE, TRUE, last);
5921 i = j - 1;
5922 _ASSERTE(rangeOffset >= (offset + (sizeof(void*) * 2)));
5923 delta = rangeOffset - (offset + (sizeof(void*) * 2));
5924 offset = rangeOffset;
5925 }
5926 }
5927 last = (++i == gcOffsets.Count());
5928
5929
5930 EncodeOneGCOffset(pSl, delta, maybeInterior, FALSE, last);
5931
5932 prevOffset = offset;
5933 } while (!last);
5934}
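
// Worked example (values chosen for illustration, not taken from any real frame):
// a sorted gcOffsets list of { 0x28, 0x30, 0x38, 0x58 } (all non-interior) contains a
// run of three adjacent slots (0x28..0x38), so it encodes to three bytes:
//     0x92  - offset 0x28 as delta 32 (= 0x28 - 8), stored as (32 >> 1) | 2 (range start)
//     0x80  - range end as delta 0, i.e. the run ends two slots past its first offset
//     0x8D  - offset 0x58 as delta 24 (= 0x58 - 0x38 - 8), stored as (24 >> 1) | 1 (last)
// FrameOffsetDecoder then reports slots at frame-top minus 0x28, 0x30, 0x38 and 0x58.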
5935
5936static void AppendGCLayout(ULONGARRAY &gcLayout, size_t baseOffset, BOOL fIsTypedRef, TypeHandle VMClsHnd)
5937{
5938 STANDARD_VM_CONTRACT;
5939
5940 _ASSERTE((baseOffset % 16) == 0);
5941 _ASSERTE(FitsInU4(baseOffset));
5942
5943 if (fIsTypedRef)
5944 {
5945 *gcLayout.AppendThrowing() = (ULONG)(baseOffset | 1); // "| 1" to mark it as an interior pointer
5946 }
5947 else if (!VMClsHnd.IsNativeValueType())
5948 {
5949 MethodTable* pMT = VMClsHnd.GetMethodTable();
5950 _ASSERTE(pMT);
5951 _ASSERTE(pMT->IsValueType());
5952
5953 BOOL isByRefLike = pMT->IsByRefLike();
5954 if (isByRefLike)
5955 {
5956 FindByRefPointerOffsetsInByRefLikeObject(
5957 pMT,
5958 0 /* baseOffset */,
5959 [&](size_t pointerOffset)
5960 {
5961 // 'gcLayout' requires stack offsets relative to the top of the stack to be recorded, such that subtracting
5962 // the offset from the stack top yields the address of the field, given that subtracting 'baseOffset' from
5963 // the stack top yields the address of the first field in this struct. See TailCallFrame::GcScanRoots() for
5964 // how these offsets are used to calculate stack addresses for fields.
5965 _ASSERTE(pointerOffset < baseOffset);
5966 size_t stackOffsetFromTop = baseOffset - pointerOffset;
5967 _ASSERTE(FitsInU4(stackOffsetFromTop));
5968
5969 // Offsets in 'gcLayout' are expected to be in increasing order
5970 int gcLayoutInsertIndex = gcLayout.Count();
5971 _ASSERTE(gcLayoutInsertIndex >= 0);
5972 for (; gcLayoutInsertIndex != 0; --gcLayoutInsertIndex)
5973 {
5974 ULONG prevStackOffsetFromTop = gcLayout[gcLayoutInsertIndex - 1] & ~(ULONG)1;
5975 if (stackOffsetFromTop > prevStackOffsetFromTop)
5976 {
5977 break;
5978 }
5979 if (stackOffsetFromTop == prevStackOffsetFromTop)
5980 {
5981 return;
5982 }
5983 }
5984
5985 _ASSERTE(gcLayout.Count() == 0 || stackOffsetFromTop > (gcLayout[gcLayout.Count() - 1] & ~(ULONG)1));
5986 *gcLayout.InsertThrowing(gcLayoutInsertIndex) = (ULONG)(stackOffsetFromTop | 1); // "| 1" to mark it as an interior pointer
5987 });
5988 }
5989
5990 // walk the GC descriptors, reporting the correct offsets
5991 if (pMT->ContainsPointers())
5992 {
5993 // size of instance when unboxed must be adjusted for the syncblock
5994 // index and the VTable pointer.
5995 DWORD size = pMT->GetBaseSize();
5996
5997 // we don't include this term in our 'ppstop' calculation below.
5998 _ASSERTE(pMT->GetComponentSize() == 0);
5999
6000 CGCDesc* map = CGCDesc::GetCGCDescFromMT(pMT);
6001 CGCDescSeries* cur = map->GetLowestSeries();
6002 CGCDescSeries* last = map->GetHighestSeries();
6003
6004 _ASSERTE(cur <= last);
6005 do
6006 {
6007 // offset to embedded references in this series must be
6008 // adjusted by the VTable pointer, when in the unboxed state.
6009 size_t adjustOffset = cur->GetSeriesOffset() - sizeof(void *);
6010
6011 _ASSERTE(baseOffset >= adjustOffset);
6012 size_t start = baseOffset - adjustOffset;
6013 size_t stop = start - (cur->GetSeriesSize() + size);
6014 for (size_t off = stop + sizeof(void*); off <= start; off += sizeof(void*))
6015 {
6016 _ASSERTE(FitsInU4(off));
6017
6018 int gcLayoutInsertIndex = gcLayout.Count();
6019 _ASSERTE(gcLayoutInsertIndex >= 0);
6020 if (isByRefLike)
6021 {
6022 // Offsets in 'gcLayout' are expected to be in increasing order and for by-ref-like types the by-refs would
6023 // have already been inserted into 'gcLayout' above. Find the appropriate index at which to insert this
6024 // offset.
6025 while (gcLayoutInsertIndex != 0 && off < gcLayout[gcLayoutInsertIndex - 1])
6026 {
6027 --gcLayoutInsertIndex;
6028 _ASSERTE(off != (gcLayout[gcLayoutInsertIndex] & ~(ULONG)1));
6029 }
6030 }
6031
6032 _ASSERTE(gcLayoutInsertIndex == 0 || off > (gcLayout[gcLayoutInsertIndex - 1] & ~(ULONG)1));
6033 *gcLayout.InsertThrowing(gcLayoutInsertIndex) = (ULONG)off;
6034 }
6035 cur++;
6036
6037 } while (cur <= last);
6038 }
6039 }
6040}
6041
6042Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
6043 MethodDesc* pMD,
6044 CorInfoHelperTailCallSpecialHandling flags)
6045{
6046 STANDARD_VM_CONTRACT;
6047
6048 CPUSTUBLINKER sl;
6049 CPUSTUBLINKER* pSl = &sl;
6050
6051 // Generates a function that looks like this:
6052 // size_t CopyArguments(va_list args, (RCX)
6053 // CONTEXT *pCtx, (RDX)
6054 // DWORD64 *pvStack, (R8)
6055 // size_t cbStack) (R9)
6056 // {
6057 // if (pCtx != NULL) {
6058 // foreach (arg in args) {
6059 // copy into pCtx or pvStack
6060 // }
6061 // }
6062 // return <size of stack needed>;
6063 // }
6064 //
6065
6066 CodeLabel *pNullLabel = pSl->NewCodeLabel();
6067
6068 // test rdx, rdx
6069 pSl->X86EmitR2ROp(0x85, kRDX, kRDX);
6070
6071 // jz NullLabel
6072 pSl->X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
6073
6074 UINT nArgSlot = 0;
6075 UINT totalArgs = pSig->totalILArgs() + ((pSig->isVarArg() || pSig->hasTypeArg()) ? 1 : 0);
6076 bool fR10Loaded = false;
6077 UINT cbArg;
6078 static const UINT rgcbArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Rcx), offsetof(CONTEXT, Rdx),
6079 offsetof(CONTEXT, R8), offsetof(CONTEXT, R9) };
6080 static const UINT rgcbFpArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Xmm0.Low), offsetof(CONTEXT, Xmm1.Low),
6081 offsetof(CONTEXT, Xmm2.Low), offsetof(CONTEXT, Xmm3.Low) };
6082
6083 ULONGARRAY gcLayout;
6084
6085 // On input to the function R9 contains the size of the buffer
6086 // The first time this macro runs, R10 is loaded with the 'top' of the Frame
6087 // and R9 is changed to point to the 'top' of the copy buffer.
6088 // Then both R9 and R10 are decremented by the size of the struct we're copying
6089 // So R10 is the value to put in the argument slot, and R9 is where the data
6090 // should be copied to (or zeroed out in the case of the return buffer).
6091#define LOAD_STRUCT_OFFSET_IF_NEEDED(cbSize) \
6092 { \
6093 _ASSERTE(cbSize > 0); \
6094 _ASSERTE(FitsInI4(cbSize)); \
6095 __int32 offset = (__int32)cbSize; \
6096 if (!fR10Loaded) { \
6097 /* mov r10, [rdx + offset of RSP] */ \
6098 pSl->X86EmitIndexRegLoad(kR10, kRDX, offsetof(CONTEXT, Rsp)); \
6099 /* add an extra 8 because RSP is pointing at the return address */ \
6100 offset -= 8; \
6101 /* add r10, r9 */ \
6102 pSl->X86EmitAddRegReg(kR10, kR9); \
6103 /* add r9, r8 */ \
6104 pSl->X86EmitAddRegReg(kR9, kR8); \
6105 fR10Loaded = true; \
6106 } \
6107 /* sub r10, offset */ \
6108 pSl->X86EmitSubReg(kR10, offset); \
6109 /* sub r9, cbSize */ \
6110 pSl->X86EmitSubReg(kR9, cbSize); \
6111 }
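
// Rough C-level picture of the bookkeeping in LOAD_STRUCT_OFFSET_IF_NEEDED (illustrative
// only, not part of the emitted code). On first use:
//     r10 = pCtx->Rsp + cbStack;   // 'top' of the caller's incoming argument area
//     r9  = pvStack   + cbStack;   // 'top' of the copy buffer
// with the very first r10 subtraction using (cbSize - 8) because Rsp still points at the
// return address. Each use then carves cbSize bytes off the top of both regions:
//     r10 -= cbSize;               // the value stored into the argument slot
//     r9  -= cbSize;               // where the struct data is copied (or zeroed)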
6112
6113
6114 if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) {
6115 // This is set for stub dispatch
6116 // The JIT placed an extra argument in the list that needs to
6117 // get shoved into R11, and not counted.
6118 // pCtx->R11 = va_arg(args, DWORD64);
6119
6120 // mov rax, [rcx]
6121 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6122 // add rcx, 8
6123 pSl->X86EmitAddReg(kRCX, 8);
6124 // mov [rdx + offset of R11], rax
6125 pSl->X86EmitIndexRegStore(kRDX, offsetof(CONTEXT, R11), kRAX);
6126 }
6127
6128 ULONG cbStructOffset = 0;
6129
6130 // First comes the 'this' pointer
6131 if (pSig->hasThis()) {
6132 // mov rax, [rcx]
6133 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6134 // add rcx, 8
6135 pSl->X86EmitAddReg(kRCX, 8);
6136 // mov [rdx + offset of RCX/RDX], rax
6137 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
6138 }
6139
6140 // Next the return buffer
6141 cbArg = 0;
6142 TypeHandle th(pSig->retTypeClass);
6143 if ((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS)) {
6144 cbArg = th.GetSize();
6145 }
6146
6147 if (ArgIterator::IsArgPassedByRef(cbArg)) {
6148 totalArgs++;
6149
6150 // We always reserve space for the return buffer, and we always zero it out,
6151 // so the GC won't complain, but if it's already pointing above the frame,
6152 // then we need to pass it in (so it will get passed out).
6153 // Otherwise we assume the caller is returning void, so we just pass in
6154 // dummy space to be overwritten.
6155 UINT cbUsed = (cbArg + 0xF) & ~0xF;
6156 LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
6157 // now emit a 'memset(r9, 0, cbUsed)'
6158 {
6159 // xorps xmm0, xmm0
6160 pSl->X86EmitR2ROp(X86_INSTR_XORPS, kXMM0, kXMM0);
6161 if (cbUsed <= 4 * 16) {
6162 // movaps [r9], xmm0
6163 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0);
6164 if (16 < cbUsed) {
6165 // movaps [r9 + 16], xmm0
6166 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 16);
6167 if (32 < cbUsed) {
6168 // movaps [r9 + 32], xmm0
6169 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 32);
6170 if (48 < cbUsed) {
6171 // movaps [r9 + 48], xmm0
6172 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 48);
6173 }
6174 }
6175 }
6176 }
6177 else {
6178 // a loop (one double-quadword at a time)
6179 pSl->X86EmitZeroOutReg(kR11);
6180 // LoopLabel:
6181 CodeLabel *pLoopLabel = pSl->NewCodeLabel();
6182 pSl->EmitLabel(pLoopLabel);
6183 // movaps [r9 + r11], xmm0
6184 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
6185 // add r11, 16
6186 pSl->X86EmitAddReg(kR11, 16);
6187 // cmp r11, cbUsed
6188 pSl->X86EmitCmpRegImm32(kR11, cbUsed);
6189 // jl LoopLabel
6190 pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
6191 }
6192 }
6193 cbStructOffset += cbUsed;
6194 AppendGCLayout(gcLayout, cbStructOffset, pSig->retType == CORINFO_TYPE_REFANY, th);
6195
6196 // mov rax, [rcx]
6197 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6198 // add rcx, 8
6199 pSl->X86EmitAddReg(kRCX, 8);
6200 // cmp rax, [rdx + offset of R12]
6201 pSl->X86EmitOffsetModRM(0x3B, kRAX, kRDX, offsetof(CONTEXT, R12));
6202
6203 CodeLabel *pSkipLabel = pSl->NewCodeLabel();
6204 // jnb SkipLabel
6205 pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJNB);
6206
6207 // Also check the lower bound of the stack in case the return buffer is on the GC heap
6208 // and the GC heap is below the stack
6209 // cmp rax, rsp
6210 pSl->X86EmitR2ROp(0x3B, kRAX, (X86Reg)4 /*kRSP*/);
        // jb SkipLabel
6212 pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJB);
6213 // mov rax, r10
6214 pSl->X86EmitMovRegReg(kRAX, kR10);
6215 // SkipLabel:
6216 pSl->EmitLabel(pSkipLabel);
6217 // mov [rdx + offset of RCX], rax
6218 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
6219 }
6220
6221 // VarArgs Cookie *or* Generics Instantiation Parameter
6222 if (pSig->hasTypeArg() || pSig->isVarArg()) {
6223 // mov rax, [rcx]
6224 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6225 // add rcx, 8
6226 pSl->X86EmitAddReg(kRCX, 8);
6227 // mov [rdx + offset of RCX/RDX], rax
6228 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
6229 }
6230
6231 _ASSERTE(nArgSlot <= 4);
6232
6233 // Now for *all* the 'real' arguments
6234 SigPointer ptr((PCCOR_SIGNATURE)pSig->args);
6235 Module * module = GetModule(pSig->scope);
6236 Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount);
6237 Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount);
6238 SigTypeContext typeCtxt(classInst, methodInst);
6239
6240 for( ;nArgSlot < totalArgs; ptr.SkipExactlyOne()) {
6241 CorElementType et = ptr.PeekElemTypeNormalized(module, &typeCtxt);
6242 if (et == ELEMENT_TYPE_SENTINEL)
6243 continue;
6244
6245 // mov rax, [rcx]
6246 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6247 // add rcx, 8
6248 pSl->X86EmitAddReg(kRCX, 8);
6249 switch (et) {
6250 case ELEMENT_TYPE_INTERNAL:
6251 // TODO
6252 _ASSERTE(!"Shouldn't see ELEMENT_TYPE_INTERNAL");
6253 break;
6254 case ELEMENT_TYPE_TYPEDBYREF:
6255 case ELEMENT_TYPE_VALUETYPE:
6256 th = ptr.GetTypeHandleThrowing(module, &typeCtxt, ClassLoader::LoadTypes, CLASS_LOAD_UNRESTOREDTYPEKEY);
6257 _ASSERTE(!th.IsNull());
6258 g_IBCLogger.LogEEClassAndMethodTableAccess(th.GetMethodTable());
6259 cbArg = (UINT)th.GetSize();
6260 if (ArgIterator::IsArgPassedByRef(cbArg)) {
6261 UINT cbUsed = (cbArg + 0xF) & ~0xF;
6262 LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
6263 // rax has the source pointer
6264 // r9 has the intermediate copy location
6265 // r10 has the final destination
6266 if (nArgSlot < 4) {
6267 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kR10);
6268 }
6269 else {
6270 pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot++, kR10);
6271 }
6272 // now emit a 'memcpy(rax, r9, cbUsed)'
6273 // These structs are supposed to be 16-byte aligned, but
6274 // Reflection puts them on the GC heap, which is only 8-byte
6275 // aligned. It also means we have to be careful about not
6276 // copying too much (because we might cross a page boundary)
6277 UINT cbUsed16 = (cbArg + 7) & ~0xF;
6278 _ASSERTE((cbUsed16 == cbUsed) || ((cbUsed16 + 16) == cbUsed));
6279
6280 if (cbArg <= 192) {
6281 // Unrolled version (6 x 16 bytes in parallel)
6282 UINT offset = 0;
6283 while (offset < cbUsed16) {
6284 // movups xmm0, [rax + offset]
6285 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, offset);
6286 if (offset + 16 < cbUsed16) {
6287 // movups xmm1, [rax + offset + 16]
6288 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM1, kRAX, offset + 16);
6289 if (offset + 32 < cbUsed16) {
6290 // movups xmm2, [rax + offset + 32]
6291 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM2, kRAX, offset + 32);
6292 if (offset + 48 < cbUsed16) {
6293 // movups xmm3, [rax + offset + 48]
6294 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM3, kRAX, offset + 48);
6295 if (offset + 64 < cbUsed16) {
6296 // movups xmm4, [rax + offset + 64]
6297 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM4, kRAX, offset + 64);
6298 if (offset + 80 < cbUsed16) {
6299 // movups xmm5, [rax + offset + 80]
6300 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM5, kRAX, offset + 80);
6301 }
6302 }
6303 }
6304 }
6305 }
6306 // movaps [r9 + offset], xmm0
6307 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, offset);
6308 offset += 16;
6309 if (offset < cbUsed16) {
6310 // movaps [r9 + 16], xmm1
6311 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM1, kR9, offset);
6312 offset += 16;
6313 if (offset < cbUsed16) {
6314 // movaps [r9 + 32], xmm2
6315 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM2, kR9, offset);
6316 offset += 16;
6317 if (offset < cbUsed16) {
6318 // movaps [r9 + 48], xmm3
6319 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM3, kR9, offset);
6320 offset += 16;
6321 if (offset < cbUsed16) {
6322 // movaps [r9 + 64], xmm4
6323 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM4, kR9, offset);
6324 offset += 16;
6325 if (offset < cbUsed16) {
6326 // movaps [r9 + 80], xmm5
6327 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM5, kR9, offset);
6328 offset += 16;
6329 }
6330 }
6331 }
6332 }
6333 }
6334 }
6335 // Copy the last 8 bytes if needed
6336 if (cbUsed > cbUsed16) {
6337 _ASSERTE(cbUsed16 < cbArg);
6338 // movlps xmm0, [rax + offset]
6339 pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, offset);
6340 // movlps [r9 + offset], xmm0
6341 pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, offset);
6342 }
6343 }
6344 else {
6345 // a loop (one double-quadword at a time)
6346 pSl->X86EmitZeroOutReg(kR11);
6347 // LoopLabel:
6348 CodeLabel *pLoopLabel = pSl->NewCodeLabel();
6349 pSl->EmitLabel(pLoopLabel);
6350 // movups xmm0, [rax + r11]
6351 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
6352 // movaps [r9 + r11], xmm0
6353 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
6354 // add r11, 16
6355 pSl->X86EmitAddReg(kR11, 16);
6356 // cmp r11, cbUsed16
6357 pSl->X86EmitCmpRegImm32(kR11, cbUsed16);
6358 // jl LoopLabel
6359 pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
6360 if (cbArg > cbUsed16) {
6361 _ASSERTE(cbUsed16 + 8 >= cbArg);
6362 // movlps xmm0, [rax + r11]
6363 pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
6364 // movlps [r9 + r11], xmm0
6365 pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, 0, kR11, 1);
6366 }
6367 }
6368 cbStructOffset += cbUsed;
6369 AppendGCLayout(gcLayout, cbStructOffset, et == ELEMENT_TYPE_TYPEDBYREF, th);
6370 break;
6371 }
6372
6373 //
6374 // Explicit Fall-Through for non-IsArgPassedByRef
6375 //
6376
6377 default:
6378 if (nArgSlot < 4) {
6379 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot], kRAX);
6380 if ((et == ELEMENT_TYPE_R4) || (et == ELEMENT_TYPE_R8)) {
6381 pSl->X86EmitIndexRegStore(kRDX, rgcbFpArgRegCtxtOffsets[nArgSlot], kRAX);
6382 }
6383 }
6384 else {
6385 pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot, kRAX);
6386 }
6387 nArgSlot++;
6388 break;
6389 }
6390 }
6391
6392#undef LOAD_STRUCT_OFFSET_IF_NEEDED
6393
    // Keep our 4 shadow slots and an even number of slots (to keep the stack 16-byte aligned)
6395 if (nArgSlot < 4)
6396 nArgSlot = 4;
6397 else if (nArgSlot & 1)
6398 nArgSlot++;
6399
6400 _ASSERTE((cbStructOffset % 16) == 0);
6401
6402 // xor eax, eax
6403 pSl->X86EmitZeroOutReg(kRAX);
6404 // ret
6405 pSl->X86EmitReturn(0);
6406
6407 // NullLabel:
6408 pSl->EmitLabel(pNullLabel);
6409
6410 CodeLabel *pGCLayoutLabel = NULL;
6411 if (gcLayout.Count() == 0) {
6412 // xor eax, eax
6413 pSl->X86EmitZeroOutReg(kRAX);
6414 }
6415 else {
6416 // lea rax, [rip + offset to gclayout]
6417 pGCLayoutLabel = pSl->NewCodeLabel();
6418 pSl->X86EmitLeaRIP(pGCLayoutLabel, kRAX);
6419 }
6420 // mov [r9], rax
6421 pSl->X86EmitIndexRegStore(kR9, 0, kRAX);
6422 // mov rax, cbStackNeeded
6423 pSl->X86EmitRegLoad(kRAX, cbStructOffset + nArgSlot * 8);
6424 // ret
6425 pSl->X86EmitReturn(0);
6426
6427 if (gcLayout.Count() > 0) {
6428 // GCLayout:
6429 pSl->EmitLabel(pGCLayoutLabel);
6430 EncodeGCOffsets(pSl, gcLayout);
6431 }
6432
6433 LoaderHeap* pHeap = pMD->GetLoaderAllocatorForCode()->GetStubHeap();
6434 return pSl->Link(pHeap);
6435}
6436#endif // DACCESS_COMPILE
6437
6438#endif // _TARGET_AMD64_
6439
6440
6441#ifdef HAS_FIXUP_PRECODE
6442
6443#ifdef HAS_FIXUP_PRECODE_CHUNKS
6444TADDR FixupPrecode::GetMethodDesc()
6445{
6446 LIMITED_METHOD_CONTRACT;
6447 SUPPORTS_DAC;
6448
6449 // This lookup is also manually inlined in PrecodeFixupThunk assembly code
6450 TADDR base = *PTR_TADDR(GetBase());
6451 if (base == NULL)
6452 return NULL;
6453 return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
6454}
6455#endif
6456
6457#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6458PCODE FixupPrecode::GetDynamicMethodPrecodeFixupJumpStub()
6459{
6460 WRAPPER_NO_CONTRACT;
6461 _ASSERTE(((PTR_MethodDesc)GetMethodDesc())->IsLCGMethod());
6462
6463 // The precode fixup jump stub is shared by all fixup precodes in a chunk, and immediately follows the MethodDesc. Jump
6464 // stubs cannot be reused currently for the same method:
    // - The jump stub's target would change separately from the precode being updated from "call Func" to "jmp Func";
    //   both changes would have to be done atomically with runtime suspension, which is not done currently
    // - When changing the entry point from one version of jitted code to another, the jump stub's target pointer is not
    //   aligned to 8 bytes, so an interlocked update of the target address is not possible
6469 // So, when initially the precode intends to be of the form "call PrecodeFixupThunk", if the target address happens to be
6470 // too far for a relative 32-bit jump, it will use the shared precode fixup jump stub. When changing the entry point to
6471 // jitted code, the jump stub associated with the precode is patched, and the precode is updated to use that jump stub.
6472 //
6473 // Notes:
6474 // - Dynamic method descs, and hence their precodes and preallocated jump stubs, may be reused for a different method
6475 // (along with reinitializing the precode), but only with a transition where the original method is no longer accessible
6476 // to user code
6477 // - Concurrent calls to a dynamic method that has not yet been jitted may trigger multiple writes to the jump stub
6478 // associated with the precode, but only to the same target address (and while the precode is still pointing to
6479 // PrecodeFixupThunk)
6480 return GetBase() + sizeof(PTR_MethodDesc);
6481}
6482
6483PCODE FixupPrecode::GetDynamicMethodEntryJumpStub()
6484{
6485 WRAPPER_NO_CONTRACT;
6486 _ASSERTE(((PTR_MethodDesc)GetMethodDesc())->IsLCGMethod());
6487
6488 // m_PrecodeChunkIndex has a value inverted to the order of precodes in memory (the precode at the lowest address has the
6489 // highest index, and the precode at the highest address has the lowest index). To map a precode to its jump stub by memory
6490 // order, invert the precode index to get the jump stub index. Also skip the precode fixup jump stub (see
6491 // GetDynamicMethodPrecodeFixupJumpStub()).
6492 UINT32 count = ((PTR_MethodDesc)GetMethodDesc())->GetMethodDescChunk()->GetCount();
6493 _ASSERTE(m_PrecodeChunkIndex < count);
6494 SIZE_T jumpStubIndex = count - m_PrecodeChunkIndex;
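    // For example, with count == 3 the precode at the lowest address has m_PrecodeChunkIndex == 2
    // and maps to jumpStubIndex == 1, while the precode at the highest address (index 0) maps to
    // slot 3; slot 0 is the shared precode fixup jump stub (see GetDynamicMethodPrecodeFixupJumpStub()).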
6495
6496 return GetBase() + sizeof(PTR_MethodDesc) + jumpStubIndex * BACK_TO_BACK_JUMP_ALLOCATE_SIZE;
6497}
6498#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6499
6500#ifdef DACCESS_COMPILE
6501void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
6502{
6503 SUPPORTS_DAC;
6504 DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
6505
6506 DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
6507}
6508#endif // DACCESS_COMPILE
6509
6510#endif // HAS_FIXUP_PRECODE
6511
6512#ifndef DACCESS_COMPILE
6513
6514void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD)
6515{
6516 CONTRACTL
6517 {
6518 THROWS; // Creating a JumpStub could throw OutOfMemory
6519 GC_TRIGGERS;
6520 }
6521 CONTRACTL_END;
6522
6523 INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
6524
6525 _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
6526 FastInterlockExchange((LONG*)pRel32, (LONG)targetRel32);
6527}
6528
6529BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
6530{
6531 CONTRACTL
6532 {
6533 THROWS; // Creating a JumpStub could throw OutOfMemory
6534 GC_TRIGGERS;
6535 }
6536 CONTRACTL_END;
6537
6538 BYTE* callAddrAdj = (BYTE*)pRel32 + 4;
6539 INT32 expectedRel32 = static_cast<INT32>((BYTE*)expected - callAddrAdj);
6540
6541 INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
6542
6543 _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
6544 return FastInterlockCompareExchange((LONG*)pRel32, (LONG)targetRel32, (LONG)expectedRel32) == (LONG)expectedRel32;
6545}
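
// Illustrative sketch (not compiled) of the rel32 arithmetic used above: a rel32
// displacement is measured from the first byte after the 4-byte displacement field,
// which is why 'expected' is converted via callAddrAdj = pRel32 + 4. The helper names
// are made up for illustration.
#if 0
static INT32 ComputeRel32(const BYTE *pRel32Field, const BYTE *pTarget)
{
    // displacement = target - (address of the byte following the rel32 field)
    return (INT32)(pTarget - (pRel32Field + 4));
}

static const BYTE *DecodeRel32(const BYTE *pRel32Field)
{
    // target = end of the rel32 field + stored displacement
    return pRel32Field + 4 + *(const INT32 *)pRel32Field;
}
#endif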
6546
6547void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */,
6548 BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */)
6549{
6550 WRAPPER_NO_CONTRACT;
6551
6552 IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
6553 IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
6554 m_pMethodDesc = (TADDR)pMD;
6555 IN_WIN32(m_mov_rm_r = X86_INSTR_MOV_RM_R); // mov reg,reg
6556 m_type = type;
6557 m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
6558
6559 if (pLoaderAllocator != NULL)
6560 {
6561 // Use pMD == NULL in all precode initialization methods to allocate the initial jump stub in non-dynamic heap
        // that has the same lifetime as the precode itself
6563 if (target == NULL)
6564 target = GetPreStubEntryPoint();
6565 m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator);
6566 }
6567}
6568
6569#ifdef HAS_NDIRECT_IMPORT_PRECODE
6570
6571void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
6572{
6573 WRAPPER_NO_CONTRACT;
6574 StubPrecode::Init(pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk));
6575}
6576
6577#endif // HAS_NDIRECT_IMPORT_PRECODE
6578
6579
6580#ifdef HAS_FIXUP_PRECODE
6581void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
6582{
6583 WRAPPER_NO_CONTRACT;
6584
6585 m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
6586 m_type = FixupPrecode::TypePrestub;
6587
6588 // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
6589 if (m_PrecodeChunkIndex == 0)
6590 {
6591 _ASSERTE(FitsInU1(iPrecodeChunkIndex));
6592 m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
6593 }
6594
6595 if (iMethodDescChunkIndex != -1)
6596 {
6597 if (m_MethodDescChunkIndex == 0)
6598 {
6599 _ASSERTE(FitsInU1(iMethodDescChunkIndex));
6600 m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
6601 }
6602
6603 if (*(void**)GetBase() == NULL)
6604 *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
6605 }
6606
6607 _ASSERTE(GetMethodDesc() == (TADDR)pMD);
6608
6609 PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
6610#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6611 if (pMD->IsLCGMethod())
6612 {
6613 m_rel32 = rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodPrecodeFixupJumpStub(), false /* emitJump */);
6614 return;
6615 }
6616#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6617 if (pLoaderAllocator != NULL)
6618 {
6619 m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator);
6620 }
6621}
6622
6623void FixupPrecode::ResetTargetInterlocked()
6624{
6625 CONTRACTL
6626 {
6627 THROWS; // Creating a JumpStub could throw OutOfMemory
6628 GC_NOTRIGGER;
6629 }
6630 CONTRACTL_END;
6631
6632 FixupPrecode newValue = *this;
6633 newValue.m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
6634 newValue.m_type = FixupPrecode::TypePrestub;
6635
6636 PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
6637 MethodDesc* pMD = (MethodDesc*)GetMethodDesc();
6638#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6639 // The entry point of LCG methods cannot revert back to the original entry point, as their jump stubs would have to be
6640 // reused, which is currently not supported. This method is intended for resetting the entry point while the method is
6641 // callable, which implies that the entry point may later be changed again to something else. Currently, this is not done
6642 // for LCG methods. See GetDynamicMethodPrecodeFixupJumpStub() for more.
6643 _ASSERTE(!pMD->IsLCGMethod());
6644#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6645
6646 newValue.m_rel32 = rel32UsingJumpStub(&m_rel32, target, pMD);
6647
6648 _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
6649 EnsureWritableExecutablePages(this, sizeof(INT64));
6650 FastInterlockExchangeLong((INT64*)this, *(INT64*)&newValue);
6651}
6652
6653BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
6654{
6655 CONTRACTL
6656 {
6657 THROWS; // Creating a JumpStub could throw OutOfMemory
6658 GC_NOTRIGGER;
6659 }
6660 CONTRACTL_END;
6661
6662 INT64 oldValue = *(INT64*)this;
6663 BYTE* pOldValue = (BYTE*)&oldValue;
6664
6665 MethodDesc * pMD = (MethodDesc*)GetMethodDesc();
6666 g_IBCLogger.LogMethodPrecodeWriteAccess(pMD);
6667
6668#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6669 // A different jump stub is used for this case, see Init(). This call is unexpected for resetting the entry point.
6670 _ASSERTE(!pMD->IsLCGMethod() || target != (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk));
6671#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6672
6673 INT64 newValue = oldValue;
6674 BYTE* pNewValue = (BYTE*)&newValue;
6675
6676 if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] == FixupPrecode::TypePrestub)
6677 {
6678 pNewValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] = FixupPrecode::Type;
6679
6680 pOldValue[offsetof(FixupPrecode, m_op)] = X86_INSTR_CALL_REL32;
6681 pNewValue[offsetof(FixupPrecode, m_op)] = X86_INSTR_JMP_REL32;
6682 }
6683 else if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] == FixupPrecode::Type)
6684 {
6685#ifdef FEATURE_CODE_VERSIONING
6686 // No change needed, jmp is already in place
6687#else
6688 // Setting the target more than once is unexpected
6689 return FALSE;
6690#endif
6691 }
6692 else
6693 {
6694 // Pre-existing code doesn't conform to the expectations for a FixupPrecode
6695 return FALSE;
6696 }
6697
6698 *(INT32*)(&pNewValue[offsetof(FixupPrecode, m_rel32)]) =
6699#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6700 pMD->IsLCGMethod() ?
6701 rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub(), true /* emitJump */) :
6702#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6703 rel32UsingJumpStub(&m_rel32, target, pMD);
6704
6705 _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
6706 EnsureWritableExecutablePages(this, sizeof(INT64));
6707 return FastInterlockCompareExchangeLong((INT64*) this, newValue, oldValue) == oldValue;
6708}
6709
6710#ifdef FEATURE_NATIVE_IMAGE_GENERATION
6711// Partial initialization. Used to save regrouped chunks.
6712void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
6713{
6714 m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
6715 m_type = FixupPrecode::TypePrestub;
6716
6717 _ASSERTE(FitsInU1(iPrecodeChunkIndex));
6718 m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
6719
6720 // The rest is initialized in code:FixupPrecode::Fixup
6721}
6722
6723void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
6724{
6725 STANDARD_VM_CONTRACT;
6726
6727 // Note that GetMethodDesc() does not return the correct value because of
6728 // regrouping of MethodDescs into hot and cold blocks. That's why the caller
6729 // has to supply the actual MethodDesc
6730
6731 SSIZE_T mdChunkOffset;
6732 ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
6733 ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
6734
6735 image->FixupFieldToNode(this, offsetof(FixupPrecode, m_rel32),
6736 pHelperThunk, 0, IMAGE_REL_BASED_REL32);
6737
6738 // Set the actual chunk index
6739 FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
6740
6741 size_t mdOffset = mdChunkOffset - sizeof(MethodDescChunk);
6742 size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
6743 _ASSERTE(FitsInU1(chunkIndex));
6744 pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
6745
6746 // Fixup the base of MethodDescChunk
6747 if (m_PrecodeChunkIndex == 0)
6748 {
6749 image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
6750 pMDChunkNode, sizeof(MethodDescChunk));
6751 }
6752}
6753#endif // FEATURE_NATIVE_IMAGE_GENERATION
6754
6755#endif // HAS_FIXUP_PRECODE
6756
6757#endif // !DACCESS_COMPILE
6758
6759
6760#ifdef HAS_THISPTR_RETBUF_PRECODE
6761
6762// rel32 jmp target that points back to the jump (infinite loop).
6763// Used to mark uninitialized ThisPtrRetBufPrecode target
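// (A "jmp rel32" instruction is 5 bytes long and its displacement is measured from the
// next instruction, so a displacement of -5 targets the jmp's own first byte.)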
6764#define REL32_JMP_SELF (-5)
6765
6766#ifndef DACCESS_COMPILE
6767void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
6768{
6769 WRAPPER_NO_CONTRACT;
6770
6771 IN_WIN64(m_nop1 = X86_INSTR_NOP;) // nop
6772#ifdef UNIX_AMD64_ABI
6773 m_prefix1 = 0x48;
6774 m_movScratchArg0 = 0xC78B; // mov rax,rdi
6775 m_prefix2 = 0x48;
6776 m_movArg0Arg1 = 0xFE8B; // mov rdi,rsi
6777 m_prefix3 = 0x48;
6778 m_movArg1Scratch = 0xF08B; // mov rsi,rax
6779#else
6780 IN_WIN64(m_prefix1 = 0x48;)
6781 m_movScratchArg0 = 0xC889; // mov r/eax,r/ecx
6782 IN_WIN64(m_prefix2 = 0x48;)
6783 m_movArg0Arg1 = 0xD189; // mov r/ecx,r/edx
6784 IN_WIN64(m_prefix3 = 0x48;)
6785 m_movArg1Scratch = 0xC289; // mov r/edx,r/eax
6786#endif
6787 m_nop2 = X86_INSTR_NOP; // nop
6788 m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
6789 m_pMethodDesc = (TADDR)pMD;
6790
6791 // This precode is never patched lazily - avoid unnecessary jump stub allocation
6792 m_rel32 = REL32_JMP_SELF;
6793}
6794
6795BOOL ThisPtrRetBufPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
6796{
6797 CONTRACTL
6798 {
6799 THROWS;
6800 GC_TRIGGERS;
6801 }
6802 CONTRACTL_END;
6803
    // This precode is never patched lazily - interlocked semantics are not required.
6805 _ASSERTE(m_rel32 == REL32_JMP_SELF);
6806
6807 // Use pMD == NULL to allocate the jump stub in non-dynamic heap that has the same lifetime as the precode itself
6808 INT32 newRel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, ((MethodDesc *)GetMethodDesc())->GetLoaderAllocatorForCode());
6809
6810 _ASSERTE(IS_ALIGNED(&m_rel32, sizeof(INT32)));
6811 FastInterlockExchange((LONG *)&m_rel32, (LONG)newRel32);
6812 return TRUE;
6813}
6814#endif // !DACCESS_COMPILE
6815
6816PCODE ThisPtrRetBufPrecode::GetTarget()
6817{
6818 LIMITED_METHOD_DAC_CONTRACT;
6819
    // This precode is never patched lazily - pretend that the uninitialized m_rel32 points to the prestub
6821 if (m_rel32 == REL32_JMP_SELF)
6822 return GetPreStubEntryPoint();
6823
6824 return rel32Decode(PTR_HOST_MEMBER_TADDR(ThisPtrRetBufPrecode, this, m_rel32));
6825}
6826
6827#endif // HAS_THISPTR_RETBUF_PRECODE
6828