| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // |
| 5 | // File: VirtualCallStub.h |
| 6 | // |
| 7 | |
| 8 | |
| 9 | |
| 10 | // |
| 11 | // See code:VirtualCallStubManager for details |
| 12 | // |
| 13 | // ============================================================================ |
| 14 | |
| 15 | #ifndef _VIRTUAL_CALL_STUB_H |
| 16 | #define _VIRTUAL_CALL_STUB_H |
| 17 | |
| 18 | #ifndef CROSSGEN_COMPILE |
| 19 | |
| 20 | #define CHAIN_LOOKUP |
| 21 | |
| 22 | #if defined(_TARGET_X86_) |
| 23 | // If this is uncommented, the runtime leaves a file "StubLog_<pid>.log" with statistics on the behavior |
| 24 | // of stub-based interface dispatch. |
| 25 | //#define STUB_LOGGING |
| 26 | #endif |
| 27 | |
| 28 | #include "stubmgr.h" |
| 29 | |
| 30 | ///////////////////////////////////////////////////////////////////////////////////// |
| 31 | // Forward class declarations |
| 32 | class FastTable; |
| 33 | class BucketTable; |
| 34 | class Entry; |
| 35 | class Prober; |
| 36 | class VirtualCallStubManager; |
| 37 | class VirtualCallStubManagerManager; |
| 38 | struct LookupHolder; |
| 39 | struct DispatchHolder; |
| 40 | struct ResolveHolder; |
| 41 | struct VTableCallHolder; |
| 42 | |
| 43 | ///////////////////////////////////////////////////////////////////////////////////// |
| 44 | // Forward function declarations |
| 45 | extern "C" void InContextTPQuickDispatchAsmStub(); |
| 46 | |
| 47 | extern "C" PCODE STDCALL StubDispatchFixupWorker(TransitionBlock * pTransitionBlock, |
| 48 | TADDR siteAddrForRegisterIndirect, |
| 49 | DWORD sectionIndex, |
| 50 | Module * pModule); |
| 51 | |
| 52 | extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock, |
| 53 | TADDR siteAddrForRegisterIndirect, |
| 54 | size_t token |
| 55 | #ifndef _TARGET_X86_ |
| 56 | , UINT_PTR flags |
| 57 | #endif |
| 58 | ); |
| 59 | |
| 60 | |
| 61 | ///////////////////////////////////////////////////////////////////////////////////// |
| 62 | #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) |
| 63 | typedef INT32 DISPL; |
| 64 | #endif |
| 65 | |
| 66 | ///////////////////////////////////////////////////////////////////////////////////// |
| 67 | // Represents the struct that is added to the resolve cache |
| 68 | // NOTE: If you change the layout of this struct, you'll need to update various |
| 69 | // ASM helpers in VirtualCallStubCpu that rely on offsets of members. |
| 70 | // |
| 71 | struct ResolveCacheElem |
| 72 | { |
| 73 | void *pMT; |
| 74 | size_t token; // DispatchToken |
| 75 | void *target; |
| 76 | |
| 77 | // These are used for chaining |
| 78 | ResolveCacheElem *pNext; |
| 79 | ResolveCacheElem *Next() |
| 80 | { LIMITED_METHOD_CONTRACT; return VolatileLoad(&pNext); } |
| 81 | |
| 82 | #ifdef _DEBUG |
| 83 | UINT16 debug_hash; |
| 84 | UINT16 debug_index; |
| 85 | #endif // _DEBUG |
| 86 | |
| 87 | BOOL Equals(size_t token, void *pMT) |
| 88 | { LIMITED_METHOD_CONTRACT; return (this->pMT == pMT && this->token == token); } |
| 89 | |
| 90 | BOOL Equals(ResolveCacheElem *pElem) |
| 91 | { WRAPPER_NO_CONTRACT; return Equals(pElem->token, pElem->pMT); } |
| 92 | |
| 93 | }; |
| 94 | |
| 95 | enum |
| 96 | { |
| 97 | e_resolveCacheElem_sizeof_mt = sizeof(void *), |
| 98 | e_resolveCacheElem_sizeof_token = sizeof(size_t), |
| 99 | e_resolveCacheElem_sizeof_target = sizeof(void *), |
| 100 | e_resolveCacheElem_sizeof_next = sizeof(ResolveCacheElem *), |
| 101 | |
| 102 | e_resolveCacheElem_offset_mt = 0, |
| 103 | e_resolveCacheElem_offset_token = e_resolveCacheElem_offset_mt + e_resolveCacheElem_sizeof_mt, |
| 104 | e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token, |
| 105 | e_resolveCacheElem_offset_next = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target, |
| 106 | }; |
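| | // The enum values above assume the pointer-sized members are laid out back to back with no padding. |
| | // A minimal sanity-check sketch (illustrative only, not compiled here; it assumes offsetof/<stddef.h> |
| | // is available) could verify that assumption: |
| | // |
| | // static_assert(offsetof(ResolveCacheElem, pMT) == e_resolveCacheElem_offset_mt, "pMT offset"); |
| | // static_assert(offsetof(ResolveCacheElem, token) == e_resolveCacheElem_offset_token, "token offset"); |
| | // static_assert(offsetof(ResolveCacheElem, target) == e_resolveCacheElem_offset_target, "target offset"); |
| | // static_assert(offsetof(ResolveCacheElem, pNext) == e_resolveCacheElem_offset_next, "pNext offset"); |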
| 107 | |
| 108 | ///////////////////////////////////////////////////////////////////////////////////// |
| 109 | // A utility class to help manipulate a call site |
| 110 | struct StubCallSite |
| 111 | { |
| 112 | friend class VirtualCallStubManager; |
| 113 | |
| 114 | private: |
| 115 | |
| 116 | // On x86 there are four possible kinds of callsites when you take into account all features |
| 117 | // Relative: direct call, e.g. "call addr". Not used currently. |
| 118 | // RelativeIndirect (JmpRel): indirect call through a relative address, e.g. "call [addr]" |
| 119 | // RegisterIndirect: indirect call through a register, e.g. "call [eax]" |
| 120 | // DelegateCallSite: anything else, tail called through a register by shuffle thunk, e.g. "jmp [eax]" |
| 121 | // |
| 122 | // On all other platforms we always use an indirect call through an indirection cell |
| 123 | // In these cases all calls are made by the platform equivalent of "call [addr]". |
| 124 | // |
| 125 | // DelegateCallSites are particular in that they can come in a variety of forms: |
| 126 | // a direct delegate call has a sequence defined by the JIT, but a multicast or secure delegate |
| 127 | // is defined in a stub and has a different shape |
| 128 | // |
| 129 | PTR_PCODE m_siteAddr; // Stores the address of an indirection cell |
| 130 | PCODE m_returnAddr; |
| 131 | |
| 132 | public: |
| 133 | |
| 134 | #if defined(_TARGET_X86_) |
| 135 | StubCallSite(TADDR siteAddrForRegisterIndirect, PCODE returnAddr); |
| 136 | |
| 137 | PCODE GetCallerAddress(); |
| 138 | #else // !defined(_TARGET_X86_) |
| 139 | // On platforms where we always use an indirection cell things |
| 140 | // are much simpler - the siteAddr always stores a pointer to a |
| 141 | // value that in turn points to the indirection cell. |
| 142 | |
| 143 | StubCallSite(TADDR siteAddr, PCODE returnAddr) |
| 144 | { LIMITED_METHOD_CONTRACT; m_siteAddr = dac_cast<PTR_PCODE>(siteAddr); m_returnAddr = returnAddr; } |
| 145 | |
| 146 | PCODE GetCallerAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; } |
| 147 | #endif // !defined(_TARGET_X86_) |
| 148 | |
| 149 | PCODE GetSiteTarget() { WRAPPER_NO_CONTRACT; return *(GetIndirectCell()); } |
| 150 | void SetSiteTarget(PCODE newTarget); |
| 151 | PTR_PCODE GetIndirectCell() { LIMITED_METHOD_CONTRACT; return dac_cast<PTR_PCODE>(m_siteAddr); } |
| 152 | PTR_PCODE * GetIndirectCellAddress() { LIMITED_METHOD_CONTRACT; return &m_siteAddr; } |
| 153 | |
| 154 | PCODE GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; } |
| 155 | }; |
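| | // Illustrative usage sketch (hypothetical call-site handling; the real call sites are constructed |
| | // inside code:VSD_ResolveWorker and the backpatching workers): |
| | // |
| | // StubCallSite site(siteAddr, returnAddr); // wrap the indirection cell for this call site |
| | // PCODE currentStub = site.GetSiteTarget(); // read the stub the cell currently points at |
| | // site.SetSiteTarget(newStubEntryPoint); // backpatch the cell to point at a different stub |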
| 156 | |
| 157 | #ifdef FEATURE_PREJIT |
| 158 | extern "C" void StubDispatchFixupStub(); // for lazy fixup of ngen call sites |
| 159 | #endif |
| 160 | |
| 161 | // These are the assembly language entry points that the stubs use when they want to go into the EE |
| 162 | |
| 163 | extern "C" void ResolveWorkerAsmStub(); // resolve a token and transfer control to that method |
| 164 | extern "C" void ResolveWorkerChainLookupAsmStub(); // for chaining of entries in the cache |
| 165 | |
| 166 | #ifdef _TARGET_X86_ |
| 167 | extern "C" void BackPatchWorkerAsmStub(); // backpatch a call site to point to a different stub |
| 168 | #ifdef FEATURE_PAL |
| 169 | extern "C" void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); |
| 170 | #endif // FEATURE_PAL |
| 171 | #endif // _TARGET_X86_ |
| 172 | |
| 173 | |
| 174 | typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; |
| 175 | |
| 176 | // VirtualCallStubManager is the heart of the stub dispatch logic. See the book of the runtime entry |
| 177 | // |
| 178 | // file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc |
| 179 | // |
| 180 | // The basic idea is that a call to an interface method (it could also be used for virtual calls in general, |
| 181 | // but we do not do this) is simply the code |
| 182 | // |
| 183 | // call [DispatchCell] |
| 184 | // |
| 185 | // Where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable |
| 186 | // so we can update the code over time. There are three basic types of stubs that the dispatch cell can point |
| 187 | // to. |
| 188 | // * Lookup: The initial stub that has no 'fast path' and simply pushes an ID for the interface being called |
| 189 | // and calls into the runtime at code:VirtualCallStubManager.ResolveWorkerStatic. |
| 190 | // * Dispatch: Lookup stubs are patched to this stub, which has a fast path that checks for a particular |
| 191 | // Method Table and, if that check fails, jumps to code that |
| 192 | // * Decrements a 'missCount' (which starts out as code:STUB_MISS_COUNT_VALUE). If this count reaches zero, |
| 193 | // code:VirtualCallStubManager.BackPatchWorkerStatic is called, which backpatches the call site to point at |
| 194 | // the resolve stub (however, since this decrementing logic is SHARED among all dispatch stubs, it may take |
| 195 | // multiples of code:STUB_MISS_COUNT_VALUE if multiple call sites are actively polymorphic, though this |
| 196 | // seems unlikely). |
| 197 | // * Calls a resolve stub (whenever a dispatch stub is created, it always has a corresponding resolve |
| 198 | // stub, but the resolve stubs are shared among many dispatch stubs). |
| 199 | // * Resolve: see code:ResolveStub. This looks up the method table in a process-wide cache (see |
| 200 | // code:ResolveCacheElem), and if found, jumps to it. This code path is about 17 instructions long (so |
| 201 | // pretty fast, but certainly much slower than a normal call). If the method table is not found in |
| 202 | // the cache, it calls into the runtime code:VirtualCallStubManager.ResolveWorkerStatic, which |
| 203 | // populates it. |
| 204 | // So the general progression is that call sites' cells |
| 205 | // * start out life pointing to a lookup stub |
| 206 | // * On the first call they get updated to point to a dispatch stub. When this misses, it calls a resolve |
| 207 | // stub, which populates the resolve stub's cache, but does not update the call site's cell (thus it is |
| 208 | // still pointing at the dispatch stub). |
| 209 | // * After code:STUB_MISS_COUNT_VALUE misses, we update the call site's cell to point directly at the |
| 210 | // resolve stub (thus avoiding the overhead of the quick check that always seems to be failing and |
| 211 | // the miss count update). See the pseudocode sketch below. |
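| | // A rough pseudocode sketch of that progression (illustrative only; the real transitions are driven |
| | // by the asm stubs and by code:VirtualCallStubManager.ResolveWorker / BackPatchWorker): |
| | // |
| | // cell = &lookupStub; // as emitted for the freshly jitted call site |
| | // ... first call ... |
| | // cell = &dispatchStub; // dispatch stub: if (obj->pMT == expectedMT) jmp target; else jmp resolveStub |
| | // ... resolve stub fills its cache on misses; cell still points at the dispatch stub ... |
| | // cell = &resolveStub; // after ~STUB_MISS_COUNT_VALUE misses the cell is backpatched to the resolve stub |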
| 212 | // |
| 213 | // QUESTION: What are the lifetimes of the various stubs and hash table entries? |
| 214 | // |
| 215 | // QUESTION: There does not seem to be any logic that will change a call site's cell once it points at a |
| 216 | // Resolve stub. Thus once a particular call site is backpatched to a Resolve stub, we live with the Resolve |
| 217 | // stub's (in)efficiency forever. |
| 218 | // |
| 219 | // see code:#StubDispatchNotes for more |
| 220 | class VirtualCallStubManager : public StubManager |
| 221 | { |
| 222 | friend class ClrDataAccess; |
| 223 | friend class VirtualCallStubManagerManager; |
| 224 | friend class VirtualCallStubManagerIterator; |
| 225 | |
| 226 | VPTR_VTABLE_CLASS(VirtualCallStubManager, StubManager) |
| 227 | |
| 228 | public: |
| 229 | #ifdef _DEBUG |
| 230 | virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager" ; } |
| 231 | #endif |
| 232 | |
| 233 | // The reason for our existence: return a callstub for type id and slot number |
| 234 | // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an |
| 235 | // interface invoke where the id indicates which interface it is. |
| 236 | // |
| 237 | // The function is idempotent, i.e. |
| 238 | // you'll get the same callstub twice if you call it with identical inputs. |
| 239 | PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD); |
| 240 | PCODE GetCallStub(TypeHandle ownerType, DWORD slot); |
| 241 | |
| 242 | // Stubs for vtable-based virtual calls with no lookups |
| 243 | PCODE GetVTableCallStub(DWORD slot); |
| 244 | |
| 245 | // Generate a fresh indirection cell. |
| 246 | BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE); |
| 247 | |
| 248 | // Set up static data structures - called during EEStartup |
| 249 | static void InitStatic(); |
| 250 | static void UninitStatic(); |
| 251 | |
| 252 | // Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators |
| 253 | void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator); |
| 254 | void Uninit(); |
| 255 | |
| 256 | //@TODO: the logging should be tied into the VM's normal logging mechanisms, |
| 257 | //@TODO: for now we just always write a short log file called "StubLog_<pid>.log" |
| 258 | static void StartupLogging(); |
| 259 | static void LoggingDump(); |
| 260 | static void FinishLogging(); |
| 261 | |
| 262 | static void ResetCache(); |
| 263 | |
| 264 | // Reclaim/rearrange any structures; this can only be done during a GC sync point. |
| 265 | // This is the mechanism we are using to avoid synchronization of a lot of our |
| 266 | // cache and hash table accesses. We are requiring that during a gc sync point we are not |
| 267 | // executing any stub code at all, hence at this time we are serialized on a single thread (gc) |
| 268 | // and no other thread is accessing the data structures. |
| 269 | static void ReclaimAll(); |
| 270 | void Reclaim(); |
| 271 | |
| 272 | #ifndef DACCESS_COMPILE |
| 273 | VirtualCallStubManager() |
| 274 | : StubManager(), |
| 275 | lookup_rangeList(), |
| 276 | resolve_rangeList(), |
| 277 | dispatch_rangeList(), |
| 278 | cache_entry_rangeList(), |
| 279 | vtable_rangeList(), |
| 280 | parentDomain(NULL), |
| 281 | isCollectible(false), |
| 282 | m_initialReservedMemForHeaps(NULL), |
| 283 | m_FreeIndCellList(NULL), |
| 284 | m_RecycledIndCellList(NULL), |
| 285 | indcell_heap(NULL), |
| 286 | cache_entry_heap(NULL), |
| 287 | lookup_heap(NULL), |
| 288 | dispatch_heap(NULL), |
| 289 | resolve_heap(NULL), |
| 290 | #ifdef _TARGET_AMD64_ |
| 291 | m_fShouldAllocateLongJumpDispatchStubs(FALSE), |
| 292 | #endif |
| 293 | lookups(NULL), |
| 294 | cache_entries(NULL), |
| 295 | dispatchers(NULL), |
| 296 | resolvers(NULL), |
| 297 | m_counters(NULL), |
| 298 | m_cur_counter_block(NULL), |
| 299 | m_cur_counter_block_for_reclaim(NULL), |
| 300 | m_cur_counter_block_for_reclaim_index(NULL), |
| 301 | m_pNext(NULL) |
| 302 | { |
| 303 | LIMITED_METHOD_CONTRACT; |
| 304 | ZeroMemory(&stats, sizeof(stats)); |
| 305 | } |
| 306 | |
| 307 | ~VirtualCallStubManager(); |
| 308 | #endif // !DACCESS_COMPILE |
| 309 | |
| 310 | |
| 311 | enum StubKind { |
| 312 | SK_UNKNOWN, |
| 313 | SK_LOOKUP, // Lookup Stubs are SLOW stubs that simply call into the runtime to do all work. |
| 314 | SK_DISPATCH, // Dispatch Stubs have a fast check for one type, otherwise they jump to the runtime. Works for monomorphic sites |
| 315 | SK_RESOLVE, // Resolve Stubs do a hash lookup before falling back to the runtime. Works for polymorphic sites. |
| 316 | SK_VTABLECALL, // Stub that jumps to a target method using vtable-based indirections. Works for non-interface calls. |
| 317 | SK_BREAKPOINT |
| 318 | }; |
| 319 | |
| 320 | // peek at the assembly code and predict which kind of a stub we have |
| 321 | StubKind predictStubKind(PCODE stubStartAddress); |
| 322 | |
| 323 | /* know thine own stubs. It is possible that when multiple |
| 324 | virtualcallstub managers are built that these may need to become |
| 325 | non-static, and the callers modified accordingly */ |
| 326 | StubKind getStubKind(PCODE stubStartAddress, BOOL usePredictStubKind = TRUE) |
| 327 | { |
| 328 | WRAPPER_NO_CONTRACT; |
| 329 | SUPPORTS_DAC; |
| 330 | |
| 331 | // This method can be called with stubStartAddress==NULL, e.g. when handling null reference exceptions |
| 332 | // caused by IP=0. Early out for this case to avoid confusing handled access violations inside predictStubKind. |
| 333 | if (PCODEToPINSTR(stubStartAddress) == NULL) |
| 334 | return SK_UNKNOWN; |
| 335 | |
| 336 | // Rather than calling IsInRange(stubStartAddress) for each possible stub kind |
| 337 | // we can peek at the assembly code and predict which kind of a stub we have |
| 338 | StubKind predictedKind = (usePredictStubKind) ? predictStubKind(stubStartAddress) : SK_UNKNOWN; |
| 339 | |
| 340 | if (predictedKind == SK_DISPATCH) |
| 341 | { |
| 342 | if (isDispatchingStub(stubStartAddress)) |
| 343 | return SK_DISPATCH; |
| 344 | } |
| 345 | else if (predictedKind == SK_LOOKUP) |
| 346 | { |
| 347 | if (isLookupStub(stubStartAddress)) |
| 348 | return SK_LOOKUP; |
| 349 | } |
| 350 | else if (predictedKind == SK_RESOLVE) |
| 351 | { |
| 352 | if (isResolvingStub(stubStartAddress)) |
| 353 | return SK_RESOLVE; |
| 354 | } |
| 355 | else if (predictedKind == SK_VTABLECALL) |
| 356 | { |
| 357 | if (isVTableCallStub(stubStartAddress)) |
| 358 | return SK_VTABLECALL; |
| 359 | } |
| 360 | |
| 361 | // This is the slow case. If the predict returned SK_UNKNOWN, SK_BREAKPOINT, |
| 362 | // or the predict was found to be incorrect when checked against the RangeLists |
| 363 | // (isXXXStub), then we'll check each stub heap in sequence. |
| 364 | if (isDispatchingStub(stubStartAddress)) |
| 365 | return SK_DISPATCH; |
| 366 | else if (isLookupStub(stubStartAddress)) |
| 367 | return SK_LOOKUP; |
| 368 | else if (isResolvingStub(stubStartAddress)) |
| 369 | return SK_RESOLVE; |
| 370 | else if (isVTableCallStub(stubStartAddress)) |
| 371 | return SK_VTABLECALL; |
| 372 | |
| 373 | return SK_UNKNOWN; |
| 374 | } |
| 375 | |
| 376 | inline BOOL isStub(PCODE stubStartAddress) |
| 377 | { |
| 378 | WRAPPER_NO_CONTRACT; |
| 379 | SUPPORTS_DAC; |
| 380 | |
| 381 | return (getStubKind(stubStartAddress) != SK_UNKNOWN); |
| 382 | } |
| 383 | |
| 384 | BOOL isDispatchingStub(PCODE stubStartAddress) |
| 385 | { |
| 386 | WRAPPER_NO_CONTRACT; |
| 387 | SUPPORTS_DAC; |
| 388 | |
| 389 | return GetDispatchRangeList()->IsInRange(stubStartAddress); |
| 390 | } |
| 391 | |
| 392 | BOOL isResolvingStub(PCODE stubStartAddress) |
| 393 | { |
| 394 | WRAPPER_NO_CONTRACT; |
| 395 | SUPPORTS_DAC; |
| 396 | |
| 397 | return GetResolveRangeList()->IsInRange(stubStartAddress); |
| 398 | } |
| 399 | |
| 400 | BOOL isLookupStub(PCODE stubStartAddress) |
| 401 | { |
| 402 | WRAPPER_NO_CONTRACT; |
| 403 | SUPPORTS_DAC; |
| 404 | |
| 405 | return GetLookupRangeList()->IsInRange(stubStartAddress); |
| 406 | } |
| 407 | |
| 408 | BOOL isVTableCallStub(PCODE stubStartAddress) |
| 409 | { |
| 410 | WRAPPER_NO_CONTRACT; |
| 411 | SUPPORTS_DAC; |
| 412 | |
| 413 | return GetVTableCallRangeList()->IsInRange(stubStartAddress); |
| 414 | } |
| 415 | |
| 416 | static BOOL isDispatchingStubStatic(PCODE addr) |
| 417 | { |
| 418 | WRAPPER_NO_CONTRACT; |
| 419 | StubKind stubKind; |
| 420 | FindStubManager(addr, &stubKind); |
| 421 | return stubKind == SK_DISPATCH; |
| 422 | } |
| 423 | |
| 424 | static BOOL isResolvingStubStatic(PCODE addr) |
| 425 | { |
| 426 | WRAPPER_NO_CONTRACT; |
| 427 | StubKind stubKind; |
| 428 | FindStubManager(addr, &stubKind); |
| 429 | return stubKind == SK_RESOLVE; |
| 430 | } |
| 431 | |
| 432 | static BOOL isLookupStubStatic(PCODE addr) |
| 433 | { |
| 434 | WRAPPER_NO_CONTRACT; |
| 435 | StubKind stubKind; |
| 436 | FindStubManager(addr, &stubKind); |
| 437 | return stubKind == SK_LOOKUP; |
| 438 | } |
| 439 | |
| 440 | static BOOL isVtableCallStubStatic(PCODE addr) |
| 441 | { |
| 442 | WRAPPER_NO_CONTRACT; |
| 443 | StubKind stubKind; |
| 444 | FindStubManager(addr, &stubKind); |
| 445 | return stubKind == SK_VTABLECALL; |
| 446 | } |
| 447 | |
| 448 | //use range lists to track the chunks of memory that are part of each heap |
| 449 | LockedRangeList lookup_rangeList; |
| 450 | LockedRangeList resolve_rangeList; |
| 451 | LockedRangeList dispatch_rangeList; |
| 452 | LockedRangeList cache_entry_rangeList; |
| 453 | LockedRangeList vtable_rangeList; |
| 454 | |
| 455 | // Get dac-ized pointers to rangelist. |
| 456 | RangeList* GetLookupRangeList() |
| 457 | { |
| 458 | SUPPORTS_DAC; |
| 459 | |
| 460 | TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, lookup_rangeList); |
| 461 | return PTR_RangeList(addr); |
| 462 | } |
| 463 | RangeList* GetResolveRangeList() |
| 464 | { |
| 465 | SUPPORTS_DAC; |
| 466 | |
| 467 | TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, resolve_rangeList); |
| 468 | return PTR_RangeList(addr); |
| 469 | } |
| 470 | RangeList* GetDispatchRangeList() |
| 471 | { |
| 472 | SUPPORTS_DAC; |
| 473 | |
| 474 | TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, dispatch_rangeList); |
| 475 | return PTR_RangeList(addr); |
| 476 | } |
| 477 | RangeList* GetCacheEntryRangeList() |
| 478 | { |
| 479 | SUPPORTS_DAC; |
| 480 | TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList); |
| 481 | return PTR_RangeList(addr); |
| 482 | } |
| 483 | RangeList* GetVTableCallRangeList() |
| 484 | { |
| 485 | SUPPORTS_DAC; |
| 486 | TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, vtable_rangeList); |
| 487 | return PTR_RangeList(addr); |
| 488 | } |
| 489 | |
| 490 | private: |
| 491 | |
| 492 | //allocate and initialize a stub of the desired kind |
| 493 | DispatchHolder *GenerateDispatchStub(PCODE addrOfCode, |
| 494 | PCODE addrOfFail, |
| 495 | void *pMTExpected, |
| 496 | size_t dispatchToken); |
| 497 | |
| 498 | #ifdef _TARGET_AMD64_ |
| 499 | // Used to allocate a long jump dispatch stub. See comment around |
| 500 | // m_fShouldAllocateLongJumpDispatchStubs for explanation. |
| 501 | DispatchHolder *GenerateDispatchStubLong(PCODE addrOfCode, |
| 502 | PCODE addrOfFail, |
| 503 | void *pMTExpected, |
| 504 | size_t dispatchToken); |
| 505 | #endif |
| 506 | |
| 507 | ResolveHolder *GenerateResolveStub(PCODE addrOfResolver, |
| 508 | PCODE addrOfPatcher, |
| 509 | size_t dispatchToken); |
| 510 | |
| 511 | LookupHolder *GenerateLookupStub(PCODE addrOfResolver, |
| 512 | size_t dispatchToken); |
| 513 | |
| 514 | VTableCallHolder* GenerateVTableCallStub(DWORD slot); |
| 515 | |
| 516 | template <typename STUB_HOLDER> |
| 517 | void AddToCollectibleVSDRangeList(STUB_HOLDER *holder) |
| 518 | { |
| 519 | if (isCollectible) |
| 520 | { |
| 521 | parentDomain->GetCollectibleVSDRanges()->AddRange(reinterpret_cast<BYTE *>(holder->stub()), |
| 522 | reinterpret_cast<BYTE *>(holder->stub()) + holder->stub()->size(), |
| 523 | this); |
| 524 | } |
| 525 | } |
| 526 | |
| 527 | // The resolve cache is static across all AppDomains |
| 528 | ResolveCacheElem *GenerateResolveCacheElem(void *addrOfCode, |
| 529 | void *pMTExpected, |
| 530 | size_t token); |
| 531 | |
| 532 | ResolveCacheElem *GetResolveCacheElem(void *pMT, |
| 533 | size_t token, |
| 534 | void *target); |
| 535 | |
| 536 | //Given a dispatch token, an object and a method table, determine the |
| 537 | //target address to go to. The return value (BOOL) states whether this address |
| 538 | //is cacheable or not. |
| 539 | static BOOL Resolver(MethodTable * pMT, |
| 540 | DispatchToken token, |
| 541 | OBJECTREF * protectedObj, |
| 542 | PCODE * ppTarget, |
| 543 | BOOL throwOnConflict); |
| 544 | |
| 545 | // This can be used to find a target without needing the ability to throw |
| 546 | static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); |
| 547 | |
| 548 | public: |
| 549 | // Return the MethodDesc corresponding to this token. |
| 550 | static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT); |
| 551 | static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token); |
| 552 | static MethodTable *GetTypeFromToken(DispatchToken token); |
| 553 | |
| 554 | //This is used to get the token out of a stub |
| 555 | static size_t GetTokenFromStub(PCODE stub); |
| 556 | |
| 557 | //This is used to get the token out of a stub when we know the stub manager and stub kind |
| 558 | static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubKind kind); |
| 559 | |
| 560 | // General utility functions |
| 561 | // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER |
| 562 | static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT); |
| 563 | |
| 564 | // Full exhaustive lookup. THROWS, GC_TRIGGERS |
| 565 | static PCODE GetTarget(DispatchToken token, MethodTable *pMT, BOOL throwOnConflict); |
| 566 | |
| 567 | private: |
| 568 | // Given a dispatch token, return true if the token represents an interface, false if just a slot. |
| 569 | static BOOL IsInterfaceToken(DispatchToken token); |
| 570 | |
| 571 | // Given a dispatch token, return true if the token represents a slot on the target. |
| 572 | static BOOL IsClassToken(DispatchToken token); |
| 573 | |
| 574 | #ifdef CHAIN_LOOKUP |
| 575 | static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem); |
| 576 | #endif |
| 577 | |
| 578 | // Flags used by the non-x86 versions of VSD_ResolveWorker |
| 579 | |
| 580 | #define SDF_ResolveBackPatch (0x01) |
| 581 | #define SDF_ResolvePromoteChain (0x02) |
| 582 | #define SDF_ResolveFlags (0x03) |
| 583 | |
| 584 | // These methods need to call the instance methods. |
| 585 | friend PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, |
| 586 | TADDR siteAddrForRegisterIndirect, |
| 587 | size_t token |
| 588 | #ifndef _TARGET_X86_ |
| 589 | , UINT_PTR flags |
| 590 | #endif |
| 591 | ); |
| 592 | |
| 593 | #if defined(_TARGET_X86_) && defined(FEATURE_PAL) |
| 594 | friend void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); |
| 595 | #endif |
| 596 | |
| 597 | //These are the entrypoints that the stubs actually end up calling via the |
| 598 | // xxxAsmStub methods above |
| 599 | static void STDCALL BackPatchWorkerStatic(PCODE returnAddr, TADDR siteAddrForRegisterIndirect); |
| 600 | |
| 601 | public: |
| 602 | PCODE ResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token, StubKind stubKind); |
| 603 | void BackPatchWorker(StubCallSite* pCallSite); |
| 604 | |
| 605 | //Change the callsite to point to stub |
| 606 | void BackPatchSite(StubCallSite* pCallSite, PCODE stub); |
| 607 | |
| 608 | public: |
| 609 | /* the following two public functions are to support tracing or stepping thru |
| 610 | stubs via the debugger. */ |
| 611 | virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); |
| 612 | virtual BOOL TraceManager(Thread *thread, |
| 613 | TraceDestination *trace, |
| 614 | T_CONTEXT *pContext, |
| 615 | BYTE **pRetAddr); |
| 616 | size_t GetSize() |
| 617 | { |
| 618 | LIMITED_METHOD_CONTRACT; |
| 619 | size_t retval=0; |
| 620 | if(indcell_heap) |
| 621 | retval+=indcell_heap->GetSize(); |
| 622 | if(cache_entry_heap) |
| 623 | retval+=cache_entry_heap->GetSize(); |
| 624 | if(lookup_heap) |
| 625 | retval+=lookup_heap->GetSize(); |
| 626 | if(dispatch_heap) |
| 627 | retval+=dispatch_heap->GetSize(); |
| 628 | if(resolve_heap) |
| 629 | retval+=resolve_heap->GetSize(); |
| 630 | return retval; |
| 631 | }; |
| 632 | |
| 633 | private: |
| 634 | /* the following two private functions are to support tracing or stepping thru |
| 635 | stubs via the debugger. */ |
| 636 | virtual BOOL DoTraceStub(PCODE stubStartAddress, |
| 637 | TraceDestination *trace); |
| 638 | |
| 639 | private: |
| 640 | // The parent domain of this manager |
| 641 | PTR_BaseDomain parentDomain; |
| 642 | bool isCollectible; |
| 643 | |
| 644 | BYTE * m_initialReservedMemForHeaps; |
| 645 | |
| 646 | static const UINT32 INDCELLS_PER_BLOCK = 32; // 32 indirection cells per block. |
| 647 | |
| 648 | CrstExplicitInit m_indCellLock; |
| 649 | |
| 650 | // List of free indirection cells. The cells were directly allocated from the loader heap |
| 651 | // (code:VirtualCallStubManager::GenerateStubIndirection) |
| 652 | BYTE * m_FreeIndCellList; |
| 653 | |
| 654 | // List of recycled indirection cells. The cells were recycled from finalized dynamic methods |
| 655 | // (code:LCGMethodResolver::RecycleIndCells). |
| 656 | BYTE * m_RecycledIndCellList; |
| 657 | |
| 658 | #ifndef DACCESS_COMPILE |
| 659 | // This method returns a free cell from m_FreeIndCellList. It returns NULL if the list is empty. |
| 660 | BYTE * GetOneFreeIndCell() |
| 661 | { |
| 662 | WRAPPER_NO_CONTRACT; |
| 663 | |
| 664 | return GetOneIndCell(&m_FreeIndCellList); |
| 665 | } |
| 666 | |
| 667 | // This method returns a recycled cell from m_RecycledIndCellList. It returns NULL if the list is empty. |
| 668 | BYTE * GetOneRecycledIndCell() |
| 669 | { |
| 670 | WRAPPER_NO_CONTRACT; |
| 671 | |
| 672 | return GetOneIndCell(&m_RecycledIndCellList); |
| 673 | } |
| 674 | |
| 675 | // This method returns a cell from ppList. It returns NULL if the list is empty. |
| 676 | BYTE * GetOneIndCell(BYTE ** ppList) |
| 677 | { |
| 678 | CONTRACT (BYTE*) { |
| 679 | NOTHROW; |
| 680 | GC_NOTRIGGER; |
| 681 | MODE_ANY; |
| 682 | PRECONDITION(CheckPointer(ppList)); |
| 683 | PRECONDITION(m_indCellLock.OwnedByCurrentThread()); |
| 684 | } CONTRACT_END; |
| 685 | |
| 686 | BYTE * temp = *ppList; |
| 687 | |
| 688 | if (temp) |
| 689 | { |
| 690 | BYTE * pNext = *((BYTE **)temp); |
| 691 | *ppList = pNext; |
| 692 | RETURN temp; |
| 693 | } |
| 694 | |
| 695 | RETURN NULL; |
| 696 | } |
| 697 | |
| 698 | // insert a linked list of indirection cells at the beginning of m_FreeIndCellList |
| 699 | void InsertIntoFreeIndCellList(BYTE * head, BYTE * tail) |
| 700 | { |
| 701 | WRAPPER_NO_CONTRACT; |
| 702 | |
| 703 | InsertIntoIndCellList(&m_FreeIndCellList, head, tail); |
| 704 | } |
| 705 | |
| 706 | // insert a linked list of indirection cells at the beginning of ppList |
| 707 | void InsertIntoIndCellList(BYTE ** ppList, BYTE * head, BYTE * tail) |
| 708 | { |
| 709 | CONTRACTL { |
| 710 | NOTHROW; |
| 711 | GC_NOTRIGGER; |
| 712 | PRECONDITION(CheckPointer(ppList)); |
| 713 | PRECONDITION(CheckPointer(head)); |
| 714 | PRECONDITION(CheckPointer(tail)); |
| 715 | PRECONDITION(m_indCellLock.OwnedByCurrentThread()); |
| 716 | } CONTRACTL_END; |
| 717 | |
| 718 | BYTE * temphead = *ppList; |
| 719 | *((BYTE**)tail) = temphead; |
| 720 | *ppList = head; |
| 721 | } |
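| | // Illustrative only: both cell lists are intrusive singly-linked lists in which the first |
| | // pointer-sized slot of each free/recycled indirection cell is reused as the 'next' link: |
| | // |
| | // head -> [next, ...] -> [next, ...] -> [NULL, ...] |
| | // |
| | // which is why GetOneIndCell advances via *(BYTE **)temp and InsertIntoIndCellList stores the |
| | // old head into *(BYTE **)tail before swinging *ppList to the new head. |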
| 722 | #endif // !DACCESS_COMPILE |
| 723 | |
| 724 | PTR_LoaderHeap indcell_heap; // indirection cells go here |
| 725 | PTR_LoaderHeap cache_entry_heap; // resolve cache elem entries go here |
| 726 | PTR_LoaderHeap lookup_heap; // lookup stubs go here |
| 727 | PTR_LoaderHeap dispatch_heap; // dispatch stubs go here |
| 728 | PTR_LoaderHeap resolve_heap; // resolve stubs go here |
| 729 | PTR_LoaderHeap vtable_heap; // vtable-based jump stubs go here |
| 730 | |
| 731 | #ifdef _TARGET_AMD64_ |
| 732 | // When we layout the stub heaps, we put them close together in a sequential order |
| 733 | // so that we maximize performance with respect to branch predictions. On AMD64, |
| 734 | // dispatch stubs use a rel32 jump on failure to the resolve stub. This works for |
| 735 | // a while because of the ordering, but as soon as we have to start allocating more |
| 736 | // memory for either the dispatch or resolve heaps we have a chance that we'll be |
| 737 | // further away than a rel32 jump can reach, because we're in a 64-bit address |
| 738 | // space. As such, this flag will indicate when we allocate the first dispatch stub |
| 739 | // that cannot reach a resolve stub, and when this happens we'll switch over to |
| 740 | // allocating the larger version of the dispatch stub which contains an abs64 jump. |
| 741 | //@TODO: This is a bit of a workaround, but the limitations of LoaderHeap require that we |
| 742 | //@TODO: take this approach. Hopefully in Orcas we'll have a chance to rewrite LoaderHeap. |
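| | // Illustrative only: "reachable" above means the jump's signed 32-bit displacement fits, roughly: |
| | // |
| | // INT64 delta = (INT64)resolveStubAddr - ((INT64)rel32JumpAddr + 5); // 5 = size of a jmp rel32 |
| | // bool fitsInRel32 = (delta == (INT64)(INT32)delta); // otherwise use the abs64 (long jump) form |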
| 743 | BOOL m_fShouldAllocateLongJumpDispatchStubs; // Defaults to FALSE. |
| 744 | #endif |
| 745 | |
| 746 | BucketTable * lookups; // hash table of lookups keyed by tokens |
| 747 | BucketTable * cache_entries; // hash table of dispatch token/target structs for dispatch cache |
| 748 | BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype |
| 749 | BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub |
| 750 | BucketTable * vtableCallers; // hash table of vtable call stubs keyed by slot values |
| 751 | |
| 752 | // This structure is used to keep track of the fail counters. |
| 753 | // We only need one fail counter per ResolveStub, |
| 754 | // and most programs use fewer than 250 ResolveStubs |
| 755 | // We allocate these on the main heap using "new counter block" |
| 756 | struct counter_block |
| 757 | { |
| 758 | static const UINT32 MAX_COUNTER_ENTRIES = 256-2; // 254 counters should be enough for most cases. |
| 759 | |
| 760 | counter_block * next; // the next block |
| 761 | UINT32 used; // the index of the next free entry |
| 762 | INT32 block[MAX_COUNTER_ENTRIES]; // the counters |
| 763 | }; |
| 764 | |
| 765 | counter_block *m_counters; // linked list of counter blocks of failure counters |
| 766 | counter_block *m_cur_counter_block; // current block for updating counts |
| 767 | counter_block *m_cur_counter_block_for_reclaim; // current block for updating |
| 768 | UINT32 m_cur_counter_block_for_reclaim_index; // index into the current block for updating |
| 769 | |
| 770 | // Used to keep track of all the VCSManager objects in the system. |
| 771 | PTR_VirtualCallStubManager m_pNext; // Linked list pointer |
| 772 | |
| 773 | public: |
| 774 | // Given a stub address, find the VCSManager that owns it. |
| 775 | static VirtualCallStubManager *FindStubManager(PCODE addr, |
| 776 | StubKind* wbStubKind = NULL, |
| 777 | BOOL usePredictStubKind = TRUE); |
| 778 | |
| 779 | #ifndef DACCESS_COMPILE |
| 780 | // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList |
| 781 | void InsertIntoRecycledIndCellList_Locked(BYTE * head, BYTE * tail) |
| 782 | { |
| 783 | CONTRACTL { |
| 784 | NOTHROW; |
| 785 | GC_TRIGGERS; |
| 786 | MODE_ANY; |
| 787 | } CONTRACTL_END; |
| 788 | |
| 789 | CrstHolder lh(&m_indCellLock); |
| 790 | |
| 791 | InsertIntoIndCellList(&m_RecycledIndCellList, head, tail); |
| 792 | } |
| 793 | #endif // !DACCESS_COMPILE |
| 794 | |
| 795 | // These are the counters for keeping statistics |
| 796 | struct |
| 797 | { |
| 798 | UINT32 site_counter; //# of call sites |
| 799 | UINT32 stub_lookup_counter; //# of lookup stubs |
| 800 | UINT32 stub_poly_counter; //# of resolve stubs |
| 801 | UINT32 stub_mono_counter; //# of dispatch stubs |
| 802 | UINT32 stub_vtable_counter; //# of vtable call stubs |
| 803 | UINT32 site_write; //# of call site backpatch writes |
| 804 | UINT32 site_write_poly; //# of call site backpatch writes to point to resolve stubs |
| 805 | UINT32 site_write_mono; //# of call site backpatch writes to point to dispatch stubs |
| 806 | UINT32 worker_call; //# of calls into ResolveWorker |
| 807 | UINT32 worker_call_no_patch; //# of times call_worker resulted in no patch |
| 808 | UINT32 worker_collide_to_mono; //# of times we converted a poly stub to a mono stub instead of writing the cache entry |
| 809 | UINT32 stub_space; //# of bytes of stubs |
| 810 | UINT32 cache_entry_counter; //# of cache structs |
| 811 | UINT32 cache_entry_space; //# of bytes used by cache lookup structs |
| 812 | } stats; |
| 813 | |
| 814 | void LogStats(); |
| 815 | |
| 816 | #ifdef DACCESS_COMPILE |
| 817 | protected: |
| 818 | virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); |
| 819 | virtual LPCWSTR GetStubManagerName(PCODE addr) |
| 820 | { |
| 821 | WRAPPER_NO_CONTRACT; |
| 822 | CONSISTENCY_CHECK(isStub(addr)); |
| 823 | |
| 824 | if (isLookupStub(addr)) |
| 825 | { |
| 826 | return W("VSD_LookupStub" ); |
| 827 | } |
| 828 | else if (isDispatchingStub(addr)) |
| 829 | { |
| 830 | return W("VSD_DispatchStub" ); |
| 831 | } |
| 832 | else |
| 833 | { |
| 834 | CONSISTENCY_CHECK(isResolvingStub(addr)); |
| 835 | return W("VSD_ResolveStub" ); |
| 836 | } |
| 837 | } |
| 838 | #endif |
| 839 | }; |
| 840 | |
| 841 | /******************************************************************************************************** |
| 842 | ********************************************************************************************************/ |
| 843 | typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager; |
| 844 | |
| 845 | class VirtualCallStubManagerIterator; |
| 846 | class VirtualCallStubManagerManager : public StubManager |
| 847 | { |
| 848 | VPTR_VTABLE_CLASS(VirtualCallStubManagerManager, StubManager) |
| 849 | |
| 850 | friend class StubManager; |
| 851 | friend class VirtualCallStubManager; |
| 852 | friend class VirtualCallStubManagerIterator; |
| 853 | friend class StubManagerIterator; |
| 854 | |
| 855 | public: |
| 856 | virtual BOOL TraceManager(Thread *thread, TraceDestination *trace, |
| 857 | T_CONTEXT *pContext, BYTE **pRetAddr); |
| 858 | |
| 859 | virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); |
| 860 | |
| 861 | virtual BOOL DoTraceStub(PCODE stubStartAddress, TraceDestination *trace); |
| 862 | |
| 863 | static MethodDesc *Entry2MethodDesc(PCODE stubStartAddress, MethodTable *pMT); |
| 864 | |
| 865 | #ifdef DACCESS_COMPILE |
| 866 | virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); |
| 867 | virtual LPCWSTR GetStubManagerName(PCODE addr) |
| 868 | { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); } |
| 869 | #endif |
| 870 | |
| 871 | private: |
| 872 | // Used to keep track of all the VCSManager objects in the system. |
| 873 | PTR_VirtualCallStubManager m_pManagers; // Head of the linked list |
| 874 | |
| 875 | #ifndef DACCESS_COMPILE |
| 876 | // Ctor. This is only used by StaticInit. |
| 877 | VirtualCallStubManagerManager(); |
| 878 | #endif |
| 879 | |
| 880 | // A cache element to quickly check the last matched manager. |
| 881 | Volatile<VirtualCallStubManager*> m_pCacheElem; |
| 882 | |
| 883 | // RW lock for reading entries and removing them. |
| 884 | SimpleRWLock m_RWLock; |
| 885 | |
| 886 | // This will look through all the managers in an intelligent fashion to |
| 887 | // find the manager that owns the address. |
| 888 | VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress); |
| 889 | |
| 890 | protected: |
| 891 | // Add a VCSManager to the linked list. |
| 892 | void AddStubManager(VirtualCallStubManager *pMgr); |
| 893 | |
| 894 | // Remove a VCSManager from the linked list. |
| 895 | void RemoveStubManager(VirtualCallStubManager *pMgr); |
| 896 | |
| 897 | VirtualCallStubManager *FirstManager() |
| 898 | { WRAPPER_NO_CONTRACT; return m_pManagers; } |
| 899 | |
| 900 | #ifndef DACCESS_COMPILE |
| 901 | static void InitStatic(); |
| 902 | #endif |
| 903 | |
| 904 | public: |
| 905 | SPTR_DECL(VirtualCallStubManagerManager, g_pManager); |
| 906 | |
| 907 | static VirtualCallStubManagerManager *GlobalManager() |
| 908 | { LIMITED_METHOD_DAC_CONTRACT; CONSISTENCY_CHECK(CheckPointer(g_pManager)); return g_pManager; } |
| 909 | |
| 910 | VirtualCallStubManagerIterator IterateVirtualCallStubManagers(); |
| 911 | |
| 912 | #ifdef _DEBUG |
| 913 | // Debug helper to help identify stub-managers. |
| 914 | virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager" ; } |
| 915 | #endif |
| 916 | }; |
| 917 | |
| 918 | /******************************************************************************************************** |
| 919 | ********************************************************************************************************/ |
| 920 | class VirtualCallStubManagerIterator |
| 921 | { |
| 922 | friend class VirtualCallStubManagerManager; |
| 923 | |
| 924 | public: |
| 925 | BOOL Next(); |
| 926 | VirtualCallStubManager *Current(); |
| 927 | |
| 928 | // Copy ctor |
| 929 | inline VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it); |
| 930 | |
| 931 | protected: |
| 932 | inline VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr); |
| 933 | |
| 934 | BOOL m_fIsStart; |
| 935 | VirtualCallStubManager *m_pCurMgr; |
| 936 | }; |
| 937 | |
| 938 | ///////////////////////////////////////////////////////////////////////////////////////////// |
| 939 | // Ctor |
| 940 | inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(VirtualCallStubManagerManager *pMgr) |
| 941 | : m_fIsStart(TRUE), m_pCurMgr(pMgr->m_pManagers) |
| 942 | { |
| 943 | LIMITED_METHOD_DAC_CONTRACT; |
| 944 | CONSISTENCY_CHECK(CheckPointer(pMgr)); |
| 945 | } |
| 946 | |
| 947 | ///////////////////////////////////////////////////////////////////////////////////////////// |
| 948 | // Copy ctor |
| 949 | inline VirtualCallStubManagerIterator::VirtualCallStubManagerIterator(const VirtualCallStubManagerIterator &it) |
| 950 | : m_fIsStart(it.m_fIsStart), m_pCurMgr(it.m_pCurMgr) |
| 951 | { |
| 952 | LIMITED_METHOD_DAC_CONTRACT; |
| 953 | } |
| 954 | |
| 955 | /******************************************************************************************************** |
| 956 | #StubDispatchNotes |
| 957 | |
| 958 | A note on approach. The cache and hash tables used by the stub and lookup mechanism |
| 959 | are designed with an eye to minimizing interlocking and/or syncing and/or locking operations. |
| 960 | They are intended to run in a highly concurrent environment. Since there is no magic, |
| 961 | some tradeoffs and some implementation constraints are required. The basic notion |
| 962 | is that if all reads and writes are atomic and if all functions and operations operate |
| 963 | correctly in the face of commutative reorderings of the visibility of all reads and writes |
| 964 | across threads, then we don't have to interlock, sync, or serialize. Our approximation of |
| 965 | this is: |
| 966 | |
| 967 | 1. All reads and all writes to tables must be atomic. This effectively limits the actual entry |
| 968 | size in a table to be a pointer or a pointer sized thing. |
| 969 | |
| 970 | 2. All functions, like comparisons for equality or computation of hash values must function |
| 971 | correctly in the face of concurrent updating of the underlying table. This is accomplished |
| 972 | by making the underlying structures/entries effectively immutable, if concurrency is in any way possible. |
| 973 | By effectively immutable, we mean that the stub or token structure is either immutable or that, |
| 974 | if it is ever written, all possibly concurrent writes are attempting to write the same value (atomically), |
| 975 | or that the competing (atomic) values do not affect correctness, and that the function operates correctly whether |
| 976 | or not any of the writes have taken place (are visible yet). The constraint we maintain is that all competing |
| 977 | updates (and their visibility or lack thereof) do not alter the correctness of the program. |
| 978 | |
| 979 | 3. All tables are inexact. The counts they hold (e.g. number of contained entries) may be inaccurate, |
| 980 | but that inaccuracy cannot affect their correctness. Table modifications, such as insertion of |
| 981 | a new entry, may not succeed, but such failures cannot affect correctness. This implies that just |
| 982 | because a stub/entry is not present in a table, e.g. has been removed, that does not mean that |
| 983 | it is not in use. It also implies that internal table structures, such as discarded hash table buckets, |
| 984 | cannot be freely recycled since another concurrent thread may still be walking through them. |
| 985 | |
| 986 | 4. Occasionally it is necessary to pick up the pieces that have been dropped on the floor, |
| 987 | so to speak, e.g. actually recycle hash buckets that aren't in use. Since we have a natural |
| 988 | sync point already in the GC, we use that to provide cleanup points. We need to make sure that code that |
| 989 | is walking our structures is not at a GC safe point. Hence if the GC calls back into us inside the GC |
| 990 | sync point, we know that nobody is inside our structures and we can safely rearrange and recycle things. |
| 991 | ********************************************************************************************************/ |
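| | // A minimal sketch of the publication pattern points 1 and 2 above imply (illustrative; the helper |
| | // name is approximate): a new entry is fully initialized first, and only then made visible with a |
| | // single pointer-sized atomic write or compare-and-swap, so readers see either the old or new state: |
| | // |
| | // pNewElem->pNext = pCurrentHead; // initialize completely before publishing |
| | // FastInterlockCompareExchangePointer(&pHead, pNewElem, pCurrentHead); // losing the race is harmless |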
| 992 | |
| 993 | //initial and increment value for fail stub counters |
| 994 | #ifdef STUB_LOGGING |
| 995 | extern UINT32 STUB_MISS_COUNT_VALUE; |
| 996 | extern UINT32 STUB_COLLIDE_WRITE_PCT; |
| 997 | extern UINT32 STUB_COLLIDE_MONO_PCT; |
| 998 | #else // !STUB_LOGGING |
| 999 | #define STUB_MISS_COUNT_VALUE 100 |
| 1000 | #define STUB_COLLIDE_WRITE_PCT 100 |
| 1001 | #define STUB_COLLIDE_MONO_PCT 0 |
| 1002 | #endif // !STUB_LOGGING |
| 1003 | |
| 1004 | //size and mask of the cache used by resolve stubs |
| 1005 | // CALL_STUB_CACHE_SIZE must be equal to 2^CALL_STUB_CACHE_NUM_BITS |
| 1006 | #define CALL_STUB_CACHE_NUM_BITS 12 //10 |
| 1007 | #define CALL_STUB_CACHE_SIZE 4096 //1024 |
| 1008 | #define CALL_STUB_CACHE_MASK (CALL_STUB_CACHE_SIZE-1) |
| 1009 | #define CALL_STUB_CACHE_PROBES 5 |
| 1010 | //min sizes for BucketTable and buckets and the growth and hashing constants |
| 1011 | #define CALL_STUB_MIN_BUCKETS 32 |
| 1012 | #define CALL_STUB_MIN_ENTRIES 4 |
| 1013 | //this is so that the very first growth will jump from 4 to 32 entries, then double from there. |
| 1014 | #define CALL_STUB_SECONDARY_ENTRIES 8 |
| 1015 | #define CALL_STUB_GROWTH_FACTOR 2 |
| 1016 | #define CALL_STUB_LOAD_FACTOR 90 |
| 1017 | #define CALL_STUB_HASH_CONST1 1327 |
| 1018 | #define CALL_STUB_HASH_CONST2 43627 |
| 1019 | #define LARGE_PRIME 7199369 |
| 1020 | //internal layout of buckets=size-1,count,entries.... |
| 1021 | #define CALL_STUB_MASK_INDEX 0 |
| 1022 | #define CALL_STUB_COUNT_INDEX 1 |
| 1023 | #define CALL_STUB_DEAD_LINK 2 |
| 1024 | #define CALL_STUB_FIRST_INDEX 3 |
| 1025 | //marker entries in cache and hash tables |
| 1026 | #define CALL_STUB_EMPTY_ENTRY 0 |
| 1027 | // number of successes for a chained element before it gets moved to the front |
| 1028 | #define CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT (0x100) |
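| | // Illustrative only (the real hashing lives in DispatchCache::HashToken and the resolve stubs): a |
| | // hypothetical reduction of a hash to a cache index would use the mask/probe constants like so: |
| | // |
| | // UINT32 index = hash & CALL_STUB_CACHE_MASK; // start slot in the CALL_STUB_CACHE_SIZE table |
| | // // probe at most CALL_STUB_CACHE_PROBES nearby slots before treating the lookup as a miss |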
| 1029 | |
| 1030 | /******************************************************************************************************* |
| 1031 | Entry is an abstract class. We will make specific subclasses for each kind of |
| 1032 | entry. Entries hold references to stubs or tokens. The principal thing they provide |
| 1033 | is a virtual Equals function that is used by the caching and hashing tables within which |
| 1034 | the stubs and tokens are stored. Entries are typically stack allocated by the routines |
| 1035 | that call into the hash and caching functions, and the functions stuff stubs into the entry |
| 1036 | to do the comparisons. Essentially specific entry subclasses supply a vtable to a stub |
| 1037 | as and when needed. This means we don't have to have vtables attached to stubs. |
| 1038 | |
| 1039 | Summarizing so far, there is a struct for each kind of stub or token of the form XXXXStub. |
| 1040 | They provide the actual storage layouts. |
| 1041 | There is a struct of the form XXXXHolder in which each stub that has code is contained. |
| 1042 | They provide alignment and ancillary storage for the stub code. |
| 1043 | There is a subclass of Entry for each kind of stub or token, of the form XXXXEntry. |
| 1044 | They provide the specific implementations of the virtual functions declared in Entry. */ |
| 1045 | class Entry |
| 1046 | { |
| 1047 | public: |
| 1048 | //access and compare the keys of the entry |
| 1049 | virtual BOOL Equals(size_t keyA, size_t keyB)=0; |
| 1050 | virtual size_t KeyA()=0; |
| 1051 | virtual size_t KeyB()=0; |
| 1052 | |
| 1053 | //contents is the struct or token that the entry exposes |
| 1054 | virtual void SetContents(size_t contents)=0; |
| 1055 | }; |
| 1056 | |
| 1057 | /* define the platform specific Stubs and stub holders */ |
| 1058 | |
| 1059 | #include <virtualcallstubcpu.hpp> |
| 1060 | |
| 1061 | #if USES_LOOKUP_STUBS |
| 1062 | /********************************************************************************************** |
| 1063 | LookupEntry wraps LookupStubs and provides the concrete implementation of the abstract class Entry. |
| 1064 | Virtual and interface call sites, when they are first jitted, point to LookupStubs. The hash table |
| 1065 | that contains lookup stubs is keyed by token, hence the Equals function uses the embedded token in |
| 1066 | the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as |
| 1067 | long as they are relatively rare) we do use direct comparison of the tokens rather than extracting |
| 1068 | the fields from within the tokens, for perf reasons. */ |
| 1069 | class LookupEntry : public Entry |
| 1070 | { |
| 1071 | public: |
| 1072 | //Creates an entry that wraps lookup stub s |
| 1073 | LookupEntry(size_t s) |
| 1074 | { |
| 1075 | LIMITED_METHOD_CONTRACT; |
| 1076 | _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)s)); |
| 1077 | stub = (LookupStub*) s; |
| 1078 | } |
| 1079 | |
| 1080 | //default constructor to allow stack and inline allocation of lookup entries |
| 1081 | LookupEntry() {LIMITED_METHOD_CONTRACT; stub = NULL;} |
| 1082 | |
| 1083 | //implementations of abstract class Entry |
| 1084 | BOOL Equals(size_t keyA, size_t keyB) |
| 1085 | { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } |
| 1086 | |
| 1087 | size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } |
| 1088 | size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } |
| 1089 | |
| 1090 | void SetContents(size_t contents) |
| 1091 | { |
| 1092 | LIMITED_METHOD_CONTRACT; |
| 1093 | _ASSERTE(VirtualCallStubManager::isLookupStubStatic((PCODE)contents)); |
| 1094 | stub = LookupHolder::FromLookupEntry((PCODE)contents)->stub(); |
| 1095 | } |
| 1096 | |
| 1097 | //extract the token of the underlying lookup stub |
| 1098 | |
| 1099 | inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; } |
| 1100 | |
| 1101 | private: |
| 1102 | LookupStub* stub; //the stub the entry is wrapping |
| 1103 | }; |
| 1104 | #endif // USES_LOOKUP_STUBS |
| 1105 | |
| 1106 | class VTableCallEntry : public Entry |
| 1107 | { |
| 1108 | public: |
| 1109 | //Creates an entry that wraps vtable call stub |
| 1110 | VTableCallEntry(size_t s) |
| 1111 | { |
| 1112 | LIMITED_METHOD_CONTRACT; |
| 1113 | _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)s)); |
| 1114 | stub = (VTableCallStub*)s; |
| 1115 | } |
| 1116 | |
| 1117 | //default constructor to allow stack and inline allocation of vtable call entries |
| 1118 | VTableCallEntry() { LIMITED_METHOD_CONTRACT; stub = NULL; } |
| 1119 | |
| 1120 | //implementations of abstract class Entry |
| 1121 | BOOL Equals(size_t keyA, size_t keyB) |
| 1122 | { |
| 1123 | WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); |
| 1124 | } |
| 1125 | |
| 1126 | size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } |
| 1127 | size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } |
| 1128 | |
| 1129 | void SetContents(size_t contents) |
| 1130 | { |
| 1131 | LIMITED_METHOD_CONTRACT; |
| 1132 | _ASSERTE(VirtualCallStubManager::isVtableCallStubStatic((PCODE)contents)); |
| 1133 | stub = VTableCallHolder::FromVTableCallEntry((PCODE)contents)->stub(); |
| 1134 | } |
| 1135 | |
| 1136 | //extract the token of the underlying vtable call stub |
| 1137 | |
| 1138 | inline size_t Token() { LIMITED_METHOD_CONTRACT; return stub ? stub->token() : 0; } |
| 1139 | |
| 1140 | private: |
| 1141 | VTableCallStub* stub; //the stub the entry is wrapping |
| 1142 | }; |
| 1143 | |
| 1144 | /********************************************************************************************** |
| 1145 | ResolveCacheEntry wraps a ResolveCacheElem and provides lookup functionality for entries that |
| 1146 | were created and that may be added to the ResolveCache |
| 1147 | */ |
| 1148 | class ResolveCacheEntry : public Entry |
| 1149 | { |
| 1150 | public: |
| 1151 | ResolveCacheEntry(size_t elem) |
| 1152 | { |
| 1153 | LIMITED_METHOD_CONTRACT; |
| 1154 | _ASSERTE(elem != 0); |
| 1155 | pElem = (ResolveCacheElem*) elem; |
| 1156 | } |
| 1157 | |
| 1158 | //default constructor to allow stack and inline allocation of resolve cache entries |
| 1159 | ResolveCacheEntry() { LIMITED_METHOD_CONTRACT; pElem = NULL; } |
| 1160 | |
| 1161 | //access and compare the keys of the entry |
| 1162 | virtual BOOL Equals(size_t keyA, size_t keyB) |
| 1163 | { WRAPPER_NO_CONTRACT; return pElem && (keyA == KeyA()) && (keyB == KeyB()); } |
| 1164 | virtual size_t KeyA() |
| 1165 | { LIMITED_METHOD_CONTRACT; return pElem != NULL ? pElem->token : 0; } |
| 1166 | virtual size_t KeyB() |
| 1167 | { LIMITED_METHOD_CONTRACT; return pElem != NULL ? (size_t) pElem->pMT : 0; } |
| 1168 | |
| 1169 | //contents is the struct or token that the entry exposes |
| 1170 | virtual void SetContents(size_t contents) |
| 1171 | { |
| 1172 | LIMITED_METHOD_CONTRACT; |
| 1173 | pElem = (ResolveCacheElem*) contents; |
| 1174 | } |
| 1175 | |
| 1176 | inline const BYTE *Target() |
| 1177 | { |
| 1178 | LIMITED_METHOD_CONTRACT; |
| 1179 | return pElem != NULL ? (const BYTE *)pElem->target : NULL; |
| 1180 | } |
| 1181 | |
| 1182 | private: |
| 1183 | ResolveCacheElem *pElem; |
| 1184 | }; |
| 1185 | |
| 1186 | /********************************************************************************************** |
| 1187 | ResolveEntry wraps ResolveStubs and provides the concrete implementation of the abstract class Entry. |
| 1188 | Polymorphic call sites and monomorphic calls that fail end up in a ResolveStub. Resolve stubs |
| 1189 | are stored in hash tables keyed by token, hence the Equals function uses the embedded token in |
| 1190 | the stub for comparison purposes. Since we are willing to allow duplicates in the hash table (as |
| 1191 | long as they are relatively rare) we do use direct comparison of the tokens rather than extracting |
| 1192 | the fields from within the tokens, for perf reasons. */ |
| 1193 | class ResolveEntry : public Entry |
| 1194 | { |
| 1195 | public: |
| 1196 | //Creates an entry that wraps resolve stub s |
| 1197 | ResolveEntry (size_t s) |
| 1198 | { |
| 1199 | LIMITED_METHOD_CONTRACT; |
| 1200 | _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)s)); |
| 1201 | stub = (ResolveStub*) s; |
| 1202 | } |
| 1203 | //default constructor to allow stack and inline allocation of resolver entries |
| 1204 | ResolveEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } |
| 1205 | |
| 1206 | //implementations of abstract class Entry |
| 1207 | inline BOOL Equals(size_t keyA, size_t keyB) |
| 1208 | { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } |
| 1209 | inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } |
| 1210 | inline size_t KeyB() { WRAPPER_NO_CONTRACT; return (size_t)0; } |
| 1211 | |
| 1212 | void SetContents(size_t contents) |
| 1213 | { |
| 1214 | LIMITED_METHOD_CONTRACT; |
| 1215 | _ASSERTE(VirtualCallStubManager::isResolvingStubStatic((PCODE)contents)); |
| 1216 | stub = ResolveHolder::FromResolveEntry((PCODE)contents)->stub(); |
| 1217 | } |
| 1218 | //extract the token of the underlying resolve stub |
| 1219 | inline size_t Token() { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->token()) : 0; } |
| 1220 | private: |
| 1221 | ResolveStub* stub; //the stub the entry is wrapping |
| 1222 | }; |
| 1223 | |
| 1224 | /********************************************************************************************** |
| 1225 | DispatchEntry wraps DispatchStubs and provides the concrete implementation of the abstract class Entry. |
| 1226 | Monomorphic and mostly monomorphic call sites eventually point to DispatchStubs. Dispatch stubs |
| 1227 | are placed in hash and cache tables keyed by the expected Method Table and token they are built for. |
| 1228 | Since we are willing to allow duplicates in the hash table (as long as they are relatively rare) |
| 1229 | we do use direct comparison of the tokens rather than extracting the fields from within the tokens, |
| 1230 | for perf reasons.*/ |
| 1231 | class DispatchEntry : public Entry |
| 1232 | { |
| 1233 | public: |
| 1234 | //Creates an entry that wraps dispatch stub s |
| 1235 | DispatchEntry (size_t s) |
| 1236 | { |
| 1237 | LIMITED_METHOD_CONTRACT; |
| 1238 | _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)s)); |
| 1239 | stub = (DispatchStub*) s; |
| 1240 | } |
| 1241 | //default constructor to allow stack and inline allocation of dispatch entries |
| 1242 | DispatchEntry() { LIMITED_METHOD_CONTRACT; stub = CALL_STUB_EMPTY_ENTRY; } |
| 1243 | |
| 1244 | //implementations of abstract class Entry |
| 1245 | inline BOOL Equals(size_t keyA, size_t keyB) |
| 1246 | { WRAPPER_NO_CONTRACT; return stub && (keyA == KeyA()) && (keyB == KeyB()); } |
| 1247 | inline size_t KeyA() { WRAPPER_NO_CONTRACT; return Token(); } |
| 1248 | inline size_t KeyB() { WRAPPER_NO_CONTRACT; return ExpectedMT();} |
| 1249 | |
| 1250 | void SetContents(size_t contents) |
| 1251 | { |
| 1252 | LIMITED_METHOD_CONTRACT; |
| 1253 | _ASSERTE(VirtualCallStubManager::isDispatchingStubStatic((PCODE)contents)); |
| 1254 | stub = DispatchHolder::FromDispatchEntry((PCODE)contents)->stub(); |
| 1255 | } |
| 1256 | |
| 1257 | //extract the fields of the underlying dispatch stub |
| 1258 | inline size_t ExpectedMT() |
| 1259 | { WRAPPER_NO_CONTRACT; return stub ? (size_t)(stub->expectedMT()) : 0; } |
| 1260 | |
| 1261 | size_t Token() |
| 1262 | { |
| 1263 | WRAPPER_NO_CONTRACT; |
| 1264 | if (stub) |
| 1265 | { |
| 1266 | ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget()); |
| 1267 | size_t token = resolveHolder->stub()->token(); |
| 1268 | _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub)); |
| 1269 | return token; |
| 1270 | } |
| 1271 | else |
| 1272 | { |
| 1273 | return 0; |
| 1274 | } |
| 1275 | } |
| 1276 | |
| 1277 | inline PCODE Target() |
| 1278 | { WRAPPER_NO_CONTRACT; return stub ? stub->implTarget() : 0; } |
| 1279 | |
| 1280 | private: |
| 1281 | DispatchStub* stub; |
| 1282 | }; |
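
/* Illustrative sketch (not part of the runtime): the key pair a DispatchEntry presents to a
   Prober, assuming pEntry is a hypothetical DispatchEntry wrapping a real dispatch stub.

       size_t keyA = pEntry->KeyA();   // the dispatch token
       size_t keyB = pEntry->KeyB();   // the expected MethodTable recorded in the stub

   Note that a DispatchStub does not store its token directly; Token() above recovers it by
   following the stub's failTarget() back to its ResolveHolder and reading the token out of
   the associated ResolveStub.
*/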
| 1283 | |
| 1284 | /************************************************************************************************* |
| 1285 | DispatchCache is the cache table that the resolve stubs use for inline polymorphic resolution |
| 1286 | of a call. The cache entry is logically a triplet of (method table, token, impl address) where method table |
| 1287 | is the type of the calling frame's <this>, token identifies the method being invoked, |
i.e. a (type id, slot #) pair, and impl address is the address of the method implementation.
| 1289 | */ |
| 1290 | class DispatchCache |
| 1291 | { |
| 1292 | public: |
| 1293 | static const UINT16 INVALID_HASH = (UINT16)(-1); |
| 1294 | |
| 1295 | DispatchCache(); |
| 1296 | |
| 1297 | //read and write the cache keyed by (method table,token) pair. |
| 1298 | inline ResolveCacheElem* Lookup(size_t token, void* mt) |
| 1299 | { WRAPPER_NO_CONTRACT; return Lookup(token, INVALID_HASH, mt);} |
| 1300 | |
| 1301 | ResolveCacheElem* Lookup(size_t token, UINT16 tokenHash, void* mt); |
| 1302 | |
| 1303 | enum InsertKind {IK_NONE, IK_DISPATCH, IK_RESOLVE, IK_SHARED, IK_EXTERNAL}; |
| 1304 | |
| 1305 | BOOL Insert(ResolveCacheElem* elem, InsertKind insertKind); |
| 1306 | #ifdef CHAIN_LOOKUP |
| 1307 | void PromoteChainEntry(ResolveCacheElem* elem); |
| 1308 | #endif |
| 1309 | |
| 1310 | // This is the heavyweight hashing algorithm. Use sparingly. |
| 1311 | static UINT16 HashToken(size_t token); |
| 1312 | |
| 1313 | inline void GetLoadFactor(size_t *total, size_t *used) |
| 1314 | { |
| 1315 | LIMITED_METHOD_CONTRACT; |
| 1316 | |
| 1317 | *total = CALL_STUB_CACHE_SIZE; |
| 1318 | size_t count = 0; |
| 1319 | for (size_t i = 0; i < CALL_STUB_CACHE_SIZE; i++) |
| 1320 | if (cache[i] != empty) |
| 1321 | count++; |
| 1322 | *used = count; |
| 1323 | } |
| 1324 | |
| 1325 | inline void *GetCacheBaseAddr() |
| 1326 | { LIMITED_METHOD_CONTRACT; return &cache[0]; } |
| 1327 | inline size_t GetCacheCount() |
| 1328 | { LIMITED_METHOD_CONTRACT; return CALL_STUB_CACHE_SIZE; } |
| 1329 | inline ResolveCacheElem *GetCacheEntry(size_t idx) |
| 1330 | { LIMITED_METHOD_CONTRACT; return VolatileLoad(&cache[idx]); } |
| 1331 | inline BOOL IsCacheEntryEmpty(size_t idx) |
| 1332 | { LIMITED_METHOD_CONTRACT; return cache[idx] == empty; } |
| 1333 | |
| 1334 | inline void SetCacheEntry(size_t idx, ResolveCacheElem *elem) |
| 1335 | { |
| 1336 | LIMITED_METHOD_CONTRACT; |
| 1337 | #ifdef STUB_LOGGING |
| 1338 | cacheData[idx].numWrites++; |
| 1339 | #endif |
| 1340 | #ifdef CHAIN_LOOKUP |
| 1341 | CONSISTENCY_CHECK(m_writeLock.OwnedByCurrentThread()); |
| 1342 | #endif |
| 1343 | cache[idx] = elem; |
| 1344 | } |
| 1345 | |
| 1346 | inline void ClearCacheEntry(size_t idx) |
| 1347 | { |
| 1348 | LIMITED_METHOD_CONTRACT; |
| 1349 | #ifdef STUB_LOGGING |
| 1350 | cacheData[idx].numClears++; |
| 1351 | #endif |
| 1352 | cache[idx] = empty; |
| 1353 | } |
| 1354 | |
| 1355 | struct |
| 1356 | { |
| 1357 | UINT32 insert_cache_external; //# of times Insert was called for IK_EXTERNAL |
| 1358 | UINT32 insert_cache_shared; //# of times Insert was called for IK_SHARED |
| 1359 | UINT32 insert_cache_dispatch; //# of times Insert was called for IK_DISPATCH |
| 1360 | UINT32 insert_cache_resolve; //# of times Insert was called for IK_RESOLVE |
| 1361 | UINT32 insert_cache_hit; //# of times Insert found an empty cache entry |
| 1362 | UINT32 insert_cache_miss; //# of times Insert already had a matching cache entry |
| 1363 | UINT32 insert_cache_collide; //# of times Insert found a used cache entry |
| 1364 | UINT32 insert_cache_write; //# of times Insert wrote a cache entry |
| 1365 | } stats; |
| 1366 | |
| 1367 | void LogStats(); |
| 1368 | |
| 1369 | // Unlocked iterator of entries. Use only when read/write access to the cache |
| 1370 | // is safe. This would typically be at GC sync points, currently needed during |
| 1371 | // appdomain unloading. |
| 1372 | class Iterator |
| 1373 | { |
| 1374 | public: |
| 1375 | Iterator(DispatchCache *pCache); |
| 1376 | inline BOOL IsValid() |
| 1377 | { WRAPPER_NO_CONTRACT; return (m_curBucket < (INT32)m_pCache->GetCacheCount()); } |
| 1378 | void Next(); |
| 1379 | // Unlink the current entry. |
| 1380 | // **NOTE** Using this method implicitly performs a call to Next to move |
        //          past the unlinked entry. Thus, you could accidentally skip
        //          entries unless you take this into account.
| 1383 | ResolveCacheElem *UnlinkEntry(); |
| 1384 | inline ResolveCacheElem *Entry() |
| 1385 | { LIMITED_METHOD_CONTRACT; CONSISTENCY_CHECK(IsValid()); return *m_ppCurElem; } |
| 1386 | |
| 1387 | private: |
| 1388 | void NextValidBucket(); |
| 1389 | inline void NextBucket() |
| 1390 | { LIMITED_METHOD_CONTRACT; m_curBucket++; m_ppCurElem = &m_pCache->cache[m_curBucket]; } |
| 1391 | |
| 1392 | DispatchCache *m_pCache; |
| 1393 | INT32 m_curBucket; |
| 1394 | ResolveCacheElem **m_ppCurElem; |
| 1395 | }; |
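
    // Illustrative sketch (not part of the runtime): typical use of the unlocked Iterator,
    // valid only where cache access is known to be safe (e.g. at a GC sync point). The
    // ShouldRemove predicate below is hypothetical.
    //
    //     for (DispatchCache::Iterator it(pCache); it.IsValid(); )
    //     {
    //         if (ShouldRemove(it.Entry()))
    //             it.UnlinkEntry();   // UnlinkEntry already advances past the unlinked entry
    //         else
    //             it.Next();
    //     }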
| 1396 | |
| 1397 | private: |
| 1398 | #ifdef CHAIN_LOOKUP |
| 1399 | Crst m_writeLock; |
| 1400 | #endif |
| 1401 | |
    //the following hash computation is also inlined in the resolve stub in asm; do not change one without updating the other
| 1403 | inline static UINT16 HashMT(UINT16 tokenHash, void* mt) |
| 1404 | { |
| 1405 | LIMITED_METHOD_CONTRACT; |
| 1406 | |
| 1407 | UINT16 hash; |
| 1408 | |
| 1409 | size_t mtHash = (size_t) mt; |
| 1410 | mtHash = (((mtHash >> CALL_STUB_CACHE_NUM_BITS) + mtHash) >> LOG2_PTRSIZE) & CALL_STUB_CACHE_MASK; |
| 1411 | hash = (UINT16) mtHash; |
| 1412 | |
| 1413 | hash ^= (tokenHash & CALL_STUB_CACHE_MASK); |
| 1414 | |
| 1415 | return hash; |
| 1416 | } |
| 1417 | |
| 1418 | ResolveCacheElem* cache[CALL_STUB_CACHE_SIZE]; //must be first |
| 1419 | ResolveCacheElem* empty; //empty entry, initialized to fail all comparisons |
| 1420 | #ifdef STUB_LOGGING |
| 1421 | public: |
| 1422 | struct CacheEntryData { |
| 1423 | UINT32 numWrites; |
| 1424 | UINT16 numClears; |
| 1425 | }; |
| 1426 | CacheEntryData cacheData[CALL_STUB_CACHE_SIZE]; |
| 1427 | #endif // STUB_LOGGING |
| 1428 | }; |
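
/* Illustrative sketch (not part of the runtime): probing the resolve cache from C++ code.
   The token is hashed once with the heavyweight HashToken, the result is folded with the
   MethodTable pointer (see HashMT above) to select a cache slot, and matching is done on the
   exact (pMT, token) pair. Variable names below are hypothetical.

       UINT16            tokenHash = DispatchCache::HashToken(token);
       ResolveCacheElem *pElem     = pCache->Lookup(token, tokenHash, pMT);
       if (pElem != NULL)
       {
           PCODE target = (PCODE)pElem->target;   // cached implementation address
       }
*/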
| 1429 | |
| 1430 | /************************************************************************************************** |
| 1431 | The hash tables are accessed via instances of the Prober. Prober is a probe into a bucket |
| 1432 | of the hash table, and therefore has an index which is the current probe position. |
It includes a count of the number of probes done in that bucket so far and a stride
to step through the bucket with. To do comparisons, it holds a reference to an entry with which
it can compare (via Equals(...)) the entries (stubs) inside the hash table. It also holds
the key pair (keyA, keyB) that it is looking for.

Typically, an entry of the appropriate type is created on the stack and then the prober is created passing
in a reference to the entry. The prober is used for a complete operation, such as looking up and finding an
entry (stub), creating and inserting it as necessary.

The initial index and the stride are orthogonal hashes of the key pair, i.e. we are doing a variant of
double hashing. When we initialize the prober (see FormHash below) we set the initial probe based on
one hash. The stride (used as a modulo addition of the probe position) is based on a different hash and
is such that it will visit every location in the bucket before repeating. Hence it is imperative that
the bucket size and the stride be relatively prime with respect to each other. We have chosen to make
bucket sizes a power of 2, so we force the stride to be odd.

Note -- it must be assumed that multiple probers are walking the same tables and buckets at the same time.
Additionally, the counts may not be accurate, and there may be duplicates in the tables. Since the tables
do not allow concurrent deletion, some of the concurrency issues are ameliorated.
| 1452 | */ |
| 1453 | class Prober |
| 1454 | { |
| 1455 | friend class FastTable; |
| 1456 | friend class BucketTable; |
| 1457 | public: |
| 1458 | Prober(Entry* e) {LIMITED_METHOD_CONTRACT; comparer = e;} |
| 1459 | //find the requested entry, if not there return CALL_STUB_EMPTY_ENTRY |
| 1460 | size_t Find(); |
| 1461 | //add the entry into the bucket, if it is not already in the bucket. |
| 1462 | //return the entry actually in the bucket (existing or added) |
| 1463 | size_t Add(size_t entry); |
| 1464 | private: |
| 1465 | //return the bucket (FastTable*) that the prober is currently walking |
| 1466 | inline size_t* items() {LIMITED_METHOD_CONTRACT; return &base[-CALL_STUB_FIRST_INDEX];} |
| 1467 | //are there more probes possible, or have we probed everything in the bucket |
    inline BOOL NoMore() {LIMITED_METHOD_CONTRACT; return probes>mask;} //both probes and mask are stored as (count-1)
| 1469 | //advance the probe to a new place in the bucket |
| 1470 | inline BOOL Next() |
| 1471 | { |
| 1472 | WRAPPER_NO_CONTRACT; |
| 1473 | index = (index + stride) & mask; |
| 1474 | probes++; |
| 1475 | return !NoMore(); |
| 1476 | } |
| 1477 | inline size_t Read() |
| 1478 | { |
| 1479 | LIMITED_METHOD_CONTRACT; |
| 1480 | _ASSERTE(base); |
| 1481 | return VolatileLoad(&base[index]); |
| 1482 | } |
| 1483 | //initialize a prober across a bucket (table) for the specified keys. |
| 1484 | void InitProber(size_t key1, size_t key2, size_t* table); |
| 1485 | //set up the initial index and stride and probe count |
| 1486 | inline void FormHash() |
| 1487 | { |
| 1488 | LIMITED_METHOD_CONTRACT; |
| 1489 | |
| 1490 | probes = 0; |
| 1491 | //these two hash functions have not been formally measured for effectiveness |
| 1492 | //but they are at least orthogonal |
| 1493 | |
| 1494 | size_t a = ((keyA>>16) + keyA); |
| 1495 | size_t b = ((keyB>>16) ^ keyB); |
| 1496 | index = (((a*CALL_STUB_HASH_CONST1)>>4)+((b*CALL_STUB_HASH_CONST2)>>4)+CALL_STUB_HASH_CONST1) & mask; |
| 1497 | stride = ((a+(b*CALL_STUB_HASH_CONST1)+CALL_STUB_HASH_CONST2) | 1) & mask; |
| 1498 | } |
| 1499 | //atomically grab an empty slot so we can insert a new entry into the bucket |
| 1500 | BOOL GrabEntry(size_t entryValue); |
| 1501 | size_t keyA; //key pair we are looking for |
| 1502 | size_t keyB; |
| 1503 | size_t* base; //we have our own pointer to the bucket, so races don't matter. |
| 1504 | // We won't care if we do the lookup in an |
| 1505 | // outdated bucket (has grown out from under us). |
| 1506 | // All that will happen is possibly dropping an entry |
| 1507 | // on the floor or adding a duplicate. |
| 1508 | size_t index; //current probe point in the bucket |
| 1509 | size_t stride; //amount to step on each successive probe, must be relatively prime wrt the bucket size |
| 1510 | size_t mask; //size of bucket - 1 |
| 1511 | size_t probes; //number probes - 1 |
| 1512 | Entry* comparer;//used to compare an entry against the sought after key pair |
| 1513 | }; |
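
/* Illustrative sketch (not part of the runtime): why the probe sequence covers the whole bucket.
   With a power-of-two bucket size (mask == size-1) and an odd stride, the sequence
   index, index+stride, index+2*stride, ... (all reduced mod size) visits every slot exactly once
   before repeating, because gcd(stride, size) == 1. A minimal standalone demonstration, assuming
   a bucket of size 8:

       size_t mask = 7, index = 5, stride = 3;    // stride is odd, as FormHash guarantees
       for (size_t probes = 0; probes <= mask; probes++)
       {
           // ... examine slot 'index' here ...
           index = (index + stride) & mask;       // same step as Prober::Next()
       }
*/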
| 1514 | |
| 1515 | /******************************************************************************************************** |
| 1516 | FastTable is used to implement the buckets of a BucketTable, a bucketized hash table. A FastTable is |
| 1517 | an array of entries (contents). The first two slots of contents store the size-1 and count of entries |
| 1518 | actually in the FastTable. Note that the count may be inaccurate and there may be duplicates. Careful |
attention must be paid to eliminate the need for interlocked, serialized, or locked operations in the
face of concurrency.
| 1521 | */ |
| 1522 | #ifdef _MSC_VER |
| 1523 | #pragma warning(push) |
| 1524 | #pragma warning(disable : 4200) // disable zero-sized array warning |
| 1525 | #endif // _MSC_VER |
| 1526 | class FastTable |
| 1527 | { |
| 1528 | friend class BucketTable; |
| 1529 | public: |
| 1530 | private: |
| 1531 | FastTable() { LIMITED_METHOD_CONTRACT; } |
| 1532 | ~FastTable() { LIMITED_METHOD_CONTRACT; } |
| 1533 | |
| 1534 | //initialize a prober for the specified keys. |
| 1535 | inline BOOL SetUpProber(size_t keyA, size_t keyB, Prober* probe) |
| 1536 | { |
| 1537 | CONTRACTL { |
| 1538 | NOTHROW; |
| 1539 | GC_NOTRIGGER; |
| 1540 | FORBID_FAULT; |
| 1541 | } CONTRACTL_END; |
| 1542 | |
| 1543 | _ASSERTE(probe); |
| 1544 | _ASSERTE(contents); |
| 1545 | probe->InitProber(keyA, keyB, &contents[0]); |
| 1546 | return TRUE; |
| 1547 | } |
| 1548 | //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY |
| 1549 | size_t Find(Prober* probe); |
| 1550 | //add the entry, if it is not already there. Probe is used to search. |
    //Return the entry actually contained (existing or added)
| 1552 | size_t Add(size_t entry, Prober* probe); |
| 1553 | void IncrementCount(); |
| 1554 | |
| 1555 | // Create a FastTable with space for numberOfEntries. Please note that this method |
| 1556 | // does not throw on OOM. **YOU MUST CHECK FOR NULL RETURN** |
| 1557 | static FastTable* MakeTable(size_t numberOfEntries) |
| 1558 | { |
| 1559 | CONTRACTL { |
| 1560 | THROWS; |
| 1561 | GC_TRIGGERS; |
| 1562 | INJECT_FAULT(COMPlusThrowOM();); |
| 1563 | } CONTRACTL_END; |
| 1564 | |
| 1565 | size_t size = CALL_STUB_MIN_ENTRIES; |
| 1566 | while (size < numberOfEntries) {size = size<<1;} |
| 1567 | // if (size == CALL_STUB_MIN_ENTRIES) |
| 1568 | // size += 3; |
| 1569 | size_t* bucket = new size_t[(sizeof(FastTable)/sizeof(size_t))+size+CALL_STUB_FIRST_INDEX]; |
| 1570 | FastTable* table = new (bucket) FastTable(); |
| 1571 | table->InitializeContents(size); |
| 1572 | return table; |
| 1573 | } |
| 1574 | //Initialize as empty |
| 1575 | void InitializeContents(size_t size) |
| 1576 | { |
| 1577 | LIMITED_METHOD_CONTRACT; |
| 1578 | memset(&contents[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(BYTE*)); |
| 1579 | contents[CALL_STUB_MASK_INDEX] = size-1; |
| 1580 | } |
| 1581 | inline size_t tableMask() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_MASK_INDEX]);} |
| 1582 | inline size_t tableSize() {LIMITED_METHOD_CONTRACT; return tableMask()+1;} |
| 1583 | inline size_t tableCount() {LIMITED_METHOD_CONTRACT; return (size_t) (contents[CALL_STUB_COUNT_INDEX]);} |
| 1584 | inline BOOL isFull() |
| 1585 | { |
| 1586 | LIMITED_METHOD_CONTRACT; |
| 1587 | return (tableCount()+1) * 100 / CALL_STUB_LOAD_FACTOR >= tableSize(); |
| 1588 | } |
| 1589 | //we store (size-1) in bucket[CALL_STUB_MASK_INDEX==0], |
| 1590 | //we store the used count in bucket[CALL_STUB_COUNT_INDEX==1], |
| 1591 | //we have an unused cell to use as a temp at bucket[CALL_STUB_DEAD_LINK==2], |
| 1592 | //and the table starts at bucket[CALL_STUB_FIRST_INDEX==3], |
| 1593 | size_t contents[]; |
| 1594 | }; |
| 1595 | #ifdef _MSC_VER |
| 1596 | #pragma warning(pop) |
| 1597 | #endif |
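
/* Illustrative sketch (not part of the runtime): the layout of FastTable::contents for a table
   of size 8, using the CALL_STUB_* index constants noted in the comments above.

       contents[CALL_STUB_MASK_INDEX  == 0] = 7        // size-1, used as the probe mask
       contents[CALL_STUB_COUNT_INDEX == 1] = n        // approximate count of used entries
       contents[CALL_STUB_DEAD_LINK   == 2]            // spare cell, usable as a temp
       contents[CALL_STUB_FIRST_INDEX == 3 .. 10]      // the 8 entry slots themselves

   SetUpProber passes &contents[0] to Prober::InitProber, and the probes land in the slots
   starting at CALL_STUB_FIRST_INDEX (see Prober::items()).
*/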
| 1598 | |
| 1599 | /****************************************************************************************************** |
| 1600 | BucketTable is a bucketized hash table. It uses FastTables for its buckets. The hash tables |
| 1601 | used by the VirtualCallStubManager are BucketTables. The number of buckets is fixed at the time |
| 1602 | the table is created. The actual buckets are allocated as needed, and grow as necessary. The reason |
| 1603 | for using buckets is primarily to reduce the cost of growing, since only a single bucket is actually |
| 1604 | grown at any given time. Since the hash tables are accessed infrequently, the load factor that |
controls growth is quite high (90%). Since we use hashing to pick the bucket, and we use hashing to
look up inside the bucket, it is important that the hash function used here is orthogonal to the ones
used in the buckets themselves (see Prober::FormHash).
| 1608 | */ |
| 1609 | class BucketTable |
| 1610 | { |
| 1611 | public: |
| 1612 | BucketTable(size_t numberOfBuckets) |
| 1613 | { |
| 1614 | WRAPPER_NO_CONTRACT; |
| 1615 | size_t size = CALL_STUB_MIN_BUCKETS; |
| 1616 | while (size < numberOfBuckets) {size = size<<1;} |
| 1617 | buckets = AllocateBuckets(size); |
| 1618 | // Initialize statistics counters |
| 1619 | memset(&stats, 0, sizeof(stats)); |
| 1620 | } |
| 1621 | |
| 1622 | ~BucketTable() |
| 1623 | { |
| 1624 | LIMITED_METHOD_CONTRACT; |
| 1625 | if(buckets != NULL) |
| 1626 | { |
| 1627 | size_t size = bucketCount()+CALL_STUB_FIRST_INDEX; |
| 1628 | for(size_t ix = CALL_STUB_FIRST_INDEX; ix < size; ix++) delete (FastTable*)(buckets[ix]); |
            delete[] buckets;   //buckets was allocated with new size_t[] in AllocateBuckets
| 1630 | } |
| 1631 | } |
| 1632 | |
| 1633 | //initialize a prober for the specified keys. |
| 1634 | BOOL SetUpProber(size_t keyA, size_t keyB, Prober *prober); |
| 1635 | //find the requested entry (keys of prober), if not there return CALL_STUB_EMPTY_ENTRY |
| 1636 | inline size_t Find(Prober* probe) {WRAPPER_NO_CONTRACT; return probe->Find();} |
| 1637 | //add the entry, if it is not already there. Probe is used to search. |
| 1638 | size_t Add(size_t entry, Prober* probe); |
    //reclaim abandoned buckets. Buckets are abandoned when they need to grow.
    //Needs to be called inside a GC sync point.
| 1641 | static void Reclaim(); |
| 1642 | |
| 1643 | struct |
| 1644 | { |
| 1645 | UINT32 bucket_space; //# of bytes in caches and tables, not including the stubs themselves |
| 1646 | UINT32 bucket_space_dead; //# of bytes of abandoned buckets not yet recycled. |
| 1647 | } stats; |
| 1648 | |
| 1649 | void LogStats(); |
| 1650 | |
| 1651 | private: |
| 1652 | inline size_t bucketMask() {LIMITED_METHOD_CONTRACT; return (size_t) (buckets[CALL_STUB_MASK_INDEX]);} |
| 1653 | inline size_t bucketCount() {LIMITED_METHOD_CONTRACT; return bucketMask()+1;} |
| 1654 | inline size_t ComputeBucketIndex(size_t keyA, size_t keyB) |
| 1655 | { |
| 1656 | LIMITED_METHOD_CONTRACT; |
| 1657 | size_t a = ((keyA>>16) + keyA); |
| 1658 | size_t b = ((keyB>>16) ^ keyB); |
| 1659 | return CALL_STUB_FIRST_INDEX+(((((a*CALL_STUB_HASH_CONST2)>>5)^((b*CALL_STUB_HASH_CONST1)>>5))+CALL_STUB_HASH_CONST2) & bucketMask()); |
| 1660 | } |
| 1661 | //grows the bucket referenced by probe. |
| 1662 | BOOL GetMoreSpace(const Prober* probe); |
| 1663 | //creates storage in which to store references to the buckets |
| 1664 | static size_t* AllocateBuckets(size_t size) |
| 1665 | { |
| 1666 | LIMITED_METHOD_CONTRACT; |
| 1667 | size_t* buckets = new size_t[size+CALL_STUB_FIRST_INDEX]; |
| 1668 | if (buckets != NULL) |
| 1669 | { |
| 1670 | memset(&buckets[0], CALL_STUB_EMPTY_ENTRY, (size+CALL_STUB_FIRST_INDEX)*sizeof(void*)); |
| 1671 | buckets[CALL_STUB_MASK_INDEX] = size-1; |
| 1672 | } |
| 1673 | return buckets; |
| 1674 | } |
| 1675 | inline size_t Read(size_t index) |
| 1676 | { |
| 1677 | LIMITED_METHOD_CONTRACT; |
| 1678 | CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); |
| 1679 | return VolatileLoad(&buckets[index]); |
| 1680 | } |
| 1681 | |
| 1682 | #ifdef _MSC_VER |
| 1683 | #pragma warning(disable: 4267) //work-around for the compiler |
| 1684 | #endif |
| 1685 | inline void Write(size_t index, size_t value) |
| 1686 | { |
| 1687 | LIMITED_METHOD_CONTRACT; |
| 1688 | CONSISTENCY_CHECK(index <= bucketMask()+CALL_STUB_FIRST_INDEX); |
| 1689 | VolatileStore(&buckets[index], value); |
| 1690 | } |
| 1691 | #ifdef _MSC_VER |
| 1692 | #pragma warning(default: 4267) |
| 1693 | #endif |
| 1694 | |
| 1695 | // We store (#buckets-1) in bucket[CALL_STUB_MASK_INDEX ==0] |
| 1696 | // We have two unused cells at bucket[CALL_STUB_COUNT_INDEX ==1] |
| 1697 | // and bucket[CALL_STUB_DEAD_LINK ==2] |
| 1698 | // and the table starts at bucket[CALL_STUB_FIRST_INDEX ==3] |
| 1699 | // the number of elements is bucket[CALL_STUB_MASK_INDEX]+CALL_STUB_FIRST_INDEX |
| 1700 | size_t* buckets; |
| 1701 | static FastTable* dead; //linked list head of to be deleted (abandoned) buckets |
| 1702 | }; |
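
/* Illustrative sketch (not part of the runtime): the two-level lookup performed through a
   BucketTable. ComputeBucketIndex hashes (keyA, keyB) to pick a FastTable bucket, and the Prober
   then double-hashes the same keys to walk within that bucket, which is why the two hash
   functions must remain orthogonal (see Prober::FormHash). Names below are hypothetical.

       DispatchEntry entry;
       Prober        probe(&entry);
       if (pDispatchersTable->SetUpProber(token, (size_t)pMT, &probe))
       {
           size_t stub = pDispatchersTable->Find(&probe);            // CALL_STUB_EMPTY_ENTRY if absent
           if (stub == CALL_STUB_EMPTY_ENTRY)
               stub = pDispatchersTable->Add(newStubAddr, &probe);   // returns existing or added entry
       }
*/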
| 1703 | |
| 1704 | #endif // !CROSSGEN_COMPILE |
| 1705 | |
| 1706 | #endif // !_VIRTUAL_CALL_STUB_H |
| 1707 | |