1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // |
5 | // threadsuspend.CPP |
6 | // |
7 | // This file contains the implementation of thread suspension. The implementation of thread suspension |
// used to be spread across multiple places. That is why many methods still live in their own homes
// (class Thread, class ThreadStore, etc.). They should eventually be refactored into class ThreadSuspend.
10 | // |
11 | |
12 | #include "common.h" |
13 | |
14 | #include "threadsuspend.h" |
15 | |
16 | #include "finalizerthread.h" |
17 | #include "dbginterface.h" |
18 | |
19 | #include "mdaassistants.h" |
20 | |
21 | // from ntstatus.h |
22 | #define STATUS_SUSPEND_COUNT_EXCEEDED ((NTSTATUS)0xC000004AL) |
23 | |
24 | #define HIJACK_NONINTERRUPTIBLE_THREADS |
25 | |
26 | bool ThreadSuspend::s_fSuspendRuntimeInProgress = false; |
27 | |
28 | CLREvent* ThreadSuspend::g_pGCSuspendEvent = NULL; |
29 | |
30 | ThreadSuspend::SUSPEND_REASON ThreadSuspend::m_suspendReason; |
31 | Thread* ThreadSuspend::m_pThreadAttemptingSuspendForGC; |
32 | |
33 | CLREventBase * ThreadSuspend::s_hAbortEvt = NULL; |
34 | CLREventBase * ThreadSuspend::s_hAbortEvtCache = NULL; |
35 | |
36 | // If you add any thread redirection function, make sure the debugger can 1) recognize the redirection |
37 | // function, and 2) retrieve the original CONTEXT. See code:Debugger.InitializeHijackFunctionAddress and |
38 | // code:DacDbiInterfaceImpl.RetrieveHijackedContext. |
39 | extern "C" void RedirectedHandledJITCaseForGCThreadControl_Stub(void); |
40 | extern "C" void RedirectedHandledJITCaseForDbgThreadControl_Stub(void); |
41 | extern "C" void RedirectedHandledJITCaseForUserSuspend_Stub(void); |
42 | |
43 | #define GetRedirectHandlerForGCThreadControl() \ |
44 | ((PFN_REDIRECTTARGET) GetEEFuncEntryPoint(RedirectedHandledJITCaseForGCThreadControl_Stub)) |
45 | #define GetRedirectHandlerForDbgThreadControl() \ |
46 | ((PFN_REDIRECTTARGET) GetEEFuncEntryPoint(RedirectedHandledJITCaseForDbgThreadControl_Stub)) |
47 | #define GetRedirectHandlerForUserSuspend() \ |
48 | ((PFN_REDIRECTTARGET) GetEEFuncEntryPoint(RedirectedHandledJITCaseForUserSuspend_Stub)) |
49 | |
50 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) || defined(_TARGET_ARM64_) |
51 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
52 | extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); |
53 | #define GetRedirectHandlerForGCStress() \ |
54 | ((PFN_REDIRECTTARGET) GetEEFuncEntryPoint(RedirectedHandledJITCaseForGCStress_Stub)) |
55 | #endif // HAVE_GCCOVER && USE_REDIRECT_FOR_GCSTRESS |
#endif // _TARGET_AMD64_ || _TARGET_ARM_ || _TARGET_ARM64_
57 | |
58 | |
59 | // Every PING_JIT_TIMEOUT ms, check to see if a thread in JITted code has wandered |
60 | // into some fully interruptible code (or should have a different hijack to improve |
61 | // our chances of snagging it at a safe spot). |
62 | #define PING_JIT_TIMEOUT 10 |
63 | |
64 | // When we find a thread in a spot that's not safe to abort -- how long to wait before |
65 | // we try again. |
66 | #define ABORT_POLL_TIMEOUT 10 |
67 | #ifdef _DEBUG |
68 | #define ABORT_FAIL_TIMEOUT 40000 |
69 | #endif // _DEBUG |
70 | |
71 | // |
// CANNOT USE IsBad*Ptr() methods here. They are *banned* APIs for various
// reasons (see http://winweb/wincet/bannedapis.htm).
74 | // |
75 | #define IS_VALID_WRITE_PTR(addr, size) _ASSERTE(addr != NULL) |
76 | #define IS_VALID_CODE_PTR(addr) _ASSERTE(addr != NULL) |
77 | |
78 | |
79 | void ThreadSuspend::SetSuspendRuntimeInProgress() |
80 | { |
81 | LIMITED_METHOD_CONTRACT; |
82 | _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit()); |
83 | _ASSERTE(!s_fSuspendRuntimeInProgress || IsAtProcessExit()); |
84 | s_fSuspendRuntimeInProgress = true; |
85 | } |
86 | |
87 | void ThreadSuspend::ResetSuspendRuntimeInProgress() |
88 | { |
89 | LIMITED_METHOD_CONTRACT; |
90 | _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit()); |
91 | _ASSERTE(s_fSuspendRuntimeInProgress || IsAtProcessExit()); |
92 | s_fSuspendRuntimeInProgress = false; |
93 | } |
94 | |
95 | |
// When SuspendThread returns, the target thread may still be executing user code.
// We cannot safely read data that the target thread modifies, e.g. m_fPreemptiveGCDisabled,
// yet our code depends on reading such data. To make this operation safe, we
// call GetThreadContext, which returns only once the target thread is no longer
// executing any user code.
101 | |
102 | // Message from David Cutler |
103 | /* |
104 | After SuspendThread returns, can the suspended thread continue to execute code in user mode? |
105 | |
106 | [David Cutler] The suspended thread cannot execute any more user code, but it might be currently "running" |
107 | on a logical processor whose other logical processor is currently actually executing another thread. |
108 | In this case the target thread will not suspend until the hardware switches back to executing instructions |
109 | on its logical processor. In this case even the memory barrier would not necessarily work - a better solution |
110 | would be to use interlocked operations on the variable itself. |
111 | |
112 | After SuspendThread returns, does the store buffer of the CPU for the suspended thread still need to drain? |
113 | |
114 | Historically, we've assumed that the answer to both questions is No. But on one 4/8 hyper-threaded machine |
115 | running Win2K3 SP1 build 1421, we've seen two stress failures where SuspendThread returns while writes seem to still be in flight. |
116 | |
117 | Usually after we suspend a thread, we then call GetThreadContext. This seems to guarantee consistency. |
118 | But there are places we would like to avoid GetThreadContext, if it's safe and legal. |
119 | |
120 | [David Cutler] Get context delivers a APC to the target thread and waits on an event that will be set |
121 | when the target thread has delivered its context. |
122 | |
123 | Chris. |
124 | */ |
125 | |
126 | // Message from Neill Clift |
127 | /* |
128 | What SuspendThread does is insert an APC block into a target thread and request an inter-processor interrupt to |
129 | do the APC interrupt. It doesn't wait till the thread actually enters some state or the interrupt has been serviced. |
130 | |
131 | I took a quick look at the APIC spec in the Intel manuals this morning. Writing to the APIC posts a message on a bus. |
132 | Processors accept messages and presumably queue the s/w interrupts at this time. We don't wait for this acceptance |
133 | when we send the IPI so at least on APIC machines when you suspend a thread it continues to execute code for some short time |
134 | after the routine returns. We use other mechanisms for IPI and so it could work differently on different h/w. |
135 | |
136 | */ |
137 | BOOL EnsureThreadIsSuspended (HANDLE hThread, Thread* pThread) |
138 | { |
139 | STATIC_CONTRACT_NOTHROW; |
140 | STATIC_CONTRACT_GC_NOTRIGGER; |
141 | |
142 | WRAPPER_NO_CONTRACT; |
143 | |
144 | CONTEXT ctx; |
145 | ctx.ContextFlags = CONTEXT_INTEGER; |
146 | |
147 | BOOL ret; |
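// The context value itself is irrelevant; the call matters because, per the discussion
// above, GetThreadContext does not return until the target thread has truly stopped
// executing user-mode code.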
148 | ret = ::GetThreadContext(hThread, &ctx); |
149 | return ret; |
150 | } |
151 | |
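// Trivial wrappers around the stress-log lock so they can be used as the Enter/Leave
// arguments of the StateHolder in Thread::SuspendThread below.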
152 | FORCEINLINE VOID MyEnterLogLock() |
153 | { |
154 | EnterLogLock(); |
155 | } |
156 | FORCEINLINE VOID MyLeaveLogLock() |
157 | { |
158 | LeaveLogLock(); |
159 | } |
160 | |
161 | // On non-Windows CORECLR platforms remove Thread::SuspendThread support |
162 | #ifndef DISABLE_THREADSUSPEND |
163 | // SuspendThread |
164 | // Attempts to OS-suspend the thread, whichever GC mode it is in. |
165 | // Arguments: |
166 | // fOneTryOnly - If TRUE, report failure if the thread has its |
167 | // m_dwForbidSuspendThread flag set. If FALSE, retry. |
168 | // pdwSuspendCount - If non-NULL, will contain the return code |
169 | // of the underlying OS SuspendThread call on success, |
170 | // undefined on any kind of failure. |
171 | // Return value: |
172 | // A SuspendThreadResult value indicating success or failure. |
173 | Thread::SuspendThreadResult Thread::SuspendThread(BOOL fOneTryOnly, DWORD *pdwSuspendCount) |
174 | { |
175 | CONTRACTL { |
176 | NOTHROW; |
177 | GC_NOTRIGGER; |
178 | } |
179 | CONTRACTL_END; |
180 | |
181 | #ifdef STRESS_LOG |
182 | if (StressLog::StressLogOn((unsigned int)-1, 0)) |
183 | { |
184 | // Make sure to create the stress log for the current thread |
185 | // (if needed) before we suspend the target thread. The target |
186 | // thread may be holding the stress log lock when we suspend it, |
187 | // which could cause a deadlock. |
188 | if (StressLog::CreateThreadStressLog() == NULL) |
189 | { |
190 | return STR_NoStressLog; |
191 | } |
192 | } |
193 | #endif |
194 | |
195 | Volatile<HANDLE> hThread; |
196 | SuspendThreadResult str = (SuspendThreadResult) -1; |
197 | DWORD dwSuspendCount = 0; |
198 | DWORD tries = 1; |
199 | #if defined(_DEBUG) |
200 | int nCnt = 0; |
201 | bool bDiagSuspend = g_pConfig->GetDiagnosticSuspend(); |
202 | ULONGLONG i64TimestampStart = CLRGetTickCount64(); |
203 | ULONGLONG i64TimestampCur = i64TimestampStart; |
204 | ULONGLONG i64TimestampPrev = i64TimestampStart; |
205 | |
// This is the maximum number of timestamp ticks allowed to elapse from the beginning of
// our attempt to suspend the thread before we assert (implying we believe
// there might be a deadlock) - (default = 2000).
209 | ULONGLONG i64TimestampTicksMax = g_pConfig->SuspendThreadDeadlockTimeoutMs(); |
210 | #endif // _DEBUG |
211 | |
212 | #if defined(_DEBUG) |
213 | // Stop the stress log from allocating any new memory while in this function |
214 | // as that can lead to deadlocks |
215 | CantAllocHolder hldrCantAlloc; |
216 | #endif |
217 | |
218 | DWORD dwSwitchCount = 0; |
219 | |
220 | while (TRUE) { |
221 | StateHolder<MyEnterLogLock, MyLeaveLogLock> LogLockHolder(FALSE); |
222 | |
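// Note that we are using the thread handle so that it is not released while we operate on it.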
223 | CounterHolder handleHolder(&m_dwThreadHandleBeingUsed); |
224 | |
225 | // Whether or not "goto retry" should YieldProcessor and __SwitchToThread |
226 | BOOL doSwitchToThread = TRUE; |
227 | |
228 | hThread = GetThreadHandle(); |
229 | if (hThread == INVALID_HANDLE_VALUE) { |
230 | str = STR_UnstartedOrDead; |
231 | break; |
232 | } |
233 | else if (hThread == SWITCHOUT_HANDLE_VALUE) { |
234 | str = STR_SwitchedOut; |
235 | break; |
236 | } |
237 | |
238 | { |
239 | // We do not want to suspend the target thread while it is holding the log lock. |
240 | // By acquiring the lock ourselves, we know that this is not the case. |
241 | LogLockHolder.Acquire(); |
242 | |
243 | // It is important to avoid two threads suspending each other. |
244 | // Before a thread suspends another, it increments its own m_dwForbidSuspendThread count first, |
245 | // then it checks the target thread's m_dwForbidSuspendThread. |
246 | ForbidSuspendThreadHolder forbidSuspend; |
247 | if ((m_dwForbidSuspendThread != 0)) |
248 | { |
249 | #if defined(_DEBUG) |
250 | // Enable the diagnostic ::SuspendThread() if the |
251 | // DiagnosticSuspend config setting is set. |
252 | // This will interfere with the mutual suspend race but it's |
253 | // here only for diagnostic purposes anyway |
254 | if (!bDiagSuspend) |
255 | #endif // _DEBUG |
256 | goto retry; |
257 | } |
258 | |
259 | dwSuspendCount = ::SuspendThread(hThread); |
260 | |
261 | // |
262 | // Since SuspendThread is asynchronous, we now must wait for the thread to |
263 | // actually be suspended before decrementing our own m_dwForbidSuspendThread count. |
264 | // Otherwise there would still be a chance for the "suspended" thread to suspend us |
265 | // before it really stops running. |
266 | // |
267 | if ((int)dwSuspendCount >= 0) |
268 | { |
269 | if (!EnsureThreadIsSuspended(hThread, this)) |
270 | { |
271 | ::ResumeThread(hThread); |
272 | str = STR_Failure; |
273 | break; |
274 | } |
275 | } |
276 | } |
277 | if ((int)dwSuspendCount >= 0) |
278 | { |
279 | if (hThread == GetThreadHandle()) |
280 | { |
281 | if (m_dwForbidSuspendThread != 0) |
282 | { |
283 | #if defined(_DEBUG) |
284 | // Log diagnostic below 8 times during the i64TimestampTicksMax period |
285 | if (i64TimestampCur-i64TimestampStart >= nCnt*(i64TimestampTicksMax>>3) ) |
286 | { |
287 | CONTEXT ctx; |
288 | SetIP(&ctx, -1); |
289 | ctx.ContextFlags = CONTEXT_CONTROL; |
290 | this->GetThreadContext(&ctx); |
291 | STRESS_LOG7(LF_SYNC, LL_INFO1000, |
292 | "Thread::SuspendThread[%p]: EIP=%p. nCnt=%d. result=%d.\n" |
293 | "\t\t\t\t\t\t\t\t\t forbidSuspend=%d. coop=%d. state=%x.\n" , |
294 | this, GetIP(&ctx), nCnt, dwSuspendCount, |
295 | (LONG)this->m_dwForbidSuspendThread, (ULONG)this->m_fPreemptiveGCDisabled, this->GetSnapshotState()); |
296 | |
// In diagnostic mode, assert before we resume the target thread so that
// its current state can be examined in the debugger.
299 | if (bDiagSuspend) |
300 | { |
301 | // triggered after 6 * 250msec |
302 | _ASSERTE(nCnt < 6 && "Timing out in Thread::SuspendThread" ); |
303 | } |
304 | |
305 | ++nCnt; |
306 | } |
307 | #endif // _DEBUG |
308 | ::ResumeThread(hThread); |
309 | |
310 | #if defined(_DEBUG) |
311 | // If the suspend diagnostics are enabled we need to spin here in order to avoid |
312 | // the case where we Suspend/Resume the target thread without giving it a chance to run. |
313 | if ((!fOneTryOnly) && bDiagSuspend) |
314 | { |
315 | while ( m_dwForbidSuspendThread != 0 && |
316 | CLRGetTickCount64()-i64TimestampStart < nCnt*(i64TimestampTicksMax>>3) ) |
317 | { |
318 | if (g_SystemInfo.dwNumberOfProcessors > 1) |
319 | { |
320 | if ((tries++) % 20 != 0) |
321 | { |
322 | YieldProcessor(); // play nice on hyperthreaded CPUs |
323 | } else { |
324 | __SwitchToThread(0, ++dwSwitchCount); |
325 | } |
326 | } |
327 | else |
328 | { |
329 | __SwitchToThread(0, ++dwSwitchCount); // don't spin on uniproc machines |
330 | } |
331 | } |
332 | } |
333 | #endif // _DEBUG |
334 | goto retry; |
335 | } |
// We suspended the right thread.
337 | #ifdef _DEBUG |
338 | Thread * pCurThread = GetThread(); |
339 | if (pCurThread != NULL) |
340 | { |
341 | pCurThread->dbg_m_cSuspendedThreads ++; |
342 | _ASSERTE(pCurThread->dbg_m_cSuspendedThreads > 0); |
343 | } |
344 | #endif |
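// While we hold another thread OS-suspended, allocations that might deadlock (e.g. by the
// stress log) must be avoided; the matching DecCantAllocCount is in Thread::ResumeThread.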
345 | IncCantAllocCount(); |
346 | |
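// Remember the handle we actually suspended; Thread::ResumeThread uses it rather than
// GetThreadHandle() so that it resumes exactly this thread.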
347 | m_ThreadHandleForResume = hThread; |
348 | str = STR_Success; |
349 | break; |
350 | } |
351 | else |
352 | { |
// The thread was switched out and then back in again,
// so we suspended the wrong thread.
355 | ::ResumeThread(hThread); |
356 | doSwitchToThread = FALSE; |
357 | goto retry; |
358 | } |
359 | } |
360 | else { |
// We can get here if either SuspendThread fails,
// or the thread hosting the fiber dies after this fiber switched out.
363 | |
364 | if ((int)dwSuspendCount != -1) { |
365 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "In Thread::SuspendThread ::SuspendThread returned %x\n" , dwSuspendCount); |
366 | } |
367 | if (GetThreadHandle() == SWITCHOUT_HANDLE_VALUE) { |
368 | str = STR_SwitchedOut; |
369 | break; |
370 | } |
371 | else { |
372 | // Our callers generally expect that STR_Failure means that |
373 | // the thread has exited. |
374 | #ifndef FEATURE_PAL |
375 | _ASSERTE(NtCurrentTeb()->LastStatusValue != STATUS_SUSPEND_COUNT_EXCEEDED); |
376 | #endif // !FEATURE_PAL |
377 | str = STR_Failure; |
378 | break; |
379 | } |
380 | } |
381 | |
382 | retry: |
383 | handleHolder.Release(); |
384 | LogLockHolder.Release(); |
385 | |
386 | if (fOneTryOnly) |
387 | { |
388 | str = STR_Forbidden; |
389 | break; |
390 | } |
391 | |
392 | #if defined(_DEBUG) |
393 | i64TimestampPrev = i64TimestampCur; |
394 | i64TimestampCur = CLRGetTickCount64(); |
// CLRGetTickCount64() is global per machine (not per CPU, like getTimeStamp()).
// The next ASSERT checks that CLRGetTickCount64() is increasing, or has wrapped.
// If it wrapped, the last iteration should have executed in less than 0.5 seconds.
398 | _ASSERTE(i64TimestampCur >= i64TimestampPrev || i64TimestampCur <= 500); |
399 | |
400 | if (i64TimestampCur - i64TimestampStart >= i64TimestampTicksMax) |
401 | { |
402 | dwSuspendCount = ::SuspendThread(hThread); |
403 | _ASSERTE(!"It takes too long to suspend a thread" ); |
404 | if ((int)dwSuspendCount >= 0) |
405 | ::ResumeThread(hThread); |
406 | } |
407 | #endif // _DEBUG |
408 | |
409 | if (doSwitchToThread) |
410 | { |
// When looking for deadlocks we need to allow the target thread to run in order to make some progress.
// On multiprocessor machines we saw the suspending thread resume immediately after the __SwitchToThread()
// because it had other processors available. As a consequence, the target thread was being resumed and
// suspended right away, without a real chance to make any progress.
415 | if (g_SystemInfo.dwNumberOfProcessors > 1) |
416 | { |
417 | if ((tries++) % 20 != 0) { |
418 | YieldProcessor(); // play nice on hyperthreaded CPUs |
419 | } else { |
420 | __SwitchToThread(0, ++dwSwitchCount); |
421 | } |
422 | } |
423 | else |
424 | { |
425 | __SwitchToThread(0, ++dwSwitchCount); // don't spin on uniproc machines |
426 | } |
427 | } |
428 | |
429 | } |
430 | |
431 | #ifdef PROFILING_SUPPORTED |
432 | { |
433 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
434 | if (str == STR_Success) |
435 | { |
436 | g_profControlBlock.pProfInterface->RuntimeThreadSuspended((ThreadID)this); |
437 | } |
438 | END_PIN_PROFILER(); |
439 | } |
440 | #endif // PROFILING_SUPPORTED |
441 | |
442 | if (pdwSuspendCount != NULL) |
443 | { |
444 | *pdwSuspendCount = dwSuspendCount; |
445 | } |
446 | _ASSERTE(str != (SuspendThreadResult) -1); |
447 | return str; |
448 | |
449 | } |
450 | #endif // DISABLE_THREADSUSPEND |
451 | |
452 | // On non-Windows CORECLR platforms remove Thread::ResumeThread support |
453 | #ifndef DISABLE_THREADSUSPEND |
454 | DWORD Thread::ResumeThread() |
455 | { |
456 | CONTRACTL |
457 | { |
458 | NOTHROW; |
459 | GC_NOTRIGGER; |
460 | SO_TOLERANT; |
461 | MODE_ANY; |
462 | } |
463 | CONTRACTL_END; |
464 | |
465 | _ASSERTE (m_ThreadHandleForResume != INVALID_HANDLE_VALUE); |
466 | |
467 | _ASSERTE (GetThreadHandle() != SWITCHOUT_HANDLE_VALUE); |
468 | |
469 | //DWORD res = ::ResumeThread(GetThreadHandle()); |
470 | DWORD res = ::ResumeThread(m_ThreadHandleForResume); |
471 | _ASSERTE (res != 0 && "Thread is not previously suspended" ); |
472 | #ifdef _DEBUG_IMPL |
473 | _ASSERTE (!m_Creater.IsCurrentThread()); |
474 | if ((res != (DWORD)-1) && (res != 0)) |
475 | { |
476 | Thread * pCurThread = GetThread(); |
477 | if (pCurThread != NULL) |
478 | { |
479 | _ASSERTE(pCurThread->dbg_m_cSuspendedThreads > 0); |
480 | pCurThread->dbg_m_cSuspendedThreads --; |
481 | _ASSERTE(pCurThread->dbg_m_cSuspendedThreadsWithoutOSLock <= pCurThread->dbg_m_cSuspendedThreads); |
482 | } |
483 | } |
484 | #endif |
485 | if (res != (DWORD) -1 && res != 0) |
486 | { |
487 | DecCantAllocCount(); |
488 | } |
489 | #ifdef PROFILING_SUPPORTED |
490 | { |
491 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
492 | if ((res != 0) && (res != (DWORD)-1)) |
493 | { |
494 | g_profControlBlock.pProfInterface->RuntimeThreadResumed((ThreadID)this); |
495 | } |
496 | END_PIN_PROFILER(); |
497 | } |
498 | #endif |
499 | return res; |
500 | |
501 | } |
502 | #endif // DISABLE_THREADSUSPEND |
503 | |
504 | #ifdef _DEBUG |
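// Debug-only sink written below so that dwSuspendCount stays addressable on the stack.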
505 | void* forceStackA; |
506 | |
507 | // CheckSuspended |
508 | // Checks whether the given thread is currently suspended. |
509 | // Note that if we cannot determine the true suspension status |
510 | // of the thread, we succeed. Intended to be used in asserts |
511 | // in operations that require the target thread to be suspended. |
512 | // Arguments: |
513 | // pThread - The thread to examine. |
514 | // Return value: |
515 | // FALSE, if the thread is definitely not suspended. |
516 | // TRUE, otherwise. |
517 | static inline BOOL CheckSuspended(Thread *pThread) |
518 | { |
519 | CONTRACTL |
520 | { |
521 | NOTHROW; |
522 | GC_NOTRIGGER; |
523 | DEBUG_ONLY; |
524 | } |
525 | CONTRACTL_END; |
526 | |
527 | _ASSERTE(GetThread() != pThread); |
528 | _ASSERTE(CheckPointer(pThread)); |
529 | |
530 | #ifndef DISABLE_THREADSUSPEND |
531 | // Only perform this test if we're allowed to call back into the host. |
532 | // Thread::SuspendThread contains several potential calls into the host. |
533 | if (CanThisThreadCallIntoHost()) |
534 | { |
535 | DWORD dwSuspendCount; |
536 | Thread::SuspendThreadResult str = pThread->SuspendThread(FALSE, &dwSuspendCount); |
537 | forceStackA = &dwSuspendCount; |
538 | if (str == Thread::STR_Success) |
539 | { |
540 | pThread->ResumeThread(); |
541 | return dwSuspendCount >= 1; |
542 | } |
543 | } |
544 | #endif // !DISABLE_THREADSUSPEND |
545 | return TRUE; |
546 | } |
547 | #endif //_DEBUG |
548 | |
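// Thin wrappers over Thread::GetThreadContext / Thread::SetThreadContext that assert
// (in debug builds) that the target thread is suspended and stress-log the key registers.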
549 | BOOL EEGetThreadContext(Thread *pThread, CONTEXT *pContext) |
550 | { |
551 | CONTRACTL { |
552 | NOTHROW; |
553 | GC_NOTRIGGER; |
554 | } |
555 | CONTRACTL_END; |
556 | |
557 | _ASSERTE(CheckSuspended(pThread)); |
558 | |
559 | BOOL ret = pThread->GetThreadContext(pContext); |
560 | |
561 | STRESS_LOG6(LF_SYNC, LL_INFO1000, "Got thread context ret = %d EIP = %p ESP = %p EBP = %p, pThread = %p, ContextFlags = 0x%x\n" , |
562 | ret, GetIP(pContext), GetSP(pContext), GetFP(pContext), pThread, pContext->ContextFlags); |
563 | |
564 | return ret; |
565 | |
566 | } |
567 | |
568 | BOOL EESetThreadContext(Thread *pThread, const CONTEXT *pContext) |
569 | { |
570 | CONTRACTL { |
571 | NOTHROW; |
572 | GC_NOTRIGGER; |
573 | } |
574 | CONTRACTL_END; |
575 | |
576 | #ifdef _TARGET_X86_ |
577 | _ASSERTE(CheckSuspended(pThread)); |
578 | #endif |
579 | |
580 | BOOL ret = pThread->SetThreadContext(pContext); |
581 | |
582 | STRESS_LOG6(LF_SYNC, LL_INFO1000, "Set thread context ret = %d EIP = %p ESP = %p EBP = %p, pThread = %p, ContextFlags = 0x%x\n" , |
583 | ret, GetIP((CONTEXT*)pContext), GetSP((CONTEXT*)pContext), GetFP((CONTEXT*)pContext), pThread, pContext->ContextFlags); |
584 | |
585 | return ret; |
586 | } |
587 | |
588 | // The AbortReason must be cleared at the following times: |
589 | // |
590 | // 1. When the application performs a ResetAbort. |
591 | // |
592 | // 2. When the physical thread stops running. That's because we must eliminate any |
593 | // cycles that would otherwise be uncollectible, between the Reason and the Thread. |
594 | // Nobody can retrieve the Reason after the thread stops running anyway. |
595 | // |
596 | // We don't have to do any work when the AppDomain containing the Reason object is unloaded. |
597 | // That's because the HANDLE is released as part of the tear-down. The 'adid' prevents us |
598 | // from ever using the trash handle value thereafter. |
599 | |
600 | void Thread::ClearAbortReason(BOOL pNoLock) |
601 | { |
602 | CONTRACTL |
603 | { |
604 | GC_NOTRIGGER; |
605 | MODE_COOPERATIVE; |
606 | NOTHROW; |
607 | } |
608 | CONTRACTL_END; |
609 | |
610 | OBJECTHANDLE oh; |
611 | ADID adid; |
612 | |
613 | if (pNoLock){ |
614 | // Stash the fields so we can destroy the OBJECTHANDLE if appropriate. |
615 | oh = m_AbortReason; |
616 | adid = m_AbortReasonDomainID; |
617 | |
618 | // Clear the fields. |
619 | m_AbortReason = 0; |
620 | m_AbortReasonDomainID = ADID(INVALID_APPDOMAIN_ID); |
621 | } |
622 | else |
623 | // Scope the lock to stashing and clearing the two fields on the Thread object. |
624 | { |
625 | // Atomically get the OBJECTHANDLE and ADID of the object, and then |
626 | // clear them. |
627 | |
628 | // NOTE: get the lock on this thread object, not on the executing thread. |
629 | Thread::AbortRequestLockHolder lock(this); |
630 | |
631 | // Stash the fields so we can destroy the OBJECTHANDLE if appropriate. |
632 | oh = m_AbortReason; |
633 | adid = m_AbortReasonDomainID; |
634 | |
635 | // Clear the fields. |
636 | m_AbortReason = 0; |
637 | m_AbortReasonDomainID = ADID(INVALID_APPDOMAIN_ID); |
638 | } |
639 | |
640 | // If there is an OBJECTHANDLE, try to clear it. |
641 | if (oh != 0 && adid.m_dwId != 0) |
642 | DestroyHandle(oh); |
643 | } |
644 | |
645 | |
646 | // Context passed down through a stack crawl (see code below). |
647 | struct StackCrawlContext |
648 | { |
649 | enum SCCType |
650 | { |
651 | SCC_CheckWithinEH = 0x00000001, |
652 | SCC_CheckWithinCer = 0x00000002, |
653 | }; |
654 | Thread* pAbortee; |
655 | int eType; |
656 | BOOL fUnprotectedCode; |
657 | BOOL fWithinEHClause; |
658 | BOOL fWithinCer; |
659 | BOOL fHasManagedCodeOnStack; |
660 | BOOL fWriteToStressLog; |
661 | |
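// Latched-frame support used by TAStackCrawlCallBack to pair an IL stub frame with its
// interop-method frame before reporting them to the worker (see that function below).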
662 | BOOL fHaveLatchedCF; |
663 | CrawlFrame LatchedCF; |
664 | }; |
665 | |
// Crawl the stack looking for Thread Abort related information (whether we're executing inside a CER or an error handling clause
// of some sort).
668 | static StackWalkAction TAStackCrawlCallBackWorker(CrawlFrame* pCf, StackCrawlContext *pData) |
669 | { |
670 | CONTRACTL { |
671 | NOTHROW; |
672 | GC_NOTRIGGER; |
673 | } |
674 | CONTRACTL_END; |
675 | |
676 | _ASSERTE(pData->eType & (StackCrawlContext::SCC_CheckWithinCer | StackCrawlContext::SCC_CheckWithinEH)); |
677 | |
678 | if(pCf->IsFrameless()) |
679 | { |
680 | IJitManager* pJitManager = pCf->GetJitManager(); |
681 | _ASSERTE(pJitManager); |
682 | if (pJitManager && !pData->fHasManagedCodeOnStack) |
683 | { |
684 | pData->fHasManagedCodeOnStack = TRUE; |
685 | } |
686 | } |
687 | |
688 | // Get the method for this frame if it exists (might not be a managed method, so check the explicit frame if that's what we're |
689 | // looking at). |
690 | MethodDesc *pMD = pCf->GetFunction(); |
691 | Frame *pFrame = pCf->GetFrame(); |
692 | if (pMD == NULL && pFrame != NULL) |
693 | pMD = pFrame->GetFunction(); |
694 | |
695 | // Non-method frames don't interest us. |
696 | if (pMD == NULL) |
697 | return SWA_CONTINUE; |
698 | |
699 | #if defined(_DEBUG) |
700 | #define METHODNAME(pFunc) (pFunc?pFunc->m_pszDebugMethodName:"<n/a>") |
701 | #else |
702 | #define METHODNAME(pFunc) "<n/a>" |
703 | #endif |
704 | if (pData->fWriteToStressLog) |
705 | { |
706 | STRESS_LOG5(LF_EH, LL_INFO100, "TAStackCrawlCallBack: STACKCRAWL method:%pM ('%s'), offset %x, Frame:%p, FrameVtable = %pV\n" , |
707 | pMD, METHODNAME(pMD), pCf->IsFrameless()?pCf->GetRelOffset():0, pFrame, pCf->IsFrameless()?0:(*(void**)pFrame)); |
708 | } |
709 | #undef METHODNAME |
710 | |
711 | |
// If we weren't asked about EH clauses then we can return now (stop the stack walk if we have a definitive answer on the CER
// question, move to the next frame otherwise).
714 | if ((pData->eType & StackCrawlContext::SCC_CheckWithinEH) == 0) |
715 | return ((pData->fWithinCer || pData->fUnprotectedCode) && pData->fHasManagedCodeOnStack) ? SWA_ABORT : SWA_CONTINUE; |
716 | |
717 | // If we already discovered we're within an EH clause but are still processing (presumably to determine whether we're within a |
718 | // CER), then we can just skip to the next frame straight away. Also terminate here if the current frame is not frameless since |
719 | // there isn't any useful EH information for non-managed frames. |
720 | if (pData->fWithinEHClause || !pCf->IsFrameless()) |
721 | return SWA_CONTINUE; |
722 | |
723 | IJitManager* pJitManager = pCf->GetJitManager(); |
724 | _ASSERTE(pJitManager); |
725 | |
726 | EH_CLAUSE_ENUMERATOR pEnumState; |
727 | unsigned EHCount = pJitManager->InitializeEHEnumeration(pCf->GetMethodToken(), &pEnumState); |
728 | if (EHCount == 0) |
// There are no EH clauses in this method, hence no finally clause to worry about here.
730 | return SWA_CONTINUE; |
731 | |
732 | DWORD offs = (DWORD)pCf->GetRelOffset(); |
733 | |
734 | if (!pCf->IsActiveFrame()) |
735 | { |
// If we aren't the topmost method, then our IP is a return address and
// we can't use it to directly compare against the EH ranges because we
// may be in a cloned finally which has a call as the last instruction.
739 | |
740 | offs--; |
741 | } |
742 | |
743 | if (pData->fWriteToStressLog) |
744 | { |
745 | STRESS_LOG1(LF_EH, LL_INFO100, "TAStackCrawlCallBack: STACKCRAWL Offset 0x%x V\n" , offs); |
746 | } |
747 | EE_ILEXCEPTION_CLAUSE EHClause; |
748 | |
749 | StackWalkAction action = SWA_CONTINUE; |
750 | #ifndef WIN64EXCEPTIONS |
// On X86, the EH encoding for a catch clause is a complete mess.
// If the catch clause is in its own basic block, the end of the catch includes everything in that basic block.
// For a nested catch, the end of the catch may include several jmp instructions after the JIT_EndCatch call.
// To better decide whether we are inside a nested catch, we check if offs-1 falls in more than one catch clause.
755 | DWORD countInCatch = 0; |
756 | BOOL fAtJitEndCatch = FALSE; |
757 | if (pData->pAbortee == GetThread() && |
758 | pData->pAbortee->ThrewControlForThread() == Thread::InducedThreadRedirectAtEndOfCatch && |
759 | GetControlPC(pCf->GetRegisterSet()) == (PCODE)GetIP(pData->pAbortee->GetAbortContext())) |
760 | { |
761 | fAtJitEndCatch = TRUE; |
762 | offs -= 1; |
763 | } |
764 | #endif // !WIN64EXCEPTIONS |
765 | |
766 | for(ULONG i=0; i < EHCount; i++) |
767 | { |
768 | pJitManager->GetNextEHClause(&pEnumState, &EHClause); |
769 | _ASSERTE(IsValidClause(&EHClause)); |
770 | |
// !!! If this function is called on the aborter thread, we should check for finally clauses only.
// !!! Catch and filter clauses are skipped. In UserAbort, the first thing after ReadyForAbort
// !!! is to check if the target thread is processing an exception.
// !!! If an exception is in flight, we don't induce ThreadAbort. Instead, at the end of Jit_EndCatch
// !!! we will handle the abort.
778 | if (pData->pAbortee != GetThread() && !IsFaultOrFinally(&EHClause)) |
779 | { |
780 | continue; |
781 | } |
782 | if (offs >= EHClause.HandlerStartPC && |
783 | offs < EHClause.HandlerEndPC) |
784 | { |
785 | #ifndef WIN64EXCEPTIONS |
786 | if (fAtJitEndCatch) |
787 | { |
// On X86, the JIT's EH info may include the instruction after JIT_EndCatch inside the same catch
// clause if it is in the same basic block.
// In this case offs is inside at least one catch handler, but since we are at the end of the
// catch, this one should not be counted.
792 | countInCatch ++; |
793 | if (countInCatch == 1) |
794 | { |
795 | continue; |
796 | } |
797 | } |
798 | #endif // !WIN64EXCEPTIONS |
799 | pData->fWithinEHClause = true; |
800 | // We're within an EH clause. If we're asking about CERs too then stop the stack walk if we've reached a conclusive |
801 | // result or continue looking otherwise. Else we can stop the stackwalk now. |
802 | if (pData->eType & StackCrawlContext::SCC_CheckWithinCer) |
803 | { |
804 | action = (pData->fWithinCer || pData->fUnprotectedCode) ? SWA_ABORT : SWA_CONTINUE; |
805 | } |
806 | else |
807 | { |
808 | action = SWA_ABORT; |
809 | } |
810 | break; |
811 | } |
812 | } |
813 | |
814 | #ifndef WIN64EXCEPTIONS |
815 | #ifdef _DEBUG |
816 | if (fAtJitEndCatch) |
817 | { |
818 | _ASSERTE (countInCatch > 0); |
819 | } |
820 | #endif // _DEBUG |
#endif // !WIN64EXCEPTIONS
822 | return action; |
823 | } |
824 | |
825 | // Wrapper around code:TAStackCrawlCallBackWorker that abstracts away the differences between the reporting order |
826 | // of x86 and 64-bit stackwalker implementations, and also deals with interop calls that have an implicit reliability |
827 | // contract. If a P/Invoke or CLR->COM call returns SafeHandle or CriticalHandle, the IL stub could be aborted |
828 | // before having a chance to store the native handle into the Safe/CriticalHandle object. Therefore such calls are |
829 | // treated as unbreakable by convention. |
830 | StackWalkAction TAStackCrawlCallBack(CrawlFrame* pCf, void* data) |
831 | { |
832 | CONTRACTL { |
833 | NOTHROW; |
834 | GC_NOTRIGGER; |
835 | } |
836 | CONTRACTL_END; |
837 | |
838 | StackCrawlContext *pData = (StackCrawlContext *)data; |
839 | |
840 | // We have the current frame in pCf and possibly one latched frame in pData->LatchedCF. This enumeration |
841 | // describes which of these should be passed to code:TAStackCrawlCallBackWorker and in what order. |
842 | enum LatchedFrameAction |
843 | { |
844 | DiscardLatchedFrame, // forget the latched frame, report the current one |
845 | DiscardCurrentFrame, // ignore the current frame, report the latched one |
846 | ProcessLatchedInOrder, // report the latched frame, then report the current frame |
847 | ProcessLatchedReversed, // report the current frame, then report the latched frame |
848 | LatchCurrentFrame // latch the current frame, don't report anything |
849 | } |
850 | frameAction = DiscardLatchedFrame; |
851 | |
852 | #ifdef _TARGET_X86_ |
853 | // On X86 the IL stub method is reported to us before the frame with the actual interop method. We need to |
854 | // swap the order because if the worker saw the IL stub - which is a CER root - first, it would terminate the |
855 | // stack walk and wouldn't allow the thread to be aborted, regardless of how the interop method is annotated. |
856 | if (pData->fHaveLatchedCF) |
857 | { |
// Do the current and latched frames represent the same call?
859 | if (pCf->pFrame == pData->LatchedCF.pFrame) |
860 | { |
861 | if (pData->LatchedCF.GetFunction()->AsDynamicMethodDesc()->IsUnbreakable()) |
862 | { |
863 | // Report only the latched IL stub frame which is a CER root. |
864 | frameAction = DiscardCurrentFrame; |
865 | } |
866 | else |
867 | { |
868 | // Report the interop method (current frame) which may be annotated, then the IL stub. |
869 | frameAction = ProcessLatchedReversed; |
870 | } |
871 | } |
872 | else |
873 | { |
874 | // The two frames are unrelated - process them in order. |
875 | frameAction = ProcessLatchedInOrder; |
876 | } |
877 | pData->fHaveLatchedCF = FALSE; |
878 | } |
879 | else |
880 | { |
881 | MethodDesc *pMD = pCf->GetFunction(); |
882 | if (pMD != NULL && pMD->IsILStub() && InlinedCallFrame::FrameHasActiveCall(pCf->pFrame)) |
883 | { |
884 | // This may be IL stub for an interesting interop call - latch it. |
885 | frameAction = LatchCurrentFrame; |
886 | } |
887 | } |
888 | #else // _TARGET_X86_ |
889 | // On 64-bit the IL stub method is reported after the actual interop method so we don't have to swap them. |
890 | // However, we still want to discard the interop method frame if the call is unbreakable by convention. |
891 | if (pData->fHaveLatchedCF) |
892 | { |
893 | MethodDesc *pMD = pCf->GetFunction(); |
894 | if (pMD != NULL && pMD->IsILStub() && |
895 | pData->LatchedCF.GetFrame()->GetReturnAddress() == GetControlPC(pCf->GetRegisterSet()) && |
896 | pMD->AsDynamicMethodDesc()->IsUnbreakable()) |
897 | { |
898 | // The current and latched frame represent the same call and the IL stub is marked as unbreakable. |
899 | // We will discard the interop method and report only the IL stub which is a CER root. |
900 | frameAction = DiscardLatchedFrame; |
901 | } |
902 | else |
903 | { |
904 | // Otherwise process the two frames in order. |
905 | frameAction = ProcessLatchedInOrder; |
906 | } |
907 | pData->fHaveLatchedCF = FALSE; |
908 | } |
909 | else |
910 | { |
911 | MethodDesc *pMD = pCf->GetFunction(); |
912 | if (pCf->GetFrame() != NULL && pMD != NULL && (pMD->IsNDirect() || pMD->IsComPlusCall())) |
913 | { |
914 | // This may be interop method of an interesting interop call - latch it. |
915 | frameAction = LatchCurrentFrame; |
916 | } |
917 | } |
918 | #endif // _TARGET_X86_ |
919 | |
920 | // Execute the "frame action". |
921 | StackWalkAction action; |
922 | switch (frameAction) |
923 | { |
924 | case DiscardLatchedFrame: |
925 | action = TAStackCrawlCallBackWorker(pCf, pData); |
926 | break; |
927 | |
928 | case DiscardCurrentFrame: |
929 | action = TAStackCrawlCallBackWorker(&pData->LatchedCF, pData); |
930 | break; |
931 | |
932 | case ProcessLatchedInOrder: |
933 | action = TAStackCrawlCallBackWorker(&pData->LatchedCF, pData); |
934 | if (action == SWA_CONTINUE) |
935 | action = TAStackCrawlCallBackWorker(pCf, pData); |
936 | break; |
937 | |
938 | case ProcessLatchedReversed: |
939 | action = TAStackCrawlCallBackWorker(pCf, pData); |
940 | if (action == SWA_CONTINUE) |
941 | action = TAStackCrawlCallBackWorker(&pData->LatchedCF, pData); |
942 | break; |
943 | |
944 | case LatchCurrentFrame: |
945 | pData->LatchedCF = *pCf; |
946 | pData->fHaveLatchedCF = TRUE; |
947 | action = SWA_CONTINUE; |
948 | break; |
949 | |
950 | default: |
951 | UNREACHABLE(); |
952 | } |
953 | return action; |
954 | } |
955 | |
956 | // Is the current thread currently executing within a constrained execution region? |
957 | BOOL Thread::IsExecutingWithinCer() |
958 | { |
959 | CONTRACTL |
960 | { |
961 | NOTHROW; |
962 | GC_NOTRIGGER; |
963 | } |
964 | CONTRACTL_END; |
965 | |
966 | if (!g_fEEStarted) |
967 | return FALSE; |
968 | |
969 | Thread *pThread = GetThread(); |
970 | _ASSERTE (pThread); |
971 | StackCrawlContext sContext = { pThread, |
972 | StackCrawlContext::SCC_CheckWithinCer, |
973 | FALSE, |
974 | FALSE, |
975 | FALSE, |
976 | FALSE, |
977 | FALSE, |
978 | FALSE}; |
979 | |
980 | pThread->StackWalkFrames(TAStackCrawlCallBack, &sContext); |
981 | |
982 | #ifdef STRESS_LOG |
983 | if (sContext.fWithinCer && StressLog::StressLogOn(~0u, 0)) |
984 | { |
985 | // If stress log is on, write info to stress log |
986 | StackCrawlContext sContext1 = { pThread, |
987 | StackCrawlContext::SCC_CheckWithinCer, |
988 | FALSE, |
989 | FALSE, |
990 | FALSE, |
991 | FALSE, |
992 | TRUE, |
993 | FALSE}; |
994 | |
995 | pThread->StackWalkFrames(TAStackCrawlCallBack, &sContext1); |
996 | } |
997 | #endif |
998 | |
999 | return sContext.fWithinCer; |
1000 | } |
1001 | |
1002 | |
1003 | // Context structure used during stack walks to determine whether a given method is executing within a CER. |
1004 | struct CerStackCrawlContext |
1005 | { |
1006 | MethodDesc *m_pStartMethod; // First method we crawl (here for debug purposes) |
1007 | bool m_fFirstFrame; // True for first callback only |
1008 | bool m_fWithinCer; // The result |
1009 | }; |
1010 | |
1011 | |
1012 | // Determine whether the method at the given depth in the thread's execution stack is executing within a CER. |
1013 | BOOL Thread::IsWithinCer(CrawlFrame *pCf) |
1014 | { |
1015 | CONTRACTL |
1016 | { |
1017 | NOTHROW; |
1018 | GC_NOTRIGGER; |
1019 | } |
1020 | CONTRACTL_END; |
1021 | |
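// This implementation does not track CERs, so every frame is reported as outside one.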
1022 | return FALSE; |
1023 | } |
1024 | |
1025 | #if defined(_TARGET_AMD64_) && defined(FEATURE_HIJACK) |
1026 | BOOL Thread::IsSafeToInjectThreadAbort(PTR_CONTEXT pContextToCheck) |
1027 | { |
1028 | CONTRACTL |
1029 | { |
1030 | NOTHROW; |
1031 | GC_NOTRIGGER; |
1032 | MODE_ANY; |
1033 | PRECONDITION(pContextToCheck != NULL); |
1034 | } |
1035 | CONTRACTL_END; |
1036 | |
1037 | EECodeInfo codeInfo(GetIP(pContextToCheck)); |
1038 | _ASSERTE(codeInfo.IsValid()); |
1039 | |
// Check if the method uses a frame register. If it does not, then RSP will be used by the OS as the frame register
// and returned as the EstablisherFrame. This is fine at any instruction in the method (including the epilog) since there is always a
// difference of one stack slot between the caller SP and the callee SP, due to the return address having been pushed on the stack.
1043 | if (!codeInfo.HasFrameRegister()) |
1044 | { |
1045 | return TRUE; |
1046 | } |
1047 | |
1048 | BOOL fSafeToInjectThreadAbort = TRUE; |
1049 | |
1050 | if (IsIPInEpilog(pContextToCheck, &codeInfo, &fSafeToInjectThreadAbort)) |
1051 | { |
1052 | return fSafeToInjectThreadAbort; |
1053 | } |
1054 | else |
1055 | { |
1056 | return TRUE; |
1057 | } |
1058 | } |
1059 | #endif // defined(_TARGET_AMD64_) && defined(FEATURE_HIJACK) |
1060 | |
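// Minimum set of CONTEXT flags needed to seed a stack walk on this architecture.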
1061 | #ifdef _TARGET_AMD64_ |
1062 | // CONTEXT_CONTROL does not include any nonvolatile registers that might be the frame pointer. |
1063 | #define CONTEXT_MIN_STACKWALK (CONTEXT_CONTROL | CONTEXT_INTEGER) |
1064 | #else |
1065 | #define CONTEXT_MIN_STACKWALK (CONTEXT_CONTROL) |
1066 | #endif |
1067 | |
1068 | |
1069 | BOOL Thread::ReadyForAsyncException() |
1070 | { |
1071 | CONTRACTL { |
1072 | NOTHROW; |
1073 | GC_NOTRIGGER; |
1074 | SO_TOLERANT; |
1075 | } |
1076 | CONTRACTL_END; |
1077 | |
1078 | if (!IsAbortRequested()) |
1079 | { |
1080 | return FALSE; |
1081 | } |
1082 | |
1083 | if (IsAbortRequested() && HasThreadStateNC(TSNC_SOWorkNeeded)) |
1084 | { |
1085 | return TRUE; |
1086 | } |
1087 | |
1088 | // This needs the probe with GenerateHardSO |
1089 | CONTRACT_VIOLATION(SOToleranceViolation); |
1090 | |
1091 | if (GetThread() == this && HasThreadStateNC (TSNC_PreparingAbort) && !IsRudeAbort() ) |
1092 | { |
1093 | STRESS_LOG0(LF_APPDOMAIN, LL_INFO10, "in Thread::ReadyForAbort PreparingAbort\n" ); |
1094 | // Avoid recursive call |
1095 | return FALSE; |
1096 | } |
1097 | |
1098 | if (IsAbortPrevented()) |
1099 | { |
1100 | // |
// If the thread is marked to have a FuncEval abort request, then allow that to go through,
// since we don't want to block funcEval aborts. Such requests are initiated by the
// right-side when the thread is doing a funcEval, and the exception would be caught in the
// left-side's funcEval implementation, which will then clear the funcEval-abort-state from the thread.
1105 | // |
1106 | // If another thread also marked this one for a non-FuncEval abort, then the left-side will |
1107 | // proceed to [re]throw that exception post funcEval abort. When we come here next, we would follow |
1108 | // the usual rules to raise the exception and if raised, to prevent the abort if applicable. |
1109 | // |
1110 | if (!IsFuncEvalAbort()) |
1111 | { |
1112 | STRESS_LOG0(LF_APPDOMAIN, LL_INFO10, "in Thread::ReadyForAbort prevent abort\n" ); |
1113 | return FALSE; |
1114 | } |
1115 | } |
1116 | |
1117 | // The thread requests not to be aborted. Honor this for safe abort. |
1118 | if (!IsRudeAbort() && IsAsyncPrevented()) |
1119 | { |
1120 | STRESS_LOG0(LF_APPDOMAIN, LL_INFO10, "in Thread::ReadyForAbort AsyncPrevented\n" ); |
1121 | return FALSE; |
1122 | } |
1123 | |
1124 | REGDISPLAY rd; |
1125 | |
1126 | Frame *pStartFrame = NULL; |
1127 | if (ThrewControlForThread() == Thread::InducedThreadRedirect || |
1128 | ThrewControlForThread() == Thread::InducedThreadRedirectAtEndOfCatch) |
1129 | { |
1130 | _ASSERTE(GetThread() == this); |
1131 | _ASSERTE(ExecutionManager::IsManagedCode(GetIP(m_OSContext))); |
1132 | FillRegDisplay(&rd, m_OSContext); |
1133 | |
1134 | if (ThrewControlForThread() == Thread::InducedThreadRedirectAtEndOfCatch) |
1135 | { |
// On 64-bit, this function may be called from COMPlusCheckForAbort when the
// stack has not yet unwound, but m_OSContext points to the state after the unwind.
1138 | // |
1139 | TADDR sp = GetSP(m_OSContext); |
1140 | Frame *pFrameAddr = m_pFrame; |
1141 | while (pFrameAddr < (LPVOID)sp) |
1142 | { |
1143 | pFrameAddr = pFrameAddr->Next(); |
1144 | } |
1145 | if (pFrameAddr != m_pFrame) |
1146 | { |
1147 | pStartFrame = pFrameAddr; |
1148 | } |
1149 | } |
1150 | #if defined(_TARGET_AMD64_) && defined(FEATURE_HIJACK) |
1151 | else if (ThrewControlForThread() == Thread::InducedThreadRedirect) |
1152 | { |
1153 | if (!IsSafeToInjectThreadAbort(m_OSContext)) |
1154 | { |
1155 | STRESS_LOG0(LF_EH, LL_INFO10, "Thread::ReadyForAbort: Not injecting abort since we are at an unsafe instruction.\n" ); |
1156 | return FALSE; |
1157 | } |
1158 | } |
1159 | #endif // defined(_TARGET_AMD64_) && defined(FEATURE_HIJACK) |
1160 | } |
1161 | else |
1162 | { |
1163 | if (GetFilterContext()) |
1164 | { |
1165 | FillRegDisplay(&rd, GetFilterContext()); |
1166 | } |
1167 | else |
1168 | { |
1169 | CONTEXT ctx; |
1170 | SetIP(&ctx, 0); |
1171 | SetSP(&ctx, 0); |
1172 | FillRegDisplay(&rd, &ctx); |
1173 | } |
1174 | } |
1175 | |
1176 | #ifdef STRESS_LOG |
1177 | REGDISPLAY rd1; |
1178 | if (StressLog::StressLogOn(~0u, 0)) |
1179 | { |
1180 | CONTEXT ctx1; |
1181 | CopyRegDisplay(&rd, &rd1, &ctx1); |
1182 | } |
1183 | #endif |
1184 | |
// Walk the stack to determine if we are running in a Constrained Execution Region or a finally EH clause (in the non-rude abort
// case). We cannot initiate an abort in these circumstances.
1187 | StackCrawlContext TAContext = |
1188 | { |
1189 | this, |
1190 | StackCrawlContext::SCC_CheckWithinCer | (IsRudeAbort() ? 0 : StackCrawlContext::SCC_CheckWithinEH), |
1191 | FALSE, |
1192 | FALSE, |
1193 | FALSE, |
1194 | FALSE, |
1195 | FALSE |
1196 | }; |
1197 | |
1198 | StackWalkFramesEx(&rd, TAStackCrawlCallBack, &TAContext, QUICKUNWIND, pStartFrame); |
1199 | |
1200 | if (!TAContext.fHasManagedCodeOnStack && IsAbortInitiated() && GetThread() == this) |
1201 | { |
1202 | EEResetAbort(TAR_Thread); |
1203 | return FALSE; |
1204 | } |
1205 | |
1206 | if (TAContext.fWithinCer) |
1207 | { |
1208 | STRESS_LOG0(LF_APPDOMAIN, LL_INFO10, "in Thread::ReadyForAbort RunningCer\n" ); |
1209 | return FALSE; |
1210 | } |
1211 | |
1212 | #ifdef STRESS_LOG |
1213 | if (StressLog::StressLogOn(~0u, 0) && |
1214 | (IsRudeAbort() || !TAContext.fWithinEHClause)) |
1215 | { |
1216 | //Save into stresslog. |
1217 | StackCrawlContext TAContext1 = |
1218 | { |
1219 | this, |
1220 | StackCrawlContext::SCC_CheckWithinCer | (IsRudeAbort() ? 0 : StackCrawlContext::SCC_CheckWithinEH), |
1221 | FALSE, |
1222 | FALSE, |
1223 | FALSE, |
1224 | FALSE, |
1225 | TRUE |
1226 | }; |
1227 | |
1228 | StackWalkFramesEx(&rd1, TAStackCrawlCallBack, &TAContext1, QUICKUNWIND, pStartFrame); |
1229 | } |
1230 | #endif |
1231 | |
1232 | if (IsRudeAbort()) { |
1233 | // If it is rude abort, there is no additional restriction on abort. |
1234 | STRESS_LOG0(LF_APPDOMAIN, LL_INFO10, "in Thread::ReadyForAbort RudeAbort\n" ); |
1235 | return TRUE; |
1236 | } |
1237 | |
1238 | if (TAContext.fWithinEHClause) |
1239 | { |
1240 | STRESS_LOG0(LF_APPDOMAIN, LL_INFO10, "in Thread::ReadyForAbort RunningEHClause\n" ); |
1241 | } |
1242 | |
1243 | //if (m_AbortType == EEPolicy::TA_V1Compatible) { |
1244 | // return TRUE; |
1245 | //} |
1246 | |
// If we are running a finally, we cannot abort for a safe abort.
1248 | return !TAContext.fWithinEHClause; |
1249 | } |
1250 | |
1251 | BOOL Thread::IsRudeAbort() |
1252 | { |
1253 | CONTRACTL { |
1254 | NOTHROW; |
1255 | GC_NOTRIGGER; |
1256 | SO_TOLERANT; |
1257 | } |
1258 | CONTRACTL_END; |
1259 | |
1260 | return (IsAbortRequested() && (m_AbortType == EEPolicy::TA_Rude)); |
1261 | } |
1262 | |
1263 | BOOL Thread::IsFuncEvalAbort() |
1264 | { |
1265 | CONTRACTL { |
1266 | NOTHROW; |
1267 | GC_NOTRIGGER; |
1268 | } |
1269 | CONTRACTL_END; |
1270 | |
1271 | return (IsAbortRequested() && (m_AbortInfo & TAI_AnyFuncEvalAbort)); |
1272 | } |
1273 | |
1274 | // |
// If the OS is down in kernel mode when we do a GetThreadContext, any
1276 | // updates we make to the context will not take effect if we try to do |
1277 | // a SetThreadContext. As a result, newer OSes expose the idea of |
1278 | // "trap frame reporting" which will tell us if it is unsafe to modify |
1279 | // the context and pass it along to SetThreadContext. |
1280 | // |
1281 | // On OSes that support trap frame reporting, we will return FALSE if |
1282 | // we can determine that the OS is not in user mode. Otherwise, we |
1283 | // return TRUE. |
1284 | // |
1285 | BOOL Thread::IsContextSafeToRedirect(CONTEXT* pContext) |
1286 | { |
1287 | CONTRACTL |
1288 | { |
1289 | NOTHROW; |
1290 | GC_NOTRIGGER; |
1291 | MODE_ANY; |
1292 | } |
1293 | CONTRACTL_END; |
1294 | |
1295 | BOOL isSafeToRedirect = TRUE; |
1296 | |
1297 | #ifndef FEATURE_PAL |
1298 | |
1299 | #if !defined(_TARGET_X86_) |
1300 | // In some cases (x86 WOW64, ARM32 on ARM64) Windows will not set the CONTEXT_EXCEPTION_REPORTING flag |
1301 | // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling). |
1302 | // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that |
// it is not safe to manipulate the current state of the thread context.
// Note: the x86 WOW64 case is already handled in GetSafelyRedirectableThreadContext; in addition, this
// flag is never set on Windows 7 x86 WOW64. So this check is valid for non-x86 architectures only.
1306 | isSafeToRedirect = (pContext->ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0; |
1307 | #endif // !defined(_TARGET_X86_) |
1308 | |
1309 | if (pContext->ContextFlags & CONTEXT_EXCEPTION_REPORTING) |
1310 | { |
1311 | if (pContext->ContextFlags & (CONTEXT_SERVICE_ACTIVE|CONTEXT_EXCEPTION_ACTIVE)) |
1312 | { |
1313 | // cannot process exception |
1314 | LOG((LF_ALWAYS, LL_WARNING, "thread [os id=0x08%x id=0x08%x] redirect failed due to ContextFlags of 0x%08x\n" , m_OSThreadId, m_ThreadId, pContext->ContextFlags)); |
1315 | isSafeToRedirect = FALSE; |
1316 | } |
1317 | } |
1318 | |
1319 | #endif // !FEATURE_PAL |
1320 | |
1321 | return isSafeToRedirect; |
1322 | } |
1323 | |
1324 | void Thread::SetAbortEndTime(ULONGLONG endTime, BOOL fRudeAbort) |
1325 | { |
1326 | LIMITED_METHOD_CONTRACT; |
1327 | |
1328 | { |
1329 | AbortRequestLockHolder lh(this); |
1330 | if (fRudeAbort) |
1331 | { |
1332 | if (endTime < m_RudeAbortEndTime) |
1333 | { |
1334 | m_RudeAbortEndTime = endTime; |
1335 | } |
1336 | } |
1337 | else |
1338 | { |
1339 | if (endTime < m_AbortEndTime) |
1340 | { |
1341 | m_AbortEndTime = endTime; |
1342 | } |
1343 | } |
1344 | } |
1345 | |
1346 | } |
1347 | |
1348 | #ifdef _PREFAST_ |
1349 | #pragma warning(push) |
1350 | #pragma warning(disable:21000) // Suppress PREFast warning about overly large function |
1351 | #endif |
1352 | HRESULT |
1353 | Thread::UserAbort(ThreadAbortRequester requester, |
1354 | EEPolicy::ThreadAbortTypes abortType, |
1355 | DWORD timeout, |
1356 | UserAbort_Client client |
1357 | ) |
1358 | { |
1359 | CONTRACTL |
1360 | { |
1361 | THROWS; |
1362 | if (GetThread()) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);} |
1363 | } |
1364 | CONTRACTL_END; |
1365 | |
1366 | STRESS_LOG2(LF_SYNC | LF_APPDOMAIN, LL_INFO100, "UserAbort Thread %p Thread Id = %x\n" , this, GetThreadId()); |
1367 | |
1368 | BOOL fHoldingThreadStoreLock = ThreadStore::HoldingThreadStore(); |
1369 | |
// For a SafeAbort from a FuncEval abort, we do not apply the escalation policy. The debugger
// tries a SafeAbort first with a short timeout, and the thread returns to the debugger.
// After a break, the thread will do a RudeAbort if the abort has not finished.
1373 | EClrOperation operation; |
1374 | if (abortType == EEPolicy::TA_Rude) |
1375 | { |
1376 | if (HasLockInCurrentDomain()) |
1377 | { |
1378 | operation = OPR_ThreadRudeAbortInCriticalRegion; |
1379 | } |
1380 | else |
1381 | { |
1382 | operation = OPR_ThreadRudeAbortInNonCriticalRegion; |
1383 | } |
1384 | } |
1385 | else |
1386 | { |
1387 | operation = OPR_ThreadAbort; |
1388 | } |
1389 | |
1390 | // Debugger func-eval aborts (both rude + normal) don't have any escalation policy. They are invoked |
1391 | // by the debugger and the debugger handles the consequences. |
// Furthermore, in interop-debugging, threads will be hard-suspended in preemptive mode while we try to abort them.
// So any abort strategy that relies on a timeout and the target thread slipping is dangerous. Escalation policy would let a
// host circumvent the timeout and thus we may wait forever for the target thread to slip. We'd deadlock here. Since the escalation
// policy doesn't let the host break this deadlock (and certainly doesn't let the debugger break the deadlock), it's unsafe
// to have an escalation policy for func-eval aborts at all.
1397 | BOOL fEscalation = (requester != TAR_FuncEval); |
1398 | if (fEscalation) |
1399 | { |
1400 | EPolicyAction action = GetEEPolicy()->GetDefaultAction(operation, this); |
1401 | switch (action) |
1402 | { |
1403 | case eAbortThread: |
1404 | GetEEPolicy()->NotifyHostOnDefaultAction(operation,action); |
1405 | break; |
1406 | case eRudeAbortThread: |
1407 | if (abortType != EEPolicy::TA_Rude) |
1408 | { |
1409 | abortType = EEPolicy::TA_Rude; |
1410 | } |
1411 | GetEEPolicy()->NotifyHostOnDefaultAction(operation,action); |
1412 | break; |
1413 | case eUnloadAppDomain: |
1414 | case eRudeUnloadAppDomain: |
1415 | // AD unload does not abort finalizer thread. |
1416 | if (this != FinalizerThread::GetFinalizerThread()) |
1417 | { |
1418 | if (this == GetThread()) |
1419 | { |
1420 | Join(INFINITE,TRUE); |
1421 | } |
1422 | return S_OK; |
1423 | } |
1424 | break; |
1425 | case eExitProcess: |
1426 | case eFastExitProcess: |
1427 | case eRudeExitProcess: |
1428 | case eDisableRuntime: |
1429 | GetEEPolicy()->NotifyHostOnDefaultAction(operation,action); |
1430 | EEPolicy::HandleExitProcessFromEscalation(action, HOST_E_EXITPROCESS_THREADABORT); |
1431 | _ASSERTE (!"Should not reach here" ); |
1432 | break; |
1433 | default: |
1434 | _ASSERTE (!"unknown policy for thread abort" ); |
1435 | } |
1436 | |
1437 | DWORD timeoutFromPolicy; |
1438 | if (abortType != EEPolicy::TA_Rude) |
1439 | { |
1440 | timeoutFromPolicy = GetEEPolicy()->GetTimeout(OPR_ThreadAbort); |
1441 | } |
1442 | else if (!HasLockInCurrentDomain()) |
1443 | { |
1444 | timeoutFromPolicy = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInNonCriticalRegion); |
1445 | } |
1446 | else |
1447 | { |
1448 | timeoutFromPolicy = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInCriticalRegion); |
1449 | } |
1450 | if (timeout > timeoutFromPolicy) |
1451 | { |
1452 | timeout = timeoutFromPolicy; |
1453 | } |
1454 | } |
1455 | |
1456 | AbortControlHolder AbortController(this); |
1457 | |
1458 | // Swap in timeout |
1459 | if (timeout != INFINITE) |
1460 | { |
1461 | ULONG64 curTime = CLRGetTickCount64(); |
1462 | ULONG64 newEndTime = curTime + timeout; |
1463 | |
1464 | SetAbortEndTime(newEndTime, abortType == EEPolicy::TA_Rude); |
1465 | } |
1466 | |
1467 | // If the abort comes from the thread abort watchdog, proceed with the abort only |
// if the abort is still requested. This handles the race between the watchdog and UnmarkThreadForAbort.
1469 | BOOL fTentative = (requester == Thread::TAR_Thread) && (client == UAC_WatchDog); |
1470 | MarkThreadForAbort(requester, abortType, fTentative); |
1471 | |
1472 | Thread *pCurThread = GetThread(); |
1473 | |
1474 | // If aborting self |
1475 | if (this == pCurThread) |
1476 | { |
1477 | SetAbortInitiated(); |
1478 | #ifdef _DEBUG |
1479 | m_dwAbortPoint = 1; |
1480 | #endif |
1481 | |
1482 | GCX_COOP(); |
1483 | |
1484 | OBJECTREF exceptObj; |
1485 | |
1486 | if (IsRudeAbort()) |
1487 | { |
1488 | exceptObj = CLRException::GetPreallocatedRudeThreadAbortException(); |
1489 | } |
1490 | else |
1491 | { |
1492 | EEException eeExcept(kThreadAbortException); |
1493 | exceptObj = CLRException::GetThrowableFromException(&eeExcept); |
1494 | } |
1495 | |
1496 | RaiseTheExceptionInternalOnly(exceptObj, FALSE); |
1497 | } |
1498 | |
1499 | #ifdef MDA_SUPPORTED |
1500 | if (requester != TAR_FuncEval) |
1501 | { |
1502 | // FuncEval abort is always aborting another thread. No need to trigger MDA. |
1503 | MDA_TRIGGER_ASSISTANT(AsynchronousThreadAbort, ReportViolation(GetThread(), this)); |
1504 | } |
1505 | #endif |
1506 | |
1507 | _ASSERTE(this != pCurThread); // Aborting another thread. |
1508 | |
1509 | if (client == UAC_Host) |
1510 | { |
1511 | // A host may call ICLRTask::Abort on a critical thread. We don't want to |
1512 | // block this thread. |
1513 | return S_OK; |
1514 | } |
1515 | |
1516 | #ifdef _DEBUG |
1517 | DWORD elapsed_time = 0; |
1518 | #endif |
1519 | |
1520 | // We do not want this thread to be alerted. |
1521 | ThreadPreventAsyncHolder preventAsync(pCurThread != NULL); |
1522 | |
1523 | #ifdef _DEBUG |
1524 | // If UserAbort times out, put up msgbox once. |
1525 | BOOL fAlreadyAssert = FALSE; |
1526 | #endif |
1527 | |
1528 | BOOL fOneTryOnly = (client == UAC_WatchDog) || (client == UAC_FinalizerTimeout); |
1529 | BOOL fFirstRun = TRUE; |
1530 | BOOL fNeedEscalation; |
1531 | |
1532 | #if !defined(DISABLE_THREADSUSPEND) |
1533 | DWORD dwSwitchCount = 0; |
1534 | #endif // !defined(DISABLE_THREADSUSPEND) |
1535 | |
1536 | LRetry: |
1537 | fNeedEscalation = FALSE; |
1538 | for (;;) |
1539 | { |
1540 | if (fOneTryOnly) |
1541 | { |
1542 | if (!fFirstRun) |
1543 | { |
1544 | return S_OK; |
1545 | } |
1546 | fFirstRun = FALSE; |
1547 | } |
1548 | // Lock the thread store |
1549 | LOG((LF_SYNC, INFO3, "UserAbort obtain lock\n" )); |
1550 | |
1551 | ULONGLONG abortEndTime = GetAbortEndTime(); |
1552 | if (abortEndTime != MAXULONGLONG) |
1553 | { |
1554 | ULONGLONG now_time = CLRGetTickCount64(); |
1555 | |
1556 | if (now_time >= abortEndTime) |
1557 | { |
1558 | EPolicyAction action1 = eNoAction; |
1559 | DWORD timeout1 = INFINITE; |
1560 | if (fEscalation) |
1561 | { |
1562 | if (!IsRudeAbort()) |
1563 | { |
1564 | action1 = GetEEPolicy()->GetActionOnTimeout(OPR_ThreadAbort, this); |
1565 | timeout1 = GetEEPolicy()->GetTimeout(OPR_ThreadAbort); |
1566 | } |
1567 | else if (HasLockInCurrentDomain()) |
1568 | { |
1569 | action1 = GetEEPolicy()->GetActionOnTimeout(OPR_ThreadRudeAbortInCriticalRegion, this); |
1570 | timeout1 = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInCriticalRegion); |
1571 | } |
1572 | else |
1573 | { |
1574 | action1 = GetEEPolicy()->GetActionOnTimeout(OPR_ThreadRudeAbortInNonCriticalRegion, this); |
1575 | timeout1 = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInNonCriticalRegion); |
1576 | } |
1577 | } |
1578 | if (action1 == eNoAction) |
1579 | { |
1580 | // timeout, but no action on timeout. |
                    // The debugger can call this function to abort a func-eval with a timeout.
1582 | return HRESULT_FROM_WIN32(ERROR_TIMEOUT); |
1583 | } |
1584 | if (timeout1 != INFINITE) |
1585 | { |
1586 | // There is an escalation policy. |
1587 | fNeedEscalation = TRUE; |
1588 | break; |
1589 | } |
1590 | } |
1591 | } |
1592 | |
        // Thread abort needs to walk the stack to decide whether the abort can proceed.
        // It is unsafe to crawl the stack of a thread while it is OS-suspended, which is what we do during
        // thread abort.  For example, Thread T1 aborts thread T2.  T2 is suspended by T1.  Inside SQL
        // this means that no thread sharing the same scheduler with T2 can run.  If T1 needs a lock which
        // is owned by one thread on the scheduler, T1 will wait forever.
        // Our solution is to move T2 to a safe point, resume it, and then do the stack crawl.
1599 | |
1600 | // We need to make sure that ThreadStoreLock is released after CheckForAbort. This makes sure |
1601 | // that ThreadAbort does not race against GC. |
1602 | class CheckForAbort |
1603 | { |
1604 | private: |
1605 | Thread *m_pThread; |
1606 | BOOL m_fHoldingThreadStoreLock; |
1607 | BOOL m_NeedRelease; |
1608 | public: |
1609 | CheckForAbort(Thread *pThread, BOOL fHoldingThreadStoreLock) |
1610 | : m_pThread(pThread), |
1611 | m_fHoldingThreadStoreLock(fHoldingThreadStoreLock), |
1612 | m_NeedRelease(TRUE) |
1613 | { |
1614 | if (!fHoldingThreadStoreLock) |
1615 | { |
1616 | ThreadSuspend::LockThreadStore(ThreadSuspend::SUSPEND_OTHER); |
1617 | } |
1618 | ThreadStore::ResetStackCrawlEvent(); |
1619 | |
1620 | // The thread being aborted may clear the TS_AbortRequested bit and the matching increment |
1621 | // of g_TrapReturningThreads behind our back. Increment g_TrapReturningThreads here |
1622 | // to ensure that we stop for the stack crawl even if the TS_AbortRequested bit is cleared. |
1623 | ThreadStore::TrapReturningThreads(TRUE); |
1624 | } |
1625 | void NeedStackCrawl() |
1626 | { |
1627 | m_pThread->SetThreadState(Thread::TS_StackCrawlNeeded); |
1628 | } |
1629 | ~CheckForAbort() |
1630 | { |
1631 | Release(); |
1632 | } |
1633 | void Release() |
1634 | { |
1635 | if (m_NeedRelease) |
1636 | { |
1637 | m_NeedRelease = FALSE; |
1638 | ThreadStore::TrapReturningThreads(FALSE); |
1639 | ThreadStore::SetStackCrawlEvent(); |
1640 | m_pThread->ResetThreadState(TS_StackCrawlNeeded); |
1641 | if (!m_fHoldingThreadStoreLock) |
1642 | { |
1643 | ThreadSuspend::UnlockThreadStore(); |
1644 | } |
1645 | } |
1646 | } |
1647 | }; |
1648 | CheckForAbort checkForAbort(this, fHoldingThreadStoreLock); |
1649 | |
        // We own the TS lock.  The state of the Thread cannot be changed.
1651 | if (m_State & TS_Unstarted) |
1652 | { |
1653 | // This thread is not yet started. |
1654 | #ifdef _DEBUG |
1655 | m_dwAbortPoint = 2; |
1656 | #endif |
1657 | if(requester == Thread::TAR_Thread) |
1658 | SetAborted(); |
1659 | return S_OK; |
1660 | } |
1661 | |
1662 | if (GetThreadHandle() == INVALID_HANDLE_VALUE && |
1663 | (m_State & TS_Unstarted) == 0) |
1664 | { |
1665 | // The thread is going to die or is already dead. |
1666 | UnmarkThreadForAbort(Thread::TAR_ALL); |
1667 | #ifdef _DEBUG |
1668 | m_dwAbortPoint = 3; |
1669 | #endif |
1670 | if(requester == Thread::TAR_Thread) |
1671 | SetAborted(); |
1672 | return S_OK; |
1673 | } |
1674 | |
1675 | // What if someone else has this thread suspended already? It'll depend where the |
1676 | // thread got suspended. |
1677 | // |
1678 | // User Suspend: |
1679 | // We'll just set the abort bit and hope for the best on the resume. |
1680 | // |
1681 | // GC Suspend: |
1682 | // If it's suspended in jitted code, we'll hijack the IP. |
1683 | // <REVISIT_TODO> Consider race w/ GC suspension</REVISIT_TODO> |
1684 | // If it's suspended but not in jitted code, we'll get suspended for GC, the GC |
1685 | // will complete, and then we'll abort the target thread. |
1686 | // |
1687 | |
1688 | // It's possible that the thread has completed the abort already. |
1689 | // |
1690 | if (!(m_State & TS_AbortRequested)) |
1691 | { |
1692 | #ifdef _DEBUG |
1693 | m_dwAbortPoint = 4; |
1694 | #endif |
1695 | if(requester == Thread::TAR_Thread) |
1696 | SetAborted(); |
1697 | return S_OK; |
1698 | } |
1699 | |
1700 | // If a thread is Dead or Detached, abort is a NOP. |
1701 | // |
1702 | if (m_State & (TS_Dead | TS_Detached | TS_TaskReset)) |
1703 | { |
1704 | UnmarkThreadForAbort(Thread::TAR_ALL); |
1705 | if(requester == Thread::TAR_Thread) |
1706 | SetAborted(); |
1707 | #ifdef _DEBUG |
1708 | m_dwAbortPoint = 5; |
1709 | #endif |
1710 | return S_OK; |
1711 | } |
1712 | |
        // It's possible that some stub notices the AbortRequested bit -- even though we
        // haven't done any real magic yet.  If the thread has already started its abort, we're
        // done.
1716 | // |
1717 | // Two more cases can be folded in here as well. If the thread is unstarted, it'll |
1718 | // abort when we start it. |
1719 | // |
1720 | // If the thread is user suspended (SyncSuspended) -- we're out of luck. Set the bit and |
1721 | // hope for the best on resume. |
1722 | // |
1723 | if ((m_State & TS_AbortInitiated) && !IsRudeAbort()) |
1724 | { |
1725 | #ifdef _DEBUG |
1726 | m_dwAbortPoint = 6; |
1727 | #endif |
1728 | break; |
1729 | } |
1730 | |
1731 | BOOL fOutOfRuntime = FALSE; |
1732 | BOOL fNeedStackCrawl = FALSE; |
1733 | |
1734 | #ifdef DISABLE_THREADSUSPEND |
1735 | // On platforms that do not support safe thread suspension we have to |
1736 | // rely on the GCPOLL mechanism; the mechanism is activated above by |
1737 | // TrapReturningThreads. However when reading shared state we need |
1738 | // to erect appropriate memory barriers. So the interlocked operation |
1739 | // below ensures that any future reads on this thread will happen after |
1740 | // any earlier writes on a different thread have taken effect. |
1741 | FastInterlockOr((DWORD*)&m_State, 0); |
1742 | |
1743 | #else // DISABLE_THREADSUSPEND |
1744 | |
1745 | // Win32 suspend the thread, so it isn't moving under us. |
1746 | SuspendThreadResult str = SuspendThread(); |
1747 | switch (str) |
1748 | { |
1749 | case STR_Success: |
1750 | break; |
1751 | |
1752 | case STR_Failure: |
1753 | case STR_UnstartedOrDead: |
1754 | case STR_NoStressLog: |
1755 | checkForAbort.Release(); |
1756 | __SwitchToThread(0, ++dwSwitchCount); |
1757 | continue; |
1758 | |
1759 | case STR_SwitchedOut: |
            // If the thread is in preemptive GC mode, we can erect a barrier to block the
            // thread from returning to cooperative mode.  Then we can do the stack crawl and make a decision.
1762 | if (!m_fPreemptiveGCDisabled) |
1763 | { |
1764 | checkForAbort.NeedStackCrawl(); |
1765 | if (GetThreadHandle() != SWITCHOUT_HANDLE_VALUE || m_fPreemptiveGCDisabled) |
1766 | { |
1767 | checkForAbort.Release(); |
1768 | __SwitchToThread(0, ++dwSwitchCount); |
1769 | continue; |
1770 | } |
1771 | else |
1772 | { |
1773 | goto LStackCrawl; |
1774 | } |
1775 | } |
1776 | else |
1777 | { |
1778 | goto LPrepareRetry; |
1779 | } |
1780 | |
1781 | default: |
1782 | UNREACHABLE(); |
1783 | } |
1784 | |
1785 | _ASSERTE(str == STR_Success); |
1786 | |
1787 | #endif // DISABLE_THREADSUSPEND |
1788 | |
1789 | // It's possible that the thread has completed the abort already. |
1790 | // |
1791 | if (!(m_State & TS_AbortRequested)) |
1792 | { |
1793 | #ifndef DISABLE_THREADSUSPEND |
1794 | ResumeThread(); |
1795 | #endif |
1796 | if(requester == Thread::TAR_Thread) |
1797 | SetAborted(); |
1798 | #ifdef _DEBUG |
1799 | m_dwAbortPoint = 63; |
1800 | #endif |
1801 | return S_OK; |
1802 | } |
1803 | |
1804 | // Check whether some stub noticed the AbortRequested bit in-between our test above |
1805 | // and us suspending the thread. |
1806 | if ((m_State & TS_AbortInitiated) && !IsRudeAbort()) |
1807 | { |
1808 | #ifndef DISABLE_THREADSUSPEND |
1809 | ResumeThread(); |
1810 | #endif |
1811 | #ifdef _DEBUG |
1812 | m_dwAbortPoint = 65; |
1813 | #endif |
1814 | break; |
1815 | } |
1816 | |
        // If a thread is stopped under a managed debugger, it will have both
        // TS_DebugSuspendPending and TS_SyncSuspended, regardless of whether
        // the thread is actually suspended or not.
        // If it's suspended without the debugger (e.g. via Thread.Suspend), it will
        // also have TS_UserSuspendPending set.
1822 | if (m_State & TS_SyncSuspended) |
1823 | { |
1824 | #ifndef DISABLE_THREADSUSPEND |
1825 | ResumeThread(); |
1826 | #endif |
1827 | checkForAbort.Release(); |
1828 | #ifdef _DEBUG |
1829 | m_dwAbortPoint = 7; |
1830 | #endif |
1831 | |
1832 | // CoreCLR does not support user-requested thread suspension |
1833 | _ASSERTE(!(m_State & TS_UserSuspendPending)); |
1834 | |
1835 | // |
1836 | // If it's stopped by the debugger, we don't want to throw an exception. |
        // Debugger suspension should have no effect on runtime behaviour.
1838 | // |
1839 | if (m_State & TS_DebugSuspendPending) |
1840 | { |
1841 | return S_OK; |
1842 | } |
1843 | |
1844 | COMPlusThrow(kThreadStateException, IDS_EE_THREAD_ABORT_WHILE_SUSPEND); |
1845 | } |
1846 | |
        // If the thread has no managed code on its call stack, abort is a NOP.  We're about
1848 | // to touch the unmanaged thread's stack -- for this to be safe, we can't be |
1849 | // Dead/Detached/Unstarted. |
1850 | // |
1851 | _ASSERTE(!(m_State & ( TS_Dead |
1852 | | TS_Detached |
1853 | | TS_Unstarted))); |
1854 | |
1855 | #if defined(_TARGET_X86_) && !defined(WIN64EXCEPTIONS) |
        // TODO WIN64: consider this if there is a way to detect managed code on the stack.
1857 | if ((m_pFrame == FRAME_TOP) |
1858 | && (GetFirstCOMPlusSEHRecord(this) == EXCEPTION_CHAIN_END) |
1859 | ) |
1860 | { |
1861 | #ifndef DISABLE_THREADSUSPEND |
1862 | ResumeThread(); |
1863 | #endif |
1864 | #ifdef _DEBUG |
1865 | m_dwAbortPoint = 8; |
1866 | #endif |
1867 | |
1868 | if(requester == Thread::TAR_Thread) |
1869 | SetAborted(); |
1870 | return S_OK; |
1871 | } |
1872 | #endif // _TARGET_X86_ |
1873 | |
1874 | |
1875 | if (!m_fPreemptiveGCDisabled) |
1876 | { |
1877 | if ((m_pFrame != FRAME_TOP) && m_pFrame->IsTransitionToNativeFrame() |
1878 | #if defined(_TARGET_X86_) && !defined(WIN64EXCEPTIONS) |
1879 | && ((size_t) GetFirstCOMPlusSEHRecord(this) > ((size_t) m_pFrame) - 20) |
1880 | #endif // _TARGET_X86_ |
1881 | ) |
1882 | { |
1883 | fOutOfRuntime = TRUE; |
1884 | } |
1885 | } |
1886 | |
1887 | checkForAbort.NeedStackCrawl(); |
1888 | if (!m_fPreemptiveGCDisabled) |
1889 | { |
1890 | fNeedStackCrawl = TRUE; |
1891 | } |
1892 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
1893 | else |
1894 | { |
1895 | HandleJITCaseForAbort(); |
1896 | } |
1897 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
1898 | |
1899 | #ifndef DISABLE_THREADSUSPEND |
        // We no longer need the thread suspended; resume it.
1901 | ResumeThread(); |
1902 | #endif |
1903 | |
1904 | if (!fNeedStackCrawl) |
1905 | { |
1906 | goto LPrepareRetry; |
1907 | } |
1908 | |
1909 | #ifndef DISABLE_THREADSUSPEND |
1910 | LStackCrawl: |
1911 | #endif // DISABLE_THREADSUSPEND |
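        // ReadyForAbort walks the target thread's stack to decide whether the abort can
        // be injected at the current point; if not, release everything and poll again.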
1912 | |
1913 | if (!ReadyForAbort()) { |
1914 | goto LPrepareRetry; |
1915 | } |
1916 | |
        // !!! The check for an exception in flight should happen before the induced thread abort.
        // !!! ReadyForAbort skips catch and filter clauses.
1919 | |
        // If an exception is currently being thrown, one of two things will happen.  Either we'll
        // catch, and notice the abort request in our end-catch, or we won't catch (in which case
        // we're leaving managed code anyway).  The top-most handler is responsible for resetting
        // the bit.
1924 | // |
1925 | if (HasException() && |
            // For a rude abort, we will initiate the abort regardless.
1927 | !IsRudeAbort()) |
1928 | { |
1929 | #ifdef _DEBUG |
1930 | m_dwAbortPoint = 9; |
1931 | #endif |
1932 | break; |
1933 | } |
1934 | |
        // If the thread is in a sleep, wait, or join, interrupt it.
        // However, we do NOT want to interrupt if the thread is already processing an exception.
1937 | if (m_State & TS_Interruptible) |
1938 | { |
1939 | UserInterrupt(TI_Abort); // if the user wakes up because of this, it will read the |
1940 | // abort requested bit and initiate the abort |
1941 | #ifdef _DEBUG |
1942 | m_dwAbortPoint = 10; |
1943 | #endif |
1944 | goto LPrepareRetry; |
1945 | } |
1946 | |
1947 | if (fOutOfRuntime) |
1948 | { |
1949 | // If the thread is running outside the EE, and is behind a stub that's going |
1950 | // to catch... |
1951 | #ifdef _DEBUG |
1952 | m_dwAbortPoint = 11; |
1953 | #endif |
1954 | break; |
1955 | } |
1956 | |
1957 | // Ok. It's not in managed code, nor safely out behind a stub that's going to catch |
1958 | // it on the way in. We have to poll. |
1959 | |
1960 | LPrepareRetry: |
1961 | |
1962 | checkForAbort.Release(); |
1963 | |
1964 | if (fOneTryOnly) |
1965 | { |
1966 | break; |
1967 | } |
1968 | |
1969 | // Don't do a Sleep. It's possible that the thread we are trying to abort is |
1970 | // stuck in unmanaged code trying to get into the apartment that we are supposed |
1971 | // to be pumping! Instead, ping the current thread's handle. Obviously this |
1972 | // will time out, but it will pump if we need it to. |
1973 | if (pCurThread) |
1974 | { |
1975 | pCurThread->Join(ABORT_POLL_TIMEOUT, TRUE); |
1976 | } |
1977 | else |
1978 | { |
1979 | ClrSleepEx(ABORT_POLL_TIMEOUT, FALSE); |
1980 | } |
1981 | |
1982 | |
1983 | #ifdef _DEBUG |
1984 | elapsed_time += ABORT_POLL_TIMEOUT; |
1985 | if (g_pConfig->GetGCStressLevel() == 0 && !fAlreadyAssert) |
1986 | { |
1987 | _ASSERTE(elapsed_time < ABORT_FAIL_TIMEOUT); |
1988 | fAlreadyAssert = TRUE; |
1989 | } |
1990 | #endif |
1991 | |
1992 | } // for(;;) |
1993 | |
1994 | if (fOneTryOnly && !fNeedEscalation) |
1995 | { |
1996 | return S_OK; |
1997 | } |
1998 | |
1999 | if ((GetAbortEndTime() != MAXULONGLONG) && IsAbortRequested()) |
2000 | { |
2001 | while (TRUE) |
2002 | { |
2003 | if (!IsAbortRequested()) |
2004 | { |
2005 | return S_OK; |
2006 | } |
2007 | ULONGLONG curTime = CLRGetTickCount64(); |
2008 | if (curTime >= GetAbortEndTime()) |
2009 | { |
2010 | break; |
2011 | } |
2012 | |
2013 | if (pCurThread) |
2014 | { |
2015 | pCurThread->Join(100, TRUE); |
2016 | } |
2017 | else |
2018 | { |
2019 | ClrSleepEx(100, FALSE); |
2020 | } |
2021 | |
2022 | } |
2023 | |
2024 | if (IsAbortRequested() && fEscalation) |
2025 | { |
2026 | EPolicyAction action1; |
2027 | EClrOperation operation1; |
2028 | if (!IsRudeAbort()) |
2029 | { |
2030 | operation1 = OPR_ThreadAbort; |
2031 | } |
2032 | else if (HasLockInCurrentDomain()) |
2033 | { |
2034 | operation1 = OPR_ThreadRudeAbortInCriticalRegion; |
2035 | } |
2036 | else |
2037 | { |
2038 | operation1 = OPR_ThreadRudeAbortInNonCriticalRegion; |
2039 | } |
2040 | action1 = GetEEPolicy()->GetActionOnTimeout(operation1, this); |
2041 | switch (action1) |
2042 | { |
2043 | case eRudeAbortThread: |
2044 | GetEEPolicy()->NotifyHostOnTimeout(operation1, action1); |
2045 | MarkThreadForAbort(requester, EEPolicy::TA_Rude); |
2046 | SetRudeAbortEndTimeFromEEPolicy(); |
2047 | goto LRetry; |
2048 | case eUnloadAppDomain: |
2049 | // AD unload does not abort finalizer thread. |
2050 | if (this == FinalizerThread::GetFinalizerThread()) |
2051 | { |
2052 | GetEEPolicy()->NotifyHostOnTimeout(operation1, action1); |
2053 | MarkThreadForAbort(requester, EEPolicy::TA_Rude); |
2054 | SetRudeAbortEndTimeFromEEPolicy(); |
2055 | goto LRetry; |
2056 | } |
2057 | else |
2058 | { |
2059 | if (this == GetThread()) |
2060 | { |
2061 | Join(INFINITE,TRUE); |
2062 | } |
2063 | return S_OK; |
2064 | } |
2065 | break; |
2066 | case eRudeUnloadAppDomain: |
2067 | // AD unload does not abort finalizer thread. |
2068 | if (this == FinalizerThread::GetFinalizerThread()) |
2069 | { |
2070 | MarkThreadForAbort(requester, EEPolicy::TA_Rude); |
2071 | SetRudeAbortEndTimeFromEEPolicy(); |
2072 | goto LRetry; |
2073 | } |
2074 | else |
2075 | { |
2076 | if (this == GetThread()) |
2077 | { |
2078 | Join(INFINITE,TRUE); |
2079 | } |
2080 | return S_OK; |
2081 | } |
2082 | break; |
2083 | case eExitProcess: |
2084 | case eFastExitProcess: |
2085 | case eRudeExitProcess: |
2086 | case eDisableRuntime: |
2087 | GetEEPolicy()->NotifyHostOnTimeout(operation1, action1); |
2088 | EEPolicy::HandleExitProcessFromEscalation(action1, HOST_E_EXITPROCESS_TIMEOUT); |
2089 | _ASSERTE (!"Should not reach here" ); |
2090 | break; |
2091 | default: |
2092 | break; |
2093 | } |
2094 | } |
2095 | |
2096 | return HRESULT_FROM_WIN32(ERROR_TIMEOUT); |
2097 | } |
2098 | |
2099 | if(requester == Thread::TAR_Thread) |
2100 | SetAborted(); |
2101 | return S_OK; |
2102 | } |
2103 | #ifdef _PREFAST_ |
2104 | #pragma warning(pop) |
2105 | #endif |
2106 | |
2107 | void Thread::SetRudeAbortEndTimeFromEEPolicy() |
2108 | { |
2109 | LIMITED_METHOD_CONTRACT; |
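    // Recompute the rude-abort deadline from the EE policy's rude-abort-in-critical-region
    // timeout; MAXULONGLONG (no deadline) if the policy timeout is INFINITE.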
2110 | |
2111 | DWORD timeout = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInCriticalRegion); |
2112 | |
2113 | ULONGLONG newEndTime; |
2114 | if (timeout == INFINITE) |
2115 | { |
2116 | newEndTime = MAXULONGLONG; |
2117 | } |
2118 | else |
2119 | { |
2120 | newEndTime = CLRGetTickCount64() + timeout; |
2121 | } |
2122 | |
2123 | SetAbortEndTime(newEndTime, TRUE); |
2124 | } |
2125 | |
2126 | ULONGLONG Thread::s_NextSelfAbortEndTime = MAXULONGLONG; |
2127 | |
2128 | void Thread::ThreadAbortWatchDogAbort(Thread *pThread) |
2129 | { |
2130 | CONTRACTL |
2131 | { |
2132 | NOTHROW; |
2133 | if (GetThread()) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);} |
2134 | } |
2135 | CONTRACTL_END; |
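    // Watchdog helper: (re)initiate the abort on pThread using the strongest abort type
    // recorded in its m_AbortInfo.  Any failure from UserAbort is swallowed.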
2136 | |
2137 | EEPolicy::ThreadAbortTypes abortType = EEPolicy::TA_Safe; |
2138 | if (pThread->m_AbortInfo & TAI_ThreadRudeAbort) |
2139 | { |
2140 | abortType = EEPolicy::TA_Rude; |
2141 | } |
2142 | else if (pThread->m_AbortInfo & TAI_ThreadV1Abort) |
2143 | { |
2144 | abortType = EEPolicy::TA_V1Compatible; |
2145 | } |
2146 | else if (pThread->m_AbortInfo & TAI_ThreadAbort) |
2147 | { |
2148 | abortType = EEPolicy::TA_Safe; |
2149 | } |
2150 | else |
2151 | { |
2152 | return; |
2153 | } |
2154 | |
2155 | EX_TRY |
2156 | { |
2157 | pThread->UserAbort(Thread::TAR_Thread, abortType, INFINITE, Thread::UAC_WatchDog); |
2158 | } |
2159 | EX_CATCH |
2160 | { |
2161 | } |
2162 | EX_END_CATCH(SwallowAllExceptions); |
2163 | } |
2164 | |
2165 | void Thread::ThreadAbortWatchDogEscalate(Thread *pThread) |
2166 | { |
2167 | CONTRACTL |
2168 | { |
2169 | NOTHROW; |
2170 | if (GetThread()) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);} |
2171 | } |
2172 | CONTRACTL_END; |
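    // Watchdog helper: pThread's abort deadline has passed.  Look up the timeout action
    // for the matching operation in the EE policy and apply it; only escalation to a rude
    // abort or to process exit is supported.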
2173 | |
2174 | EPolicyAction action = eNoAction; |
2175 | EClrOperation operation = OPR_ThreadRudeAbortInNonCriticalRegion; |
2176 | if (!pThread->IsRudeAbort()) |
2177 | { |
2178 | operation = OPR_ThreadAbort; |
2179 | } |
2180 | else if (pThread->HasLockInCurrentDomain()) |
2181 | { |
2182 | operation = OPR_ThreadRudeAbortInCriticalRegion; |
2183 | } |
2184 | else |
2185 | { |
2186 | operation = OPR_ThreadRudeAbortInNonCriticalRegion; |
2187 | } |
2188 | action = GetEEPolicy()->GetActionOnTimeout(operation, pThread); |
2189 | // We only support escalation to rude abort |
2190 | |
2191 | EX_TRY { |
2192 | switch (action) |
2193 | { |
2194 | case eRudeAbortThread: |
2195 | GetEEPolicy()->NotifyHostOnTimeout(operation,action); |
2196 | pThread->UserAbort(Thread::TAR_Thread, EEPolicy::TA_Rude, INFINITE, Thread::UAC_WatchDog); |
2197 | break; |
2198 | case eExitProcess: |
2199 | case eFastExitProcess: |
2200 | case eRudeExitProcess: |
2201 | case eDisableRuntime: |
2202 | // HandleExitProcessFromEscalation will try to grab ThreadStore again. |
2203 | _ASSERTE (ThreadStore::HoldingThreadStore()); |
2204 | ThreadStore::UnlockThreadStore(); |
2205 | GetEEPolicy()->NotifyHostOnTimeout(operation,action); |
2206 | EEPolicy::HandleExitProcessFromEscalation(action, HOST_E_EXITPROCESS_THREADABORT); |
2207 | _ASSERTE (!"Should not reach here" ); |
2208 | break; |
2209 | case eNoAction: |
2210 | break; |
2211 | default: |
2212 | _ASSERTE (!"unknown policy for thread abort" ); |
2213 | } |
2214 | } |
2215 | EX_CATCH { |
2216 | } |
2217 | EX_END_CATCH(SwallowAllExceptions); |
2218 | } |
2219 | |
// If a thread is self-aborting with a timeout, we need to watch the thread
2221 | void Thread::ThreadAbortWatchDog() |
2222 | { |
2223 | CONTRACTL |
2224 | { |
2225 | NOTHROW; |
2226 | if (GetThread()) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);} |
2227 | } |
2228 | CONTRACTL_END; |
2229 | |
2230 | if (CLRHosted()) |
2231 | { |
2232 | ThreadStoreLockHolder tsLock; |
2233 | |
2234 | ULONGLONG curTime = CLRGetTickCount64(); |
2235 | |
2236 | s_NextSelfAbortEndTime = MAXULONGLONG; |
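        // s_NextSelfAbortEndTime tracks the earliest pending abort deadline seen on this
        // pass; it drops to 0 if some thread still needs its abort initiated.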
2237 | |
2238 | Thread *thread = NULL; |
2239 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
2240 | { |
2241 | if (!thread->IsAbortRequested()) |
2242 | { |
2243 | continue; |
2244 | } |
2245 | |
2246 | if (thread == FinalizerThread::GetFinalizerThread() && !g_FinalizerIsRunning) |
2247 | { |
                // If the finalizer method is not running, don't try to abort the finalizer thread.
2249 | continue; |
2250 | } |
2251 | |
2252 | BOOL fNeedsToInitiateAbort = !thread->IsAbortInitiated() || thread->IsRudeAbort(); |
2253 | ULONGLONG endTime = thread->GetAbortEndTime(); |
2254 | if (fNeedsToInitiateAbort) |
2255 | { |
2256 | s_NextSelfAbortEndTime = 0; |
2257 | } |
2258 | else if (endTime < s_NextSelfAbortEndTime) |
2259 | { |
2260 | s_NextSelfAbortEndTime = endTime; |
2261 | } |
2262 | |
2263 | if (thread->m_AbortController == 0) |
2264 | { |
2265 | STRESS_LOG3(LF_ALWAYS, LL_ALWAYS, "ThreadAbortWatchDog for Thread %p Thread Id = %x with timeout %x\n" , |
2266 | thread, thread->GetThreadId(), endTime); |
2267 | |
2268 | if (endTime != MAXULONGLONG && curTime >= endTime) |
2269 | { |
2270 | ThreadAbortWatchDogEscalate(thread); |
2271 | } |
2272 | else if (fNeedsToInitiateAbort) |
2273 | { |
2274 | ThreadAbortWatchDogAbort(thread); |
2275 | } |
2276 | } |
2277 | } |
2278 | } |
2279 | } |
2280 | |
2281 | void Thread::LockAbortRequest(Thread* pThread) |
2282 | { |
2283 | WRAPPER_NO_CONTRACT; |
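    // Spin lock protecting the thread's abort-request state.  Spin with YieldProcessor
    // for a while, then yield the processor and try again.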
2284 | |
2285 | DWORD dwSwitchCount = 0; |
2286 | |
2287 | while (TRUE) { |
2288 | for (unsigned i = 0; i < 10000; i ++) { |
2289 | if (VolatileLoad(&(pThread->m_AbortRequestLock)) == 0) { |
2290 | break; |
2291 | } |
2292 | YieldProcessor(); // indicate to the processor that we are spinning |
2293 | } |
2294 | if (FastInterlockCompareExchange(&(pThread->m_AbortRequestLock),1,0) == 0) { |
2295 | return; |
2296 | } |
2297 | __SwitchToThread(0, ++dwSwitchCount); |
2298 | } |
2299 | } |
2300 | |
2301 | void Thread::UnlockAbortRequest(Thread *pThread) |
2302 | { |
2303 | LIMITED_METHOD_CONTRACT; |
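    // Release the abort-request spin lock taken by LockAbortRequest.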
2304 | |
2305 | _ASSERTE (pThread->m_AbortRequestLock == 1); |
2306 | FastInterlockExchange(&pThread->m_AbortRequestLock, 0); |
2307 | } |
2308 | |
2309 | void Thread::MarkThreadForAbort(ThreadAbortRequester requester, EEPolicy::ThreadAbortTypes abortType, BOOL fTentative /*=FALSE*/) |
2310 | { |
2311 | CONTRACTL |
2312 | { |
2313 | NOTHROW; |
2314 | GC_NOTRIGGER; |
2315 | } |
2316 | CONTRACTL_END; |
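    // Record the abort request under the abort-request lock: accumulate the requester/type
    // bits into m_AbortInfo, compute an abort deadline from the EE policy for self-requested
    // aborts, and set TS_AbortRequested so the thread traps on its way back into the runtime.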
2317 | |
2318 | _ASSERTE ((requester & TAR_StackOverflow) == 0 || (requester & TAR_Thread) == TAR_Thread); |
2319 | |
2320 | AbortRequestLockHolder lh(this); |
2321 | |
2322 | if (fTentative) |
2323 | { |
2324 | if (!IsAbortRequested()) |
2325 | { |
2326 | STRESS_LOG0(LF_SYNC, LL_INFO1000, "Tentative thread abort abandoned\n" ); |
2327 | return; |
2328 | } |
2329 | } |
2330 | |
2331 | #ifdef _DEBUG |
2332 | if (abortType == EEPolicy::TA_Rude) |
2333 | { |
2334 | m_fRudeAborted = TRUE; |
2335 | } |
2336 | #endif |
2337 | |
2338 | DWORD abortInfo = 0; |
2339 | |
2340 | if (requester & TAR_Thread) |
2341 | { |
2342 | if (abortType == EEPolicy::TA_Safe) |
2343 | { |
2344 | abortInfo |= TAI_ThreadAbort; |
2345 | } |
2346 | else if (abortType == EEPolicy::TA_Rude) |
2347 | { |
2348 | abortInfo |= TAI_ThreadRudeAbort; |
2349 | } |
2350 | else if (abortType == EEPolicy::TA_V1Compatible) |
2351 | { |
2352 | abortInfo |= TAI_ThreadV1Abort; |
2353 | } |
2354 | } |
2355 | |
2356 | if (requester & TAR_FuncEval) |
2357 | { |
2358 | if (abortType == EEPolicy::TA_Safe) |
2359 | { |
2360 | abortInfo |= TAI_FuncEvalAbort; |
2361 | } |
2362 | else if (abortType == EEPolicy::TA_Rude) |
2363 | { |
2364 | abortInfo |= TAI_FuncEvalRudeAbort; |
2365 | } |
2366 | else if (abortType == EEPolicy::TA_V1Compatible) |
2367 | { |
2368 | abortInfo |= TAI_FuncEvalV1Abort; |
2369 | } |
2370 | } |
2371 | |
2372 | if (abortInfo == 0) |
2373 | { |
2374 | ASSERT(!"Invalid abort information" ); |
2375 | return; |
2376 | } |
2377 | |
2378 | if (requester == TAR_Thread) |
2379 | { |
2380 | DWORD timeoutFromPolicy; |
2381 | if (abortType != EEPolicy::TA_Rude) |
2382 | { |
2383 | timeoutFromPolicy = GetEEPolicy()->GetTimeout(OPR_ThreadAbort); |
2384 | } |
2385 | else if (!HasLockInCurrentDomain()) |
2386 | { |
2387 | timeoutFromPolicy = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInNonCriticalRegion); |
2388 | } |
2389 | else |
2390 | { |
2391 | timeoutFromPolicy = GetEEPolicy()->GetTimeout(OPR_ThreadRudeAbortInCriticalRegion); |
2392 | } |
2393 | if (timeoutFromPolicy != INFINITE) |
2394 | { |
2395 | ULONGLONG endTime = CLRGetTickCount64() + timeoutFromPolicy; |
2396 | if (abortType != EEPolicy::TA_Rude) |
2397 | { |
2398 | if (endTime < m_AbortEndTime) |
2399 | { |
2400 | m_AbortEndTime = endTime; |
2401 | } |
2402 | } |
2403 | else if (endTime < m_RudeAbortEndTime) |
2404 | { |
2405 | m_RudeAbortEndTime = endTime; |
2406 | } |
2407 | } |
2408 | } |
2409 | |
2410 | if (abortInfo == (m_AbortInfo & abortInfo)) |
2411 | { |
2412 | // |
2413 | // We are already doing this kind of abort. |
2414 | // |
2415 | return; |
2416 | } |
2417 | |
2418 | m_AbortInfo |= abortInfo; |
2419 | |
2420 | if (m_AbortType >= (DWORD)abortType) |
2421 | { |
2422 | // another thread is aborting at a higher level |
2423 | return; |
2424 | } |
2425 | |
2426 | m_AbortType = abortType; |
2427 | |
2428 | if (!IsAbortRequested()) |
2429 | { |
2430 | // We must set this before we start flipping thread bits to avoid races where |
2431 | // trap returning threads is already high due to other reasons. |
2432 | |
        // This is the first time the thread has been asked to abort.
2434 | SetAbortRequestBit(); |
2435 | } |
2436 | STRESS_LOG4(LF_APPDOMAIN, LL_ALWAYS, "Mark Thread %p Thread Id = %x for abort from requester %d (type %d)\n" , this, GetThreadId(), requester, abortType); |
2437 | } |
2438 | |
2439 | void Thread::SetAbortRequestBit() |
2440 | { |
2441 | WRAPPER_NO_CONTRACT; |
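    // Set TS_AbortRequested with a compare-exchange loop; on the transition, bump the
    // trap-returning-threads count so the thread polls for the pending abort.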
2442 | while (TRUE) |
2443 | { |
2444 | Volatile<LONG> curValue = (LONG)m_State; |
2445 | if ((curValue & TS_AbortRequested) != 0) |
2446 | { |
2447 | break; |
2448 | } |
2449 | if (FastInterlockCompareExchange((LONG*)&m_State, curValue|TS_AbortRequested, curValue) == curValue) |
2450 | { |
2451 | ThreadStore::TrapReturningThreads(TRUE); |
2452 | |
2453 | break; |
2454 | } |
2455 | } |
2456 | } |
2457 | |
2458 | void Thread::RemoveAbortRequestBit() |
2459 | { |
2460 | CONTRACTL { |
2461 | NOTHROW; |
2462 | GC_NOTRIGGER; |
2463 | } CONTRACTL_END; |
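    // Clear TS_AbortRequested with a compare-exchange loop; on the transition, decrement
    // the trap-returning-threads count.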
2464 | |
2465 | #ifdef _DEBUG |
2466 | // There's a race between removing the TS_AbortRequested bit and decrementing g_TrapReturningThreads |
2467 | // We may remove the bit, but before we have a chance to call ThreadStore::TrapReturningThreads(FALSE) |
2468 | // DbgFindThread() may execute, and find too few threads with the bit set. |
    // To ensure the assert in DbgFindThread does not fire under such a race, we set the ChgInFlight counter beforehand.
2470 | CounterHolder trtHolder(&g_trtChgInFlight); |
2471 | #endif |
2472 | while (TRUE) |
2473 | { |
2474 | Volatile<LONG> curValue = (LONG)m_State; |
2475 | if ((curValue & TS_AbortRequested) == 0) |
2476 | { |
2477 | break; |
2478 | } |
2479 | if (FastInterlockCompareExchange((LONG*)&m_State, curValue&(~TS_AbortRequested), curValue) == curValue) |
2480 | { |
2481 | ThreadStore::TrapReturningThreads(FALSE); |
2482 | |
2483 | break; |
2484 | } |
2485 | } |
2486 | } |
2487 | |
// Make sure that when the AbortRequest bit is cleared, we also decrement the TrapReturningThreads count.
2489 | void Thread::UnmarkThreadForAbort(ThreadAbortRequester requester, BOOL fForce) |
2490 | { |
2491 | CONTRACTL |
2492 | { |
2493 | NOTHROW; |
2494 | GC_NOTRIGGER; |
2495 | } |
2496 | CONTRACTL_END; |
2497 | |
2498 | // Switch to COOP (for ClearAbortReason) before acquiring AbortRequestLock |
2499 | GCX_COOP(); |
2500 | |
2501 | AbortRequestLockHolder lh(this); |
2502 | |
2503 | // |
2504 | // Unmark the bits that are being turned off |
2505 | // |
2506 | if (requester & TAR_Thread) |
2507 | { |
2508 | if ((m_AbortInfo != TAI_ThreadRudeAbort) || fForce) |
2509 | { |
2510 | m_AbortInfo &= ~(TAI_ThreadAbort | |
2511 | TAI_ThreadV1Abort | |
2512 | TAI_ThreadRudeAbort ); |
2513 | } |
2514 | |
2515 | if (m_AbortReason) |
2516 | { |
2517 | ClearAbortReason(TRUE); |
2518 | } |
2519 | } |
2520 | |
2521 | if (requester & TAR_FuncEval) |
2522 | { |
2523 | m_AbortInfo &= ~(TAI_FuncEvalAbort | |
2524 | TAI_FuncEvalV1Abort | |
2525 | TAI_FuncEvalRudeAbort); |
2526 | } |
2527 | |
2528 | // |
2529 | // Decide which type of abort to do based on the new bit field. |
2530 | // |
2531 | if (m_AbortInfo & TAI_AnyRudeAbort) |
2532 | { |
2533 | m_AbortType = EEPolicy::TA_Rude; |
2534 | } |
2535 | else if (m_AbortInfo & TAI_AnyV1Abort) |
2536 | { |
2537 | m_AbortType = EEPolicy::TA_V1Compatible; |
2538 | } |
2539 | else if (m_AbortInfo & TAI_AnySafeAbort) |
2540 | { |
2541 | m_AbortType = EEPolicy::TA_Safe; |
2542 | } |
2543 | else |
2544 | { |
2545 | m_AbortType = EEPolicy::TA_None; |
2546 | } |
2547 | |
2548 | // |
2549 | // If still aborting, do nothing |
2550 | // |
2551 | if (m_AbortType != EEPolicy::TA_None) |
2552 | { |
2553 | return; |
2554 | } |
2555 | |
2556 | m_AbortEndTime = MAXULONGLONG; |
2557 | m_RudeAbortEndTime = MAXULONGLONG; |
2558 | |
2559 | if (IsAbortRequested()) |
2560 | { |
2561 | RemoveAbortRequestBit(); |
2562 | FastInterlockAnd((DWORD*)&m_State,~(TS_AbortInitiated)); |
2563 | m_fRudeAbortInitiated = FALSE; |
2564 | ResetUserInterrupted(); |
2565 | } |
2566 | |
2567 | STRESS_LOG3(LF_APPDOMAIN, LL_ALWAYS, "Unmark Thread %p Thread Id = %x for abort from requester %d\n" , this, GetThreadId(), requester); |
2568 | } |
2569 | |
2570 | void Thread::InternalResetAbort(ThreadAbortRequester requester, BOOL fResetRudeAbort) |
2571 | { |
2572 | CONTRACTL { |
2573 | NOTHROW; |
2574 | GC_NOTRIGGER; |
2575 | } |
2576 | CONTRACTL_END; |
2577 | |
2578 | _ASSERTE(this == GetThread()); |
2579 | _ASSERTE(!IsDead()); |
2580 | |
    // Managed code cannot reset a rude thread abort.
2582 | UnmarkThreadForAbort(requester, fResetRudeAbort); |
2583 | } |
2584 | |
2585 | |
2586 | // Throw a thread abort request when a suspended thread is resumed. Make sure you know what you |
2587 | // are doing when you call this routine. |
2588 | void Thread::SetAbortRequest(EEPolicy::ThreadAbortTypes abortType) |
2589 | { |
2590 | CONTRACTL { |
2591 | NOTHROW; |
2592 | GC_NOTRIGGER; |
2593 | } |
2594 | CONTRACTL_END; |
2595 | |
2596 | MarkThreadForAbort(TAR_Thread, abortType); |
2597 | |
2598 | if (m_State & TS_Interruptible) |
2599 | { |
2600 | UserInterrupt(TI_Abort); |
2601 | } |
2602 | } |
2603 | |
2604 | |
2605 | void ThreadSuspend::LockThreadStore(ThreadSuspend::SUSPEND_REASON reason) |
2606 | { |
2607 | CONTRACTL { |
2608 | NOTHROW; |
2609 | if ((GetThread() != NULL) && GetThread()->PreemptiveGCDisabled()) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);} |
2610 | } |
2611 | CONTRACTL_END; |
2612 | |
2613 | // There's a nasty problem here. Once we start shutting down because of a |
2614 | // process detach notification, threads are disappearing from under us. There |
2615 | // are a surprising number of cases where the dying thread holds the ThreadStore |
2616 | // lock. For example, the finalizer thread holds this during startup in about |
2617 | // 10 of our COM BVTs. |
2618 | if (!IsAtProcessExit()) |
2619 | { |
2620 | BOOL gcOnTransitions; |
2621 | |
2622 | Thread *pCurThread = GetThread(); |
2623 | |
2624 | gcOnTransitions = GC_ON_TRANSITIONS(FALSE); // dont do GC for GCStress 3 |
2625 | |
2626 | BOOL toggleGC = ( pCurThread != NULL |
2627 | && pCurThread->PreemptiveGCDisabled() |
2628 | && reason != ThreadSuspend::SUSPEND_FOR_GC); |
2629 | |
2630 | // Note: there is logic in gc.cpp surrounding suspending all |
2631 | // runtime threads for a GC that depends on the fact that we |
2632 | // do an EnablePreemptiveGC and a DisablePreemptiveGC around |
2633 | // taking this lock. |
2634 | if (toggleGC) |
2635 | pCurThread->EnablePreemptiveGC(); |
2636 | |
2637 | LOG((LF_SYNC, INFO3, "Locking thread store\n" )); |
2638 | |
2639 | // Any thread that holds the thread store lock cannot be stopped by unmanaged breakpoints and exceptions when |
2640 | // we're doing managed/unmanaged debugging. Calling SetDebugCantStop(true) on the current thread helps us |
2641 | // remember that. |
2642 | if (pCurThread) |
2643 | pCurThread->SetDebugCantStop(true); |
2644 | |
2645 | // This is used to avoid thread starvation if non-GC threads are competing for |
2646 | // the thread store lock when there is a real GC-thread waiting to get in. |
2647 | // This is initialized lazily when the first non-GC thread backs out because of |
2648 | // a waiting GC thread. |
2649 | if (s_hAbortEvt != NULL && |
2650 | !(reason == ThreadSuspend::SUSPEND_FOR_GC || |
2651 | reason == ThreadSuspend::SUSPEND_FOR_GC_PREP || |
2652 | reason == ThreadSuspend::SUSPEND_FOR_DEBUGGER_SWEEP) && |
2653 | m_pThreadAttemptingSuspendForGC != NULL && |
2654 | m_pThreadAttemptingSuspendForGC != pCurThread) |
2655 | { |
2656 | CLREventBase * hAbortEvt = s_hAbortEvt; |
2657 | |
2658 | if (hAbortEvt != NULL) |
2659 | { |
2660 | LOG((LF_SYNC, INFO3, "Performing suspend abort wait.\n" )); |
2661 | hAbortEvt->Wait(INFINITE, FALSE); |
2662 | LOG((LF_SYNC, INFO3, "Release from suspend abort wait.\n" )); |
2663 | } |
2664 | } |
2665 | |
        // This is shutdown aware.  If we're in shutdown, and not the helper/finalizer/shutdown
        // thread, then this will not take the lock and will just block forever.
2668 | ThreadStore::s_pThreadStore->Enter(); |
2669 | |
2670 | |
2671 | _ASSERTE(ThreadStore::s_pThreadStore->m_holderthreadid.IsUnknown()); |
2672 | ThreadStore::s_pThreadStore->m_holderthreadid.SetToCurrentThread(); |
2673 | |
2674 | LOG((LF_SYNC, INFO3, "Locked thread store\n" )); |
2675 | |
2676 | // Established after we obtain the lock, so only useful for synchronous tests. |
2677 | // A thread attempting to suspend us asynchronously already holds this lock. |
2678 | ThreadStore::s_pThreadStore->m_HoldingThread = pCurThread; |
2679 | |
2680 | #ifndef _PREFAST_ |
2681 | if (toggleGC) |
2682 | pCurThread->DisablePreemptiveGC(); |
2683 | #endif |
2684 | |
2685 | GC_ON_TRANSITIONS(gcOnTransitions); |
2686 | } |
2687 | #ifdef _DEBUG |
2688 | else |
2689 | LOG((LF_SYNC, INFO3, "Locking thread store skipped upon detach\n" )); |
2690 | #endif |
2691 | } |
2692 | |
2693 | void ThreadSuspend::UnlockThreadStore(BOOL bThreadDestroyed, ThreadSuspend::SUSPEND_REASON reason) |
2694 | { |
2695 | CONTRACTL { |
2696 | NOTHROW; |
2697 | GC_NOTRIGGER; |
2698 | } |
2699 | CONTRACTL_END; |
2700 | |
2701 | // There's a nasty problem here. Once we start shutting down because of a |
2702 | // process detach notification, threads are disappearing from under us. There |
2703 | // are a surprising number of cases where the dying thread holds the ThreadStore |
2704 | // lock. For example, the finalizer thread holds this during startup in about |
2705 | // 10 of our COM BVTs. |
2706 | if (!IsAtProcessExit()) |
2707 | { |
2708 | Thread *pCurThread = GetThread(); |
2709 | |
2710 | LOG((LF_SYNC, INFO3, "Unlocking thread store\n" )); |
2711 | _ASSERTE(GetThread() == NULL || ThreadStore::s_pThreadStore->m_HoldingThread == GetThread()); |
2712 | |
2713 | #ifdef _DEBUG |
2714 | // If Thread object has been destroyed, we need to reset the ownership info in Crst. |
2715 | _ASSERTE(!bThreadDestroyed || GetThread() == NULL); |
2716 | if (bThreadDestroyed) { |
2717 | ThreadStore::s_pThreadStore->m_Crst.m_holderthreadid.SetToCurrentThread(); |
2718 | } |
2719 | #endif |
2720 | |
2721 | ThreadStore::s_pThreadStore->m_HoldingThread = NULL; |
2722 | ThreadStore::s_pThreadStore->m_holderthreadid.Clear(); |
2723 | ThreadStore::s_pThreadStore->Leave(); |
2724 | LOG((LF_SYNC, INFO3, "Unlocked thread store\n" )); |
2725 | |
2726 | // We're out of the critical area for managed/unmanaged debugging. |
2727 | if (!bThreadDestroyed && pCurThread) |
2728 | pCurThread->SetDebugCantStop(false); |
2729 | } |
2730 | #ifdef _DEBUG |
2731 | else |
2732 | LOG((LF_SYNC, INFO3, "Unlocking thread store skipped upon detach\n" )); |
2733 | #endif |
2734 | } |
2735 | |
2736 | |
2737 | void ThreadStore::AllocateOSContext() |
2738 | { |
2739 | LIMITED_METHOD_CONTRACT; |
2740 | _ASSERTE(HoldingThreadStore()); |
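    // Pre-allocate a CONTEXT under the thread store lock so GrabOSContext can hand it out
    // later without allocating; in debug builds, 0x1 marks a failed allocation so a later
    // call retries.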
2741 | if (s_pOSContext == NULL |
2742 | #ifdef _DEBUG |
2743 | || s_pOSContext == (CONTEXT*)0x1 |
2744 | #endif |
2745 | ) |
2746 | { |
2747 | s_pOSContext = new (nothrow) CONTEXT(); |
2748 | } |
2749 | #ifdef _DEBUG |
2750 | if (s_pOSContext == NULL) |
2751 | { |
2752 | s_pOSContext = (CONTEXT*)0x1; |
2753 | } |
2754 | #endif |
2755 | } |
2756 | |
2757 | CONTEXT *ThreadStore::GrabOSContext() |
2758 | { |
2759 | LIMITED_METHOD_CONTRACT; |
2760 | _ASSERTE(HoldingThreadStore()); |
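    // Hand out the cached CONTEXT (NULL if none, or if the debug allocation-failure
    // sentinel is present) and clear the cache.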
2761 | CONTEXT *pContext = s_pOSContext; |
2762 | s_pOSContext = NULL; |
2763 | #ifdef _DEBUG |
2764 | if (pContext == (CONTEXT*)0x1) |
2765 | { |
2766 | pContext = NULL; |
2767 | } |
2768 | #endif |
2769 | return pContext; |
2770 | } |
2771 | |
2772 | extern void WaitForEndOfShutdown(); |
2773 | |
2774 | //---------------------------------------------------------------------------- |
2775 | // |
2776 | // Suspending threads, rendezvousing with threads that reach safe places, etc. |
2777 | // |
2778 | //---------------------------------------------------------------------------- |
2779 | |
2780 | // A note on SUSPENSIONS. |
2781 | // |
2782 | // We must not suspend a thread while it is holding the ThreadStore lock, or |
2783 | // the lock on the thread. Why? Because we need those locks to resume the |
2784 | // thread (and to perform a GC, use the debugger, spawn or kill threads, etc.) |
2785 | // |
2786 | // There are two types of suspension we must consider to enforce the above |
2787 | // rule. Synchronous suspensions are where we persuade the thread to suspend |
2788 | // itself. This is CommonTripThread and its cousins. In other words, the |
2789 | // thread toggles the GC mode, or it hits a hijack, or certain opcodes in the |
2790 | // interpreter, etc. In these cases, the thread can simply check whether it |
2791 | // is holding these locks before it suspends itself. |
2792 | // |
2793 | // The other style is an asynchronous suspension. This is where another |
2794 | // thread looks to see where we are. If we are in a fully interruptible region |
2795 | // of JIT code, we will be left suspended. In this case, the thread performing |
2796 | // the suspension must hold the locks on the thread and the threadstore. This |
2797 | // ensures that we aren't suspended while we are holding these locks. |
2798 | // |
2799 | // Note that in the asynchronous case it's not enough to just inspect the thread |
2800 | // to see if it's holding these locks. Since the thread must be in preemptive |
// mode to block to acquire these locks, and since there will be a few
// instructions between acquiring the lock and noting in our state that we've
// acquired it, there would be a window where we would seem eligible for
// suspension -- but in fact would not be.
2805 | |
2806 | //---------------------------------------------------------------------------- |
2807 | |
// We can't leave preemptive mode and enter cooperative mode if a GC is
2809 | // currently in progress. This is the situation when returning back into |
2810 | // the EE from outside. See the comments in DisablePreemptiveGC() to understand |
2811 | // why we Enable GC here! |
2812 | void Thread::RareDisablePreemptiveGC() |
2813 | { |
2814 | BEGIN_PRESERVE_LAST_ERROR; |
2815 | |
2816 | CONTRACTL { |
2817 | NOTHROW; |
2818 | SO_TOLERANT; |
2819 | DISABLED(GC_TRIGGERS); // I think this is actually wrong: prevents a p->c->p mode switch inside a NOTRIGGER region. |
2820 | } |
2821 | CONTRACTL_END; |
2822 | |
2823 | CONTRACT_VIOLATION(SOToleranceViolation); |
2824 | |
2825 | if (IsAtProcessExit()) |
2826 | { |
2827 | goto Exit; |
2828 | } |
2829 | |
2830 | // This should NEVER be called if the TSNC_UnsafeSkipEnterCooperative bit is set! |
2831 | _ASSERTE(!(m_StateNC & TSNC_UnsafeSkipEnterCooperative) && "DisablePreemptiveGC called while the TSNC_UnsafeSkipEnterCooperative bit is set" ); |
2832 | |
    // Holding a spin lock in preemptive mode and switching to cooperative mode could leave other threads spinning,
    // waiting for the GC.
2835 | _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0); |
2836 | |
2837 | if (!GCHeapUtilities::IsGCHeapInitialized()) |
2838 | { |
2839 | goto Exit; |
2840 | } |
2841 | |
2842 | // CoreCLR does not support user-requested thread suspension |
2843 | _ASSERTE(!(m_State & TS_UserSuspendPending)); |
2844 | |
    // Note that IsGCInProgress is also true for, say, Pause (anywhere SuspendEE happens), and GCThread is the
    // thread that did the Pause.  While in Pause, if another thread attempts a Rev/PInvoke it should get inside
    // the following block and wait until resume.
2848 | if (((GCHeapUtilities::IsGCInProgress() && (this != ThreadSuspend::GetSuspensionThread())) || |
2849 | (m_State & (TS_UserSuspendPending | TS_DebugSuspendPending | TS_StackCrawlNeeded))) && |
2850 | (!g_fSuspendOnShutdown || IsFinalizerThread() || IsShutdownSpecialThread())) |
2851 | { |
2852 | if (!ThreadStore::HoldingThreadStore(this)) |
2853 | { |
2854 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n" , m_State.Load()); |
2855 | |
2856 | DWORD dwSwitchCount = 0; |
2857 | |
2858 | do |
2859 | { |
2860 | // CoreCLR does not support user-requested thread suspension |
2861 | _ASSERTE(!(m_State & TS_UserSuspendPending)); |
2862 | |
2863 | EnablePreemptiveGC(); |
2864 | |
2865 | // Cannot use GCX_PREEMP_NO_DTOR here because we're inside of the thread |
                // PREEMP->COOP switch mechanism and GCX_PREEMP's asserts will fire.
2867 | // Instead we use BEGIN_GCX_ASSERT_PREEMP to inform Scan of the mode |
2868 | // change here. |
2869 | BEGIN_GCX_ASSERT_PREEMP; |
2870 | |
2871 | // just wait until the GC is over. |
2872 | if (this != ThreadSuspend::GetSuspensionThread()) |
2873 | { |
2874 | #ifdef PROFILING_SUPPORTED |
2875 | // If profiler desires GC events, notify it that this thread is waiting until the GC is over |
2876 | // Do not send suspend notifications for debugger suspensions |
2877 | { |
2878 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
2879 | if (!(m_State & TS_DebugSuspendPending)) |
2880 | { |
2881 | g_profControlBlock.pProfInterface->RuntimeThreadSuspended((ThreadID)this); |
2882 | } |
2883 | END_PIN_PROFILER(); |
2884 | } |
2885 | #endif // PROFILING_SUPPORTED |
2886 | |
2887 | |
2888 | |
2889 | DWORD status = S_OK; |
2890 | SetThreadStateNC(TSNC_WaitUntilGCFinished); |
2891 | status = GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete(); |
2892 | ResetThreadStateNC(TSNC_WaitUntilGCFinished); |
2893 | |
2894 | if (status == (DWORD)COR_E_STACKOVERFLOW) |
2895 | { |
2896 | // One of two things can happen here: |
2897 | // 1. GC is suspending the process. GC needs to wait. |
2898 | // 2. GC is proceeding after suspension. The current thread needs to spin. |
2899 | SetThreadState(TS_BlockGCForSO); |
2900 | while (GCHeapUtilities::IsGCInProgress() && m_fPreemptiveGCDisabled.Load() == 0) |
2901 | { |
2902 | #undef Sleep |
                            // We cannot go to the host for a blocking operation due to lack of stack.
                            // Instead we will spin here until
                            // 1. the GC is finished; or
                            // 2. the GC lets this thread run and waits for it.
2907 | Sleep(10); |
2908 | #define Sleep(a) Dont_Use_Sleep(a) |
2909 | } |
2910 | ResetThreadState(TS_BlockGCForSO); |
2911 | if (m_fPreemptiveGCDisabled.Load() == 1) |
2912 | { |
2913 | // GC suspension has allowed this thread to switch back to cooperative mode. |
2914 | break; |
2915 | } |
2916 | } |
2917 | if (!GCHeapUtilities::IsGCInProgress()) |
2918 | { |
2919 | if (HasThreadState(TS_StackCrawlNeeded)) |
2920 | { |
2921 | SetThreadStateNC(TSNC_WaitUntilGCFinished); |
2922 | ThreadStore::WaitForStackCrawlEvent(); |
2923 | ResetThreadStateNC(TSNC_WaitUntilGCFinished); |
2924 | } |
2925 | else |
2926 | { |
2927 | __SwitchToThread(0, ++dwSwitchCount); |
2928 | } |
2929 | } |
2930 | |
2931 | #ifdef PROFILING_SUPPORTED |
2932 | // Let the profiler know that this thread is resuming |
2933 | { |
2934 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
2935 | g_profControlBlock.pProfInterface->RuntimeThreadResumed((ThreadID)this); |
2936 | END_PIN_PROFILER(); |
2937 | } |
2938 | #endif // PROFILING_SUPPORTED |
2939 | } |
2940 | |
2941 | END_GCX_ASSERT_PREEMP; |
2942 | |
2943 | // disable preemptive gc. |
2944 | FastInterlockOr(&m_fPreemptiveGCDisabled, 1); |
2945 | |
2946 | // The fact that we check whether 'this' is the GC thread may seem |
2947 | // strange. After all, we determined this before entering the method. |
2948 | // However, it is possible for the current thread to become the GC |
2949 | // thread while in this loop. This happens if you use the COM+ |
2950 | // debugger to suspend this thread and then release it. |
2951 | |
2952 | } while ((GCHeapUtilities::IsGCInProgress() && (this != ThreadSuspend::GetSuspensionThread())) || |
2953 | (m_State & (TS_UserSuspendPending | TS_DebugSuspendPending | TS_StackCrawlNeeded))); |
2954 | } |
2955 | STRESS_LOG0(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: leaving\n" ); |
2956 | } |
2957 | |
2958 | // Block all threads except finalizer and shutdown thread during shutdown. |
2959 | // If g_fSuspendFinalizerOnShutdown is set, block the finalizer too. |
2960 | if ((g_fSuspendOnShutdown && !IsFinalizerThread() && !IsShutdownSpecialThread()) || |
2961 | (g_fSuspendFinalizerOnShutdown && IsFinalizerThread())) |
2962 | { |
2963 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n" , m_State.Load()); |
2964 | |
2965 | EnablePreemptiveGC(); |
2966 | |
2967 | // Cannot use GCX_PREEMP_NO_DTOR here because we're inside of the thread |
        // PREEMP->COOP switch mechanism and GCX_PREEMP's asserts will fire.
2969 | // Instead we use BEGIN_GCX_ASSERT_PREEMP to inform Scan of the mode |
2970 | // change here. |
2971 | BEGIN_GCX_ASSERT_PREEMP; |
2972 | |
2973 | #ifdef PROFILING_SUPPORTED |
2974 | // If profiler desires GC events, notify it that this thread is waiting until the GC is over |
2975 | // Do not send suspend notifications for debugger suspensions |
2976 | { |
2977 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
2978 | if (!(m_State & TS_DebugSuspendPending)) |
2979 | { |
2980 | g_profControlBlock.pProfInterface->RuntimeThreadSuspended((ThreadID)this); |
2981 | } |
2982 | END_PIN_PROFILER(); |
2983 | } |
2984 | #endif // PROFILING_SUPPORTED |
2985 | |
2986 | |
2987 | |
        // The thread is blocked for shutdown.  We are not concerned about GC violations here.
2989 | CONTRACT_VIOLATION(GCViolation); |
2990 | |
2991 | WaitForEndOfShutdown(); |
2992 | |
2993 | END_GCX_ASSERT_PREEMP; |
2994 | |
2995 | __SwitchToThread(INFINITE, CALLER_LIMITS_SPINNING); |
2996 | _ASSERTE(!"Cannot reach here" ); |
2997 | } |
2998 | |
2999 | Exit: ; |
3000 | END_PRESERVE_LAST_ERROR; |
3001 | } |
3002 | |
3003 | void Thread::HandleThreadAbortTimeout() |
3004 | { |
3005 | WRAPPER_NO_CONTRACT; |
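    // The pending abort has outlived its deadline.  Look up the timeout action for the
    // matching operation in the EE policy and escalate; func-eval aborts are left
    // entirely to the debugger.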
3006 | |
3007 | EPolicyAction action = eNoAction; |
3008 | EClrOperation operation = OPR_ThreadRudeAbortInNonCriticalRegion; |
3009 | |
3010 | if (IsFuncEvalAbort()) |
3011 | { |
        // There can't be an escalation policy for a FuncEvalAbort timeout.
        // The debugger should retain control of the policy.  For example, if a RudeAbort times out, it's
        // probably because the debugger had some other thread frozen.  When the thread is thawed, things might
        // be fine, so we don't want to escalate the FuncEvalRudeAbort (which will be swallowed by FuncEvalHijackWorker)
        // into a user RudeThreadAbort (which will at least rip the entire thread).
3017 | return; |
3018 | } |
3019 | |
3020 | if (!IsRudeAbort()) |
3021 | { |
3022 | operation = OPR_ThreadAbort; |
3023 | } |
3024 | else if (HasLockInCurrentDomain()) |
3025 | { |
3026 | operation = OPR_ThreadRudeAbortInCriticalRegion; |
3027 | } |
3028 | else |
3029 | { |
3030 | operation = OPR_ThreadRudeAbortInNonCriticalRegion; |
3031 | } |
3032 | action = GetEEPolicy()->GetActionOnTimeout(operation, this); |
3033 | // We only support escalation to rude abort |
3034 | |
3035 | EX_TRY { |
3036 | switch (action) |
3037 | { |
3038 | case eRudeAbortThread: |
3039 | GetEEPolicy()->NotifyHostOnTimeout(operation,action); |
3040 | MarkThreadForAbort(TAR_Thread, EEPolicy::TA_Rude); |
3041 | break; |
3042 | case eExitProcess: |
3043 | case eFastExitProcess: |
3044 | case eRudeExitProcess: |
3045 | case eDisableRuntime: |
3046 | GetEEPolicy()->NotifyHostOnTimeout(operation,action); |
3047 | EEPolicy::HandleExitProcessFromEscalation(action, HOST_E_EXITPROCESS_THREADABORT); |
3048 | _ASSERTE (!"Should not reach here" ); |
3049 | break; |
3050 | case eNoAction: |
3051 | break; |
3052 | default: |
3053 | _ASSERTE (!"unknown policy for thread abort" ); |
3054 | } |
3055 | } |
3056 | EX_CATCH { |
3057 | } |
3058 | EX_END_CATCH(SwallowAllExceptions); |
3059 | } |
3060 | |
3061 | void Thread::HandleThreadAbort (BOOL fForce) |
3062 | { |
3063 | BEGIN_PRESERVE_LAST_ERROR; |
3064 | |
3065 | STATIC_CONTRACT_THROWS; |
3066 | STATIC_CONTRACT_GC_TRIGGERS; |
3067 | STATIC_CONTRACT_SO_TOLERANT; |
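    // Throw the pending thread abort on the current thread if it is ready (or if fForce):
    // clear any pending interrupt, switch to cooperative mode, and raise the ThreadAbort
    // or rude-abort exception.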
3068 | |
3069 | BEGIN_SO_INTOLERANT_CODE(this); |
3070 | TESTHOOKCALL(AppDomainCanBeUnloaded(GetDomain()->GetId().m_dwId,FALSE)); |
3071 | |
3072 | // It's possible we could go through here if we hit a hard SO and MC++ has called back |
3073 | // into the runtime on this thread |
3074 | |
3075 | FinishSOWork(); |
3076 | |
3077 | if (IsAbortRequested() && GetAbortEndTime() < CLRGetTickCount64()) |
3078 | { |
3079 | HandleThreadAbortTimeout(); |
3080 | } |
3081 | |
3082 | // @TODO: we should consider treating this function as an FCALL or HCALL and use FCThrow instead of COMPlusThrow |
3083 | |
3084 | // Sometimes we call this without any CLR SEH in place. An example is UMThunkStubRareDisableWorker. |
3085 | // That's okay since COMPlusThrow will eventually erect SEH around the RaiseException. It prevents |
3086 | // us from stating CONTRACT here. |
3087 | |
3088 | if (fForce || ReadyForAbort()) |
3089 | { |
3090 | ResetThreadState ((ThreadState)(TS_Interrupted | TS_Interruptible)); |
3091 | // We are going to abort. Abort satisfies Thread.Interrupt requirement. |
3092 | FastInterlockExchange (&m_UserInterrupt, 0); |
3093 | |
        // Generate either a ThreadAbortException or a rude thread abort exception.
3095 | STRESS_LOG1(LF_APPDOMAIN, LL_INFO100, "Thread::HandleThreadAbort throwing abort for %x\n" , GetThreadId()); |
3096 | |
3097 | GCX_COOP_NO_DTOR(); |
3098 | |
3099 | // Can not use holder. GCX_COOP forces the thread back to the original state during |
3100 | // exception unwinding, which may put the thread back to cooperative mode. |
3101 | // GCX_COOP(); |
3102 | |
3103 | if (!IsAbortInitiated() || |
3104 | (IsRudeAbort() && !IsRudeAbortInitiated())) |
3105 | { |
3106 | PreWorkForThreadAbort(); |
3107 | } |
3108 | |
3109 | PreparingAbortHolder paHolder; |
3110 | |
3111 | OBJECTREF exceptObj; |
3112 | |
3113 | if (IsRudeAbort()) |
3114 | { |
3115 | exceptObj = CLRException::GetPreallocatedRudeThreadAbortException(); |
3116 | } |
3117 | else |
3118 | { |
3119 | EEException eeExcept(kThreadAbortException); |
3120 | exceptObj = CLRException::GetThrowableFromException(&eeExcept); |
3121 | } |
3122 | |
3123 | RaiseTheExceptionInternalOnly(exceptObj, FALSE); |
3124 | } |
3125 | END_SO_INTOLERANT_CODE; |
3126 | |
3127 | END_PRESERVE_LAST_ERROR; |
3128 | } |
3129 | |
3130 | void Thread::PreWorkForThreadAbort() |
3131 | { |
3132 | WRAPPER_NO_CONTRACT; |
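    // One-time bookkeeping before the abort exception is raised: mark the abort as initiated,
    // clear pending interrupt state, and, for a rude abort taken while holding a lock in the
    // current domain, apply the host's default escalation policy (which may exit the process).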
3133 | |
3134 | SetAbortInitiated(); |
3135 | // if an abort and interrupt happen at the same time (e.g. on a sleeping thread), |
3136 | // the abort is favored. But we do need to reset the interrupt bits. |
3137 | FastInterlockAnd((ULONG *) &m_State, ~(TS_Interruptible | TS_Interrupted)); |
3138 | ResetUserInterrupted(); |
3139 | |
3140 | if (IsRudeAbort() && !(m_AbortInfo & (TAI_ADUnloadAbort | |
3141 | TAI_ADUnloadRudeAbort | |
3142 | TAI_ADUnloadV1Abort) |
3143 | )) { |
3144 | if (HasLockInCurrentDomain()) { |
3145 | AppDomain *pDomain = GetAppDomain(); |
3146 | // Cannot enable the following assertion. |
3147 | // We may take the lock, but the lock will be released during exception backout. |
3148 | //_ASSERTE(!pDomain->IsDefaultDomain()); |
3149 | EPolicyAction action = GetEEPolicy()->GetDefaultAction(OPR_ThreadRudeAbortInCriticalRegion, this); |
3150 | switch (action) |
3151 | { |
3152 | case eExitProcess: |
3153 | case eFastExitProcess: |
3154 | case eRudeExitProcess: |
3155 | case eDisableRuntime: |
3156 | { |
                // We're about to exit the process; if we take an SO here we'll just exit faster, right?
3158 | CONTRACT_VIOLATION(SOToleranceViolation); |
3159 | |
3160 | GetEEPolicy()->NotifyHostOnDefaultAction(OPR_ThreadRudeAbortInCriticalRegion,action); |
3161 | GetEEPolicy()->HandleExitProcessFromEscalation(action,HOST_E_EXITPROCESS_ADUNLOAD); |
3162 | } |
3163 | break; |
3164 | default: |
3165 | break; |
3166 | } |
3167 | } |
3168 | } |
3169 | } |
3170 | |
3171 | #if defined(STRESS_HEAP) && defined(_DEBUG) |
3172 | |
// This function is for GC stress testing.  Before we enable preemptive GC, do a GC here,
// because a GC may happen while the thread is in preemptive GC mode.
3175 | void Thread::PerformPreemptiveGC() |
3176 | { |
3177 | CONTRACTL { |
3178 | NOTHROW; |
3179 | DISABLED(GC_TRIGGERS); // I think this is actually wrong: prevents a p->c->p mode switch inside a NOTRIGGER region. |
3180 | DEBUG_ONLY; |
3181 | } |
3182 | CONTRACTL_END; |
3183 | |
3184 | if (IsAtProcessExit()) |
3185 | return; |
3186 | |
3187 | if (!GCStressPolicy::IsEnabled() || !GCStress<cfg_transition>::IsEnabled()) |
3188 | return; |
3189 | |
3190 | if (!GCHeapUtilities::IsGCHeapInitialized()) |
3191 | return; |
3192 | |
3193 | if (!m_GCOnTransitionsOK |
3194 | #ifdef ENABLE_CONTRACTS |
3195 | || RawGCNoTrigger() |
3196 | #endif |
3197 | || g_fEEShutDown |
3198 | || GCHeapUtilities::IsGCInProgress(TRUE) |
3199 | || GCHeapUtilities::GetGCHeap()->GetGcCount() == 0 // Need something that works for isolated heap. |
3200 | || ThreadStore::HoldingThreadStore()) |
3201 | return; |
3202 | |
3203 | if (Thread::ThreadsAtUnsafePlaces()) |
3204 | return; |
3205 | |
3206 | #ifdef DEBUGGING_SUPPORTED |
    // Don't collect if the debugger is attached and either 1) there
    // are any threads held at unsafe places or 2) this thread is
    // under the control of the debugger's dispatch logic (as
    // evidenced by having a non-NULL filter context).
3211 | if ((CORDebuggerAttached() && |
3212 | (g_pDebugInterface->ThreadsAtUnsafePlaces() || |
3213 | (GetFilterContext() != NULL)))) |
3214 | return; |
3215 | #endif // DEBUGGING_SUPPORTED |
3216 | |
3217 | _ASSERTE(m_fPreemptiveGCDisabled.Load() == 0); // we are in preemptive mode when we call this |
3218 | |
3219 | m_GCOnTransitionsOK = FALSE; |
3220 | { |
3221 | GCX_COOP(); |
3222 | m_bGCStressing = TRUE; |
3223 | |
3224 | // BUG(github #10318) - when not using allocation contexts, the alloc lock |
3225 | // must be acquired here. Until fixed, this assert prevents random heap corruption. |
3226 | _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); |
3227 | GCHeapUtilities::GetGCHeap()->StressHeap(GetThread()->GetAllocContext()); |
3228 | m_bGCStressing = FALSE; |
3229 | } |
3230 | m_GCOnTransitionsOK = TRUE; |
3231 | } |
3232 | #endif // STRESS_HEAP && DEBUG |
3233 | |
// When this thread leaves cooperative mode and enters preemptive mode, a GC that is in
// progress no longer needs to suspend it. But if we are trying to suspend the thread
// for other reasons (e.g. Thread.Suspend()), now is a good time to do so.
//
// Note that it is possible for an N/Direct call to leave the EE without explicitly
// enabling preemptive GC.
3240 | void Thread::RareEnablePreemptiveGC() |
3241 | { |
3242 | CONTRACTL { |
3243 | NOTHROW; |
3244 | DISABLED(GC_TRIGGERS); // I think this is actually wrong: prevents a p->c->p mode switch inside a NOTRIGGER region. |
3245 | SO_TOLERANT; |
3246 | } |
3247 | CONTRACTL_END; |
3248 | |
3249 | // @todo - Needs a hard SO probe |
3250 | CONTRACT_VIOLATION(GCViolation|FaultViolation|SOToleranceViolation); |
3251 | |
3252 | // If we have already received our PROCESS_DETACH during shutdown, there is only one thread in the |
3253 | // process and no coordination is necessary. |
3254 | if (IsAtProcessExit()) |
3255 | return; |
3256 | |
3257 | // EnablePreemptiveGC already set us to preemptive mode before triggering the Rare path. |
3258 | // Force other threads to see this update, since the Rare path implies that someone else |
3259 | // is observing us (e.g. SuspendRuntime). |
3260 | |
3261 | _ASSERTE (!m_fPreemptiveGCDisabled); |
3262 | |
    // Holding a spin lock in coop mode while transitioning to preemptive mode would deadlock the GC.
3264 | _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0); |
3265 | |
3266 | FastInterlockOr (&m_fPreemptiveGCDisabled, 0); |
3267 | |
3268 | #if defined(STRESS_HEAP) && defined(_DEBUG) |
3269 | if (!IsDetached()) |
3270 | PerformPreemptiveGC(); |
3271 | #endif |
3272 | |
3273 | STRESS_LOG1(LF_SYNC, LL_INFO100000, "RareEnablePreemptiveGC: entering. Thread state = %x\n" , m_State.Load()); |
3274 | if (!ThreadStore::HoldingThreadStore(this)) |
3275 | { |
3276 | #ifdef FEATURE_HIJACK |
3277 | // Remove any hijacks we might have. |
3278 | UnhijackThread(); |
3279 | #endif // FEATURE_HIJACK |
3280 | |
3281 | // wake up any threads waiting to suspend us, like the GC thread. |
3282 | ThreadSuspend::g_pGCSuspendEvent->Set(); |
3283 | |
3284 | // for GC, the fact that we are leaving the EE means that it no longer needs to |
3285 | // suspend us. But if we are doing a non-GC suspend, we need to block now. |
3286 | // Give the debugger precedence over user suspensions: |
3287 | while (m_State & (TS_DebugSuspendPending | TS_UserSuspendPending)) |
3288 | { |
3289 | // CoreCLR does not support user-requested thread suspension |
3290 | _ASSERTE(!(m_State & TS_UserSuspendPending)); |
3291 | |
3292 | #ifdef DEBUGGING_SUPPORTED |
3293 | // We don't notify the debugger that this thread is now suspended. We'll just |
3294 | // let the debugger's helper thread sweep and pick it up. |
3295 | // We also never take the TSL in here either. |
3296 | // Life's much simpler this way... |
3297 | |
3298 | |
3299 | #endif // DEBUGGING_SUPPORTED |
3300 | |
3301 | #ifdef LOGGING |
3302 | { |
3303 | LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: suspended while enabling gc.\n" , GetThreadId())); |
3304 | } |
3305 | #endif |
3306 | |
3307 | WaitSuspendEvents(); // sets bits, too |
3308 | |
3309 | } |
3310 | } |
3311 | STRESS_LOG0(LF_SYNC, LL_INFO100000, " RareEnablePreemptiveGC: leaving.\n" ); |
3312 | } |
3313 | |
3314 | // Called when we are passing through a safe point in CommonTripThread or |
3315 | // HandleGCSuspensionForInterruptedThread. Do the right thing with this thread, |
3316 | // which can either mean waiting for the GC to complete, or performing a |
3317 | // pending suspension. |
3318 | void Thread::PulseGCMode() |
3319 | { |
3320 | CONTRACTL { |
3321 | NOTHROW; |
3322 | GC_TRIGGERS; |
3323 | } |
3324 | CONTRACTL_END; |
3325 | |
3326 | _ASSERTE(this == GetThread()); |
3327 | |
3328 | if (PreemptiveGCDisabled() && CatchAtSafePoint()) |
3329 | { |
3330 | EnablePreemptiveGC(); |
3331 | DisablePreemptiveGC(); |
3332 | } |
3333 | } |
3334 | |
3335 | // Indicate whether threads should be trapped when returning to the EE (i.e. disabling |
3336 | // preemptive GC mode) |
3337 | Volatile<LONG> g_fTrapReturningThreadsLock; |
3338 | void ThreadStore::TrapReturningThreads(BOOL yes) |
3339 | { |
3340 | CONTRACTL { |
3341 | NOTHROW; |
3342 | GC_NOTRIGGER; |
3343 | } CONTRACTL_END; |
3344 | |
    // Make sure that a thread doesn't get suspended while holding g_fTrapReturningThreadsLock.
    // If a suspended thread held this lock and the suspending thread then called in
    // here (which it does), the suspending thread would deadlock, causing the suspension
    // as a whole to deadlock.
3349 | ForbidSuspendThreadHolder suspend; |
3350 | |
3351 | DWORD dwSwitchCount = 0; |
3352 | while (1 == FastInterlockExchange(&g_fTrapReturningThreadsLock, 1)) |
3353 | { |
        // We can't forbid suspension while we are sleeping and don't hold the lock.
        // This will trigger an assert on SQLCLR, but it is a general issue.
3356 | suspend.Release(); |
3357 | __SwitchToThread(0, ++dwSwitchCount); |
3358 | suspend.Acquire(); |
3359 | } |
3360 | |
3361 | if (yes) |
3362 | { |
3363 | #ifdef _DEBUG |
3364 | CounterHolder trtHolder(&g_trtChgInFlight); |
3365 | FastInterlockIncrement(&g_trtChgStamp); |
3366 | #endif |
3367 | |
3368 | GCHeapUtilities::GetGCHeap()->SetSuspensionPending(true); |
3369 | FastInterlockIncrement (&g_TrapReturningThreads); |
3370 | #ifdef ENABLE_FAST_GCPOLL_HELPER |
3371 | EnableJitGCPoll(); |
3372 | #endif |
3373 | _ASSERTE(g_TrapReturningThreads > 0); |
3374 | |
3375 | #ifdef _DEBUG |
3376 | trtHolder.Release(); |
3377 | #endif |
3378 | } |
3379 | else |
3380 | { |
3381 | FastInterlockDecrement (&g_TrapReturningThreads); |
3382 | GCHeapUtilities::GetGCHeap()->SetSuspensionPending(false); |
3383 | |
3384 | #ifdef ENABLE_FAST_GCPOLL_HELPER |
3385 | if (0 == g_TrapReturningThreads) |
3386 | { |
3387 | DisableJitGCPoll(); |
3388 | } |
3389 | #endif |
3390 | |
3391 | _ASSERTE(g_TrapReturningThreads >= 0); |
3392 | } |
3393 | #ifdef ENABLE_FAST_GCPOLL_HELPER |
3394 | //Ensure that we flush the cache line containing the GC Poll Helper. |
3395 | MemoryBarrier(); |
3396 | #endif //ENABLE_FAST_GCPOLL_HELPER |
3397 | g_fTrapReturningThreadsLock = 0; |
3398 | |
3399 | } |
3400 | |
3401 | #ifdef FEATURE_HIJACK |
3402 | |
3403 | void RedirectedThreadFrame::ExceptionUnwind() |
3404 | { |
3405 | CONTRACTL |
3406 | { |
3407 | NOTHROW; |
3408 | GC_NOTRIGGER; |
3409 | SO_TOLERANT; |
3410 | MODE_ANY; |
3411 | } |
3412 | CONTRACTL_END; |
3413 | |
3414 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "In RedirectedThreadFrame::ExceptionUnwind pFrame = %p\n" , this); |
3415 | |
3416 | Thread* pThread = GetThread(); |
3417 | |
3418 | if (pThread->GetSavedRedirectContext()) |
3419 | { |
3420 | delete m_Regs; |
3421 | } |
3422 | else |
3423 | { |
3424 | // Save it for future use to avoid repeatedly new'ing |
3425 | pThread->SetSavedRedirectContext(m_Regs); |
3426 | } |
3427 | |
3428 | m_Regs = NULL; |
3429 | } |
3430 | |
3431 | #ifndef PLATFORM_UNIX |
3432 | |
3433 | #ifdef _TARGET_X86_ |
3434 | //**************************************************************************************** |
// This will check who caused the exception. If it was caused by the redirect function,
// the intent is to resume the thread back at the point where it was redirected in the
// first place. If the exception was not caused by the redirect function, then it was
// caused by the call out to the I[GC|Debugger]ThreadControl client and we need to
// determine if it's an exception that we can just eat and let the runtime resume the
// thread, or if it's an uncatchable exception that we need to pass on to the runtime.
3441 | // |
3442 | int RedirectedHandledJITCaseExceptionFilter( |
3443 | PEXCEPTION_POINTERS pExcepPtrs, // Exception data |
3444 | RedirectedThreadFrame *pFrame, // Frame on stack |
3445 | BOOL fDone, // Whether redirect completed without exception |
3446 | CONTEXT *pCtx) // Saved context |
3447 | { |
3448 | // !!! Do not use a non-static contract here. |
3449 | // !!! Contract may insert an exception handling record. |
3450 | // !!! This function assumes that GetCurrentSEHRecord() returns the exception record set up in |
3451 | // !!! Thread::RedirectedHandledJITCase |
3452 | // |
3453 | // !!! Do not use an object with dtor, since it injects a fs:0 entry. |
3454 | STATIC_CONTRACT_NOTHROW; |
3455 | STATIC_CONTRACT_GC_TRIGGERS; |
3456 | STATIC_CONTRACT_MODE_ANY; |
3457 | |
3458 | if (pExcepPtrs->ExceptionRecord->ExceptionCode == STATUS_STACK_OVERFLOW) |
3459 | { |
3460 | return EXCEPTION_CONTINUE_SEARCH; |
3461 | } |
3462 | |
3463 | // Get the thread handle |
3464 | Thread *pThread = GetThread(); |
3465 | _ASSERTE(pThread); |
3466 | |
3467 | |
3468 | STRESS_LOG2(LF_SYNC, LL_INFO100, "In RedirectedHandledJITCaseExceptionFilter fDone = %d pFrame = %p\n" , fDone, pFrame); |
3469 | |
3470 | // If we get here via COM+ exception, gc-mode is unknown. We need it to |
3471 | // be cooperative for this function. |
3472 | GCX_COOP_NO_DTOR(); |
3473 | |
3474 | // If the exception was due to the called client, then we need to figure out if it |
3475 | // is an exception that can be eaten or if it needs to be handled elsewhere. |
3476 | if (!fDone) |
3477 | { |
3478 | if (pExcepPtrs->ExceptionRecord->ExceptionFlags & EXCEPTION_NONCONTINUABLE) |
3479 | { |
3480 | return (EXCEPTION_CONTINUE_SEARCH); |
3481 | } |
3482 | |
3483 | // Get the latest thrown object |
3484 | OBJECTREF throwable = CLRException::GetThrowableFromExceptionRecord(pExcepPtrs->ExceptionRecord); |
3485 | |
3486 | // If this is an uncatchable exception, then let the exception be handled elsewhere |
3487 | if (IsUncatchable(&throwable)) |
3488 | { |
3489 | pThread->EnablePreemptiveGC(); |
3490 | return (EXCEPTION_CONTINUE_SEARCH); |
3491 | } |
3492 | } |
3493 | #ifdef _DEBUG |
3494 | else |
3495 | { |
3496 | _ASSERTE(pExcepPtrs->ExceptionRecord->ExceptionCode == EXCEPTION_HIJACK); |
3497 | } |
3498 | #endif |
3499 | |
3500 | // Unlink the frame in preparation for resuming in managed code |
3501 | pFrame->Pop(); |
3502 | |
3503 | // Copy the saved context record into the EH context; |
3504 | ReplaceExceptionContextRecord(pExcepPtrs->ContextRecord, pCtx); |
3505 | |
3506 | DWORD espValue = pCtx->Esp; |
3507 | if (pThread->GetSavedRedirectContext()) |
3508 | { |
3509 | delete pCtx; |
3510 | } |
3511 | else |
3512 | { |
3513 | // Save it for future use to avoid repeatedly new'ing |
3514 | pThread->SetSavedRedirectContext(pCtx); |
3515 | } |
3516 | |
3517 | ///////////////////////////////////////////////////////////////////////////// |
3518 | // NOTE: Ugly, ugly workaround. |
3519 | // We need to resume the thread into the managed code where it was redirected, |
3520 | // and the corresponding ESP is below the current one. But C++ expects that |
    // on an EXCEPTION_CONTINUE_EXECUTION the ESP will be above where it has
3522 | // installed the SEH handler. To solve this, we need to remove all handlers |
3523 | // that reside above the resumed ESP, but we must leave the OS-installed |
3524 | // handler at the top, so we grab the top SEH handler, call |
3525 | // PopSEHRecords which will remove all SEH handlers above the target ESP and |
3526 | // then link the OS handler back in with SetCurrentSEHRecord. |
3527 | |
3528 | // Get the special OS handler and save it until PopSEHRecords is done |
3529 | EXCEPTION_REGISTRATION_RECORD *pCurSEH = GetCurrentSEHRecord(); |
3530 | |
3531 | // Unlink all records above the target resume ESP |
3532 | PopSEHRecords((LPVOID)(size_t)espValue); |
3533 | |
3534 | // Link the special OS handler back in to the top |
3535 | pCurSEH->Next = GetCurrentSEHRecord(); |
3536 | |
3537 | // Register the special OS handler as the top handler with the OS |
3538 | SetCurrentSEHRecord(pCurSEH); |
3539 | |
3540 | // Resume execution at point where thread was originally redirected |
3541 | return (EXCEPTION_CONTINUE_EXECUTION); |
3542 | } |
3543 | #endif // _TARGET_X86_ |
3544 | |
3545 | void NotifyHostOnGCSuspension() |
3546 | { |
3547 | CONTRACTL |
3548 | { |
3549 | NOTHROW; |
3550 | GC_NOTRIGGER; |
3551 | MODE_ANY; |
3552 | SO_TOLERANT; |
3553 | } |
3554 | CONTRACTL_END; |
3555 | |
3556 | } |
3557 | |
3558 | // This function is called from the assembly functions used to redirect a thread. It must not cause |
3559 | // an exception (except SO). |
3560 | extern "C" PCONTEXT __stdcall GetCurrentSavedRedirectContext() |
3561 | { |
3562 | LIMITED_METHOD_CONTRACT; |
3563 | |
3564 | DWORD dwLastError = GetLastError(); |
3565 | PCONTEXT pContext = GetThread()->GetSavedRedirectContext(); |
3566 | SetLastError(dwLastError); |
3567 | |
3568 | return pContext; |
3569 | } |
3570 | |
3571 | void __stdcall Thread::RedirectedHandledJITCase(RedirectReason reason) |
3572 | { |
3573 | STATIC_CONTRACT_THROWS; |
3574 | STATIC_CONTRACT_GC_TRIGGERS; |
3575 | STATIC_CONTRACT_MODE_COOPERATIVE; |
3576 | |
3577 | // We must preserve this in case we've interrupted an IL pinvoke stub before it |
3578 | // was able to save the error. |
3579 | DWORD dwLastError = GetLastError(); |
3580 | |
3581 | Thread *pThread = GetThread(); |
3582 | _ASSERTE(pThread); |
3583 | |
3584 | #ifdef FEATURE_STACK_PROBE |
3585 | if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain) |
3586 | { |
3587 | RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT), pThread); |
3588 | } |
3589 | #endif |
3590 | |
3591 | BEGIN_CONTRACT_VIOLATION(SOToleranceViolation); |
3592 | |
3593 | // Get the saved context |
3594 | CONTEXT *pCtx = pThread->GetSavedRedirectContext(); |
3595 | _ASSERTE(pCtx); |
3596 | |
3597 | INDEBUG(Thread::ObjectRefFlush(pThread)); |
3598 | |
3599 | // Create a frame on the stack |
3600 | FrameWithCookie<RedirectedThreadFrame> frame(pCtx); |
3601 | |
3602 | STRESS_LOG5(LF_SYNC, LL_INFO1000, "In RedirectedHandledJITcase reason 0x%x pFrame = %p pc = %p sp = %p fp = %p" , reason, &frame, GetIP(pCtx), GetSP(pCtx), GetFP(pCtx)); |
3603 | |
3604 | #ifdef _TARGET_X86_ |
    // This will indicate to the exception filter whether the exception was caused
    // by us or by the client.
3607 | BOOL fDone = FALSE; |
3608 | int filter_count = 0; // A counter to avoid a nasty case where an |
3609 | // up-stack filter throws another exception |
3610 | // causing our filter to be run again for |
3611 | // some unrelated exception. |
3612 | |
3613 | __try |
3614 | #endif // _TARGET_X86_ |
3615 | { |
3616 | // Make sure this thread doesn't reuse the context memory in re-entrancy cases |
3617 | _ASSERTE(pThread->GetSavedRedirectContext() != NULL); |
3618 | pThread->SetSavedRedirectContext(NULL); |
3619 | |
3620 | // Link in the frame |
3621 | frame.Push(); |
3622 | |
3623 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
3624 | if (reason == RedirectReason_GCStress) |
3625 | { |
3626 | _ASSERTE(pThread->PreemptiveGCDisabledOther()); |
3627 | DoGcStress(frame.GetContext(), NULL); |
3628 | } |
3629 | else |
3630 | #endif // HAVE_GCCOVER && USE_REDIRECT_FOR_GCSTRESS |
3631 | { |
3632 | // Enable PGC before calling out to the client to allow runtime suspend to finish |
3633 | GCX_PREEMP_NO_DTOR(); |
3634 | |
3635 | // Notify the interface of the pending suspension |
3636 | switch (reason) { |
3637 | case RedirectReason_GCSuspension: |
3638 | break; |
3639 | case RedirectReason_DebugSuspension: |
3640 | break; |
3641 | case RedirectReason_UserSuspension: |
3642 | // Do nothing; |
3643 | break; |
3644 | default: |
3645 | _ASSERTE(!"Invalid redirect reason" ); |
3646 | break; |
3647 | } |
3648 | |
3649 | // Disable preemptive GC so we can unlink the frame |
3650 | GCX_PREEMP_NO_DTOR_END(); |
3651 | } |
3652 | |
3653 | #ifdef _TARGET_X86_ |
3654 | pThread->HandleThreadAbort(); // Might throw an exception. |
3655 | |
3656 | // Indicate that the call to the service went without an exception, and that |
3657 | // we're raising our own exception to resume the thread to where it was |
3658 | // redirected from |
3659 | fDone = TRUE; |
3660 | |
3661 | // Save the instruction pointer where we redirected last. This does not race with the check |
3662 | // against this variable in HandledJitCase because the GC will not attempt to redirect the |
3663 | // thread until the instruction pointer of this thread is back in managed code. |
3664 | pThread->m_LastRedirectIP = GetIP(pCtx); |
3665 | pThread->m_SpinCount = 0; |
3666 | |
3667 | RaiseException(EXCEPTION_HIJACK, 0, 0, NULL); |
3668 | |
3669 | #else // _TARGET_X86_ |
3670 | |
3671 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
3672 | // |
3673 | // If GCStress interrupts an IL stub or inlined p/invoke while it's running in preemptive mode, it switches the mode to |
3674 | // cooperative - but we will resume to preemptive below. We should not trigger an abort in that case, as it will fail |
3675 | // due to the GC mode. |
3676 | // |
3677 | if (!pThread->m_fPreemptiveGCDisabledForGCStress) |
3678 | #endif |
3679 | { |
3680 | |
3681 | UINT_PTR uAbortAddr; |
3682 | UINT_PTR uResumePC = (UINT_PTR)GetIP(pCtx); |
3683 | CopyOSContext(pThread->m_OSContext, pCtx); |
3684 | uAbortAddr = (UINT_PTR)COMPlusCheckForAbort(); |
3685 | if (uAbortAddr) |
3686 | { |
3687 | LOG((LF_EH, LL_INFO100, "thread abort in progress, resuming thread under control... (handled jit case)\n" )); |
3688 | |
3689 | CONSISTENCY_CHECK(CheckPointer(pCtx)); |
3690 | |
3691 | STRESS_LOG1(LF_EH, LL_INFO10, "resume under control: ip: %p (handled jit case)\n" , uResumePC); |
3692 | |
3693 | SetIP(pThread->m_OSContext, uResumePC); |
3694 | |
3695 | #if defined(_TARGET_ARM_) |
3696 | // Save the original resume PC in Lr |
3697 | pCtx->Lr = uResumePC; |
3698 | |
3699 | // Since we have set a new IP, we have to clear conditional execution flags too. |
3700 | ClearITState(pThread->m_OSContext); |
3701 | #endif // _TARGET_ARM_ |
3702 | |
3703 | SetIP(pCtx, uAbortAddr); |
3704 | } |
3705 | } |
3706 | |
3707 | // Unlink the frame in preparation for resuming in managed code |
3708 | frame.Pop(); |
3709 | |
3710 | { |
3711 | // Free the context struct if we already have one cached |
3712 | if (pThread->GetSavedRedirectContext()) |
3713 | { |
3714 | CONTEXT* pCtxTemp = (CONTEXT*)_alloca(sizeof(CONTEXT)); |
3715 | memcpy(pCtxTemp, pCtx, sizeof(CONTEXT)); |
3716 | delete pCtx; |
3717 | pCtx = pCtxTemp; |
3718 | } |
3719 | else |
3720 | { |
3721 | // Save it for future use to avoid repeatedly new'ing |
3722 | pThread->SetSavedRedirectContext(pCtx); |
3723 | } |
3724 | |
3725 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
3726 | if (pThread->m_fPreemptiveGCDisabledForGCStress) |
3727 | { |
3728 | pThread->EnablePreemptiveGC(); |
3729 | pThread->m_fPreemptiveGCDisabledForGCStress = false; |
3730 | } |
3731 | #endif |
3732 | |
3733 | LOG((LF_SYNC, LL_INFO1000, "Resuming execution with RtlRestoreContext\n" )); |
3734 | |
3735 | SetLastError(dwLastError); |
3736 | |
3737 | RtlRestoreContext(pCtx, NULL); |
3738 | } |
3739 | #endif // _TARGET_X86_ |
3740 | } |
3741 | #ifdef _TARGET_X86_ |
3742 | __except (++filter_count == 1 |
3743 | ? RedirectedHandledJITCaseExceptionFilter(GetExceptionInformation(), &frame, fDone, pCtx) |
3744 | : EXCEPTION_CONTINUE_SEARCH) |
3745 | { |
3746 | _ASSERTE(!"Reached body of __except in Thread::RedirectedHandledJITCase" ); |
3747 | } |
3748 | |
3749 | #endif // _TARGET_X86_ |
3750 | |
3751 | END_CONTRACT_VIOLATION; |
3752 | |
3753 | } |
3754 | |
3755 | //**************************************************************************************** |
// This helper is called when a thread is suspended in managed code at a sequence point
// while the runtime is being suspended and there is a client interested in re-assigning
// the thread to do interesting work while the runtime is suspended. This will call into
// the client, notifying it that the thread will be suspended for a runtime suspension.
3760 | // |
3761 | void __stdcall Thread::RedirectedHandledJITCaseForDbgThreadControl() |
3762 | { |
3763 | WRAPPER_NO_CONTRACT; |
3764 | RedirectedHandledJITCase(RedirectReason_DebugSuspension); |
3765 | } |
3766 | |
3767 | //**************************************************************************************** |
// This helper is called when a thread is suspended in managed code at a sequence point
// while the runtime is being suspended.
3770 | // |
3771 | // We do this because the obvious code sequence: |
3772 | // |
3773 | // SuspendThread(t1); |
3774 | // GetContext(t1, &ctx); |
3775 | // ctx.Ecx = <some new value>; |
3776 | // SetContext(t1, &ctx); |
3777 | // ResumeThread(t1); |
3778 | // |
3779 | // simply does not work due to a nasty race with exception handling in the OS. If the |
3780 | // thread that is suspended has just faulted, then the update can disappear without ever |
3781 | // modifying the real thread ... and there is no way to tell. |
3782 | // |
3783 | // Updating the EIP may not work ... but when it doesn't, we're ok ... an exception ends |
3784 | // up getting dispatched anyway. |
3785 | // |
// If the host is interested in getting control, then we give control to the host. If the
// host is not interested in getting control, then we call out to the client. After that,
// we raise an exception and will end up waiting for the GC to finish inside the filter.
3789 | // |
3790 | void __stdcall Thread::RedirectedHandledJITCaseForGCThreadControl() |
3791 | { |
3792 | WRAPPER_NO_CONTRACT; |
3793 | RedirectedHandledJITCase(RedirectReason_GCSuspension); |
3794 | } |
3795 | |
3796 | //*********************** |
3797 | // Like the above, but called for a UserSuspend. |
3798 | // |
3799 | void __stdcall Thread::RedirectedHandledJITCaseForUserSuspend() |
3800 | { |
3801 | WRAPPER_NO_CONTRACT; |
3802 | RedirectedHandledJITCase(RedirectReason_UserSuspension); |
3803 | } |
3804 | |
3805 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
3806 | |
3807 | //*********************** |
3808 | // Like the above, but called for GC stress. |
3809 | // |
3810 | void __stdcall Thread::RedirectedHandledJITCaseForGCStress() |
3811 | { |
3812 | WRAPPER_NO_CONTRACT; |
3813 | RedirectedHandledJITCase(RedirectReason_GCStress); |
3814 | } |
3815 | |
3816 | #endif // HAVE_GCCOVER && _DEBUG && USE_REDIRECT_FOR_GCSTRESS |
3817 | |
3818 | //**************************************************************************************** |
// This will take a thread that's been suspended in managed code at a sequence point and
// will redirect the thread. It will save all register information, build a frame on the
// thread's stack, put a pointer to the frame at the top of the stack and set the IP of
// the thread to pTgt. pTgt is then responsible for unlinking the thread.
//
// NOTE: We cannot play with a suspended thread's stack memory, since the OS will use the
// top of the stack to store information. The thread must be resumed so that it can work
// with its own stack.
3827 | // |
3828 | |
3829 | #ifdef _TARGET_X86_ |
3830 | #define CONTEXT_COMPLETE (CONTEXT_FULL | CONTEXT_FLOATING_POINT | \ |
3831 | CONTEXT_DEBUG_REGISTERS | CONTEXT_EXTENDED_REGISTERS | CONTEXT_EXCEPTION_REQUEST) |
3832 | #else |
3833 | #define CONTEXT_COMPLETE (CONTEXT_FULL | CONTEXT_DEBUG_REGISTERS | CONTEXT_EXCEPTION_REQUEST) |
3834 | #endif |
3835 | |
3836 | BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt) |
3837 | { |
3838 | CONTRACTL { |
3839 | NOTHROW; |
3840 | GC_NOTRIGGER; |
3841 | } |
3842 | CONTRACTL_END; |
3843 | |
3844 | _ASSERTE(HandledJITCase()); |
3845 | _ASSERTE(GetThread() != this); |
3846 | |
3847 | //////////////////////////////////////////////////////////////// |
3848 | // Acquire a context structure to save the thread state into |
3849 | |
3850 | // We need to distinguish between two types of callers: |
3851 | // - Most callers, including GC, operate while holding the ThreadStore |
3852 | // lock. This means that we can pre-allocate a context structure |
3853 | // globally in the ThreadStore and use it in this function. |
3854 | // - Some callers (currently only YieldTask) cannot take the ThreadStore |
3855 | // lock. Therefore we always allocate a SavedRedirectContext in the |
3856 | // Thread constructor. (Since YieldTask currently is the only caller |
3857 | // that does not hold the ThreadStore lock, we only do this when |
3858 | // we're hosted.) |
3859 | |
3860 | // Check whether we have a SavedRedirectContext we can reuse: |
3861 | CONTEXT *pCtx = GetSavedRedirectContext(); |
3862 | |
3863 | // If we've never allocated a context for this thread, do so now |
3864 | if (!pCtx) |
3865 | { |
3866 | // If our caller took the ThreadStore lock, then it pre-allocated |
3867 | // a context in the ThreadStore: |
3868 | if (ThreadStore::HoldingThreadStore()) |
3869 | { |
3870 | pCtx = ThreadStore::GrabOSContext(); |
3871 | } |
3872 | |
3873 | if (!pCtx) |
3874 | { |
3875 | // Even when our caller is YieldTask, we can find a NULL |
3876 | // SavedRedirectContext in this function: Consider the scenario |
3877 | // where GC is in progress and has already redirected a thread. |
3878 | // That thread will set its SavedRedirectContext to NULL to enable |
3879 | // reentrancy. Now assume that the host calls YieldTask for the |
3880 | // redirected thread. In this case, this function will simply |
3881 | // fail, but that is fine: The redirected thread will check, |
3882 | // before it resumes execution, whether it should yield. |
3883 | return (FALSE); |
3884 | } |
3885 | |
3886 | // Save the pointer for the redirect function |
3887 | _ASSERTE(GetSavedRedirectContext() == NULL); |
3888 | SetSavedRedirectContext(pCtx); |
3889 | } |
3890 | |
3891 | ////////////////////////////////////// |
3892 | // Get and save the thread's context |
3893 | |
3894 | // Always get complete context |
3895 | pCtx->ContextFlags = CONTEXT_COMPLETE; |
3896 | BOOL bRes = EEGetThreadContext(this, pCtx); |
3897 | _ASSERTE(bRes && "Failed to GetThreadContext in RedirectThreadAtHandledJITCase - aborting redirect." ); |
3898 | |
3899 | if (!bRes) |
3900 | return (FALSE); |
3901 | |
3902 | if (!IsContextSafeToRedirect(pCtx)) |
3903 | return (FALSE); |
3904 | |
3905 | //////////////////////////////////////////////////// |
3906 | // Now redirect the thread to the helper function |
3907 | |
3908 | // Temporarily set the IP of the context to the target for SetThreadContext |
3909 | PCODE dwOrigEip = GetIP(pCtx); |
3910 | #ifdef _TARGET_ARM_ |
    // Redirection can be required while we are in an IT block.
    // In that case we must reset the IT state before redirection.
3913 | DWORD dwOrigCpsr = pCtx->Cpsr; |
3914 | ClearITState(pCtx); |
3915 | #endif |
3916 | _ASSERTE(ExecutionManager::IsManagedCode(dwOrigEip)); |
3917 | SetIP(pCtx, (PCODE)pTgt); |
3918 | |
3919 | |
3920 | STRESS_LOG4(LF_SYNC, LL_INFO10000, "Redirecting thread %p(tid=%x) from address 0x%08x to address 0x%p\n" , |
3921 | this, this->GetThreadId(), dwOrigEip, pTgt); |
3922 | |
3923 | bRes = EESetThreadContext(this, pCtx); |
3924 | _ASSERTE(bRes && "Failed to SetThreadContext in RedirectThreadAtHandledJITCase - aborting redirect." ); |
3925 | |
3926 | // Restore original IP |
3927 | SetIP(pCtx, dwOrigEip); |
3928 | #ifdef _TARGET_ARM_ |
3929 | // restore IT State in the context |
3930 | pCtx->Cpsr = dwOrigCpsr; |
3931 | #endif |
3932 | |
3933 | |
3934 | ////////////////////////////////////////////////// |
3935 | // Indicate whether or not the redirect succeeded |
3936 | |
3937 | return (bRes); |
3938 | } |
3939 | |
3940 | BOOL Thread::CheckForAndDoRedirect(PFN_REDIRECTTARGET pRedirectTarget) |
3941 | { |
3942 | CONTRACTL |
3943 | { |
3944 | NOTHROW; |
3945 | GC_NOTRIGGER; |
3946 | MODE_ANY; |
3947 | } |
3948 | CONTRACTL_END; |
3949 | |
3950 | _ASSERTE(this != GetThread()); |
3951 | _ASSERTE(PreemptiveGCDisabledOther()); |
3952 | _ASSERTE(IsAddrOfRedirectFunc(pRedirectTarget)); |
3953 | |
3954 | BOOL fRes = FALSE; |
3955 | fRes = RedirectThreadAtHandledJITCase(pRedirectTarget); |
3956 | LOG((LF_GC, LL_INFO1000, "RedirectThreadAtHandledJITCase %s.\n" , fRes ? "SUCCEEDED" : "FAILED" )); |
3957 | |
3958 | return (fRes); |
3959 | } |
3960 | |
3961 | BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONTEXT *pCurrentThreadCtx) |
3962 | { |
3963 | CONTRACTL { |
3964 | NOTHROW; |
3965 | GC_NOTRIGGER; |
3966 | } |
3967 | CONTRACTL_END; |
3968 | |
3969 | // REVISIT_TODO need equivalent of this for the current thread |
3970 | //_ASSERTE(HandledJITCase()); |
3971 | |
3972 | _ASSERTE(GetThread() == this); |
3973 | _ASSERTE(PreemptiveGCDisabledOther()); |
3974 | _ASSERTE(IsAddrOfRedirectFunc(pTgt)); |
3975 | _ASSERTE(pCurrentThreadCtx); |
3976 | _ASSERTE((pCurrentThreadCtx->ContextFlags & (CONTEXT_COMPLETE - CONTEXT_EXCEPTION_REQUEST)) |
3977 | == (CONTEXT_COMPLETE - CONTEXT_EXCEPTION_REQUEST)); |
3978 | _ASSERTE(ExecutionManager::IsManagedCode(GetIP(pCurrentThreadCtx))); |
3979 | |
3980 | //////////////////////////////////////////////////////////////// |
3981 | // Allocate a context structure to save the thread state into |
3982 | |
3983 | // Check to see if we've already got memory allocated for this purpose. |
3984 | CONTEXT *pCtx = GetSavedRedirectContext(); |
3985 | |
3986 | // If we've never allocated a context for this thread, do so now |
3987 | if (!pCtx) |
3988 | { |
3989 | pCtx = new (nothrow) CONTEXT(); |
3990 | |
3991 | if (!pCtx) |
3992 | return (FALSE); |
3993 | |
3994 | // Save the pointer for the redirect function |
3995 | _ASSERTE(GetSavedRedirectContext() == NULL); |
3996 | SetSavedRedirectContext(pCtx); |
3997 | } |
3998 | |
3999 | ////////////////////////////////////// |
4000 | // Get and save the thread's context |
4001 | |
4002 | CopyMemory(pCtx, pCurrentThreadCtx, sizeof(CONTEXT)); |
4003 | |
4004 | // Clear any new bits we don't understand (like XSAVE) in case we pass |
4005 | // this context to RtlRestoreContext (like for gcstress) |
4006 | pCtx->ContextFlags &= CONTEXT_ALL; |
4007 | |
4008 | // Ensure that this flag is set for the next time through the normal path, |
4009 | // RedirectThreadAtHandledJITCase. |
4010 | pCtx->ContextFlags |= CONTEXT_EXCEPTION_REQUEST; |
4011 | |
4012 | //////////////////////////////////////////////////// |
4013 | // Now redirect the thread to the helper function |
4014 | |
4015 | SetIP(pCurrentThreadCtx, (PCODE)pTgt); |
4016 | |
4017 | #ifdef _TARGET_ARM_ |
4018 | // Redirection can be required when in IT Block |
4019 | // Clear the IT State before redirecting |
4020 | ClearITState(pCurrentThreadCtx); |
4021 | #endif |
4022 | |
4023 | ////////////////////////////////////////////////// |
4024 | // Indicate whether or not the redirect succeeded |
4025 | |
4026 | return TRUE; |
4027 | } |
4028 | |
4029 | //************************************************************************ |
4030 | // Exception handling needs to special case the redirection. So provide |
4031 | // a helper to identify redirection targets and keep the exception |
4032 | // checks in sync with the redirection here. |
4033 | // See CPFH_AdjustContextForThreadSuspensionRace for details. |
4034 | BOOL Thread::IsAddrOfRedirectFunc(void * pFuncAddr) |
4035 | { |
4036 | WRAPPER_NO_CONTRACT; |
4037 | |
4038 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
4039 | if (pFuncAddr == GetRedirectHandlerForGCStress()) |
4040 | return TRUE; |
4041 | #endif // HAVE_GCCOVER && USE_REDIRECT_FOR_GCSTRESS |
4042 | |
4043 | return |
4044 | (pFuncAddr == GetRedirectHandlerForGCThreadControl()) || |
4045 | (pFuncAddr == GetRedirectHandlerForDbgThreadControl()) || |
4046 | (pFuncAddr == GetRedirectHandlerForUserSuspend()); |
4047 | } |
4048 | |
4049 | //************************************************************************ |
4050 | // Redirect thread at a GC suspension. |
4051 | BOOL Thread::CheckForAndDoRedirectForGC() |
4052 | { |
4053 | CONTRACTL |
4054 | { |
4055 | NOTHROW; |
4056 | GC_NOTRIGGER; |
4057 | MODE_ANY; |
4058 | } |
4059 | CONTRACTL_END; |
4060 | |
4061 | LOG((LF_GC, LL_INFO1000, "Redirecting thread %08x for GCThreadSuspension" , GetThreadId())); |
4062 | return CheckForAndDoRedirect(GetRedirectHandlerForGCThreadControl()); |
4063 | } |
4064 | |
4065 | //************************************************************************ |
4066 | // Redirect thread at a debug suspension. |
4067 | BOOL Thread::CheckForAndDoRedirectForDbg() |
4068 | { |
4069 | CONTRACTL |
4070 | { |
4071 | NOTHROW; |
4072 | GC_NOTRIGGER; |
4073 | MODE_ANY; |
4074 | } |
4075 | CONTRACTL_END; |
4076 | |
4077 | LOG((LF_CORDB, LL_INFO1000, "Redirecting thread %08x for DebugSuspension" , GetThreadId())); |
4078 | return CheckForAndDoRedirect(GetRedirectHandlerForDbgThreadControl()); |
4079 | } |
4080 | |
4081 | //************************************************************************* |
4082 | // Redirect thread at a user suspend. |
4083 | BOOL Thread::CheckForAndDoRedirectForUserSuspend() |
4084 | { |
4085 | CONTRACTL |
4086 | { |
4087 | NOTHROW; |
4088 | GC_NOTRIGGER; |
4089 | MODE_ANY; |
4090 | } |
4091 | CONTRACTL_END; |
4092 | |
4093 | LOG((LF_SYNC, LL_INFO1000, "Redirecting thread %08x for UserSuspension" , GetThreadId())); |
4094 | return CheckForAndDoRedirect(GetRedirectHandlerForUserSuspend()); |
4095 | } |
4096 | |
4097 | #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) // GCCOVER |
4098 | //************************************************************************* |
4099 | // Redirect thread at a GC stress point. |
4100 | BOOL Thread::CheckForAndDoRedirectForGCStress (CONTEXT *pCurrentThreadCtx) |
4101 | { |
4102 | WRAPPER_NO_CONTRACT; |
4103 | |
4104 | LOG((LF_CORDB, LL_INFO1000, "Redirecting thread %08x for GCStress" , GetThreadId())); |
4105 | |
4106 | m_fPreemptiveGCDisabledForGCStress = !PreemptiveGCDisabled(); |
4107 | GCX_COOP_NO_DTOR(); |
4108 | |
4109 | BOOL fSuccess = RedirectCurrentThreadAtHandledJITCase(GetRedirectHandlerForGCStress(), pCurrentThreadCtx); |
4110 | |
4111 | if (!fSuccess) |
4112 | { |
4113 | GCX_COOP_NO_DTOR_END(); |
4114 | m_fPreemptiveGCDisabledForGCStress = false; |
4115 | } |
4116 | |
4117 | return fSuccess; |
4118 | } |
4119 | #endif // HAVE_GCCOVER && USE_REDIRECT_FOR_GCSTRESS |
4120 | |
4121 | #endif // !PLATFORM_UNIX |
4122 | #endif // FEATURE_HIJACK |
4123 | |
4124 | |
4125 | #ifdef PROFILING_SUPPORTED |
4126 | // Simple helper to convert the GC's SUSPEND_REASON enum to the profiling API's public |
4127 | // COR_PRF_SUSPEND_REASON enum. Used in code:Thread::SuspendRuntime to help with |
4128 | // sending the suspension event to the profiler. |
4129 | COR_PRF_SUSPEND_REASON GCSuspendReasonToProfSuspendReason(ThreadSuspend::SUSPEND_REASON gcReason) |
4130 | { |
4131 | LIMITED_METHOD_CONTRACT; |
4132 | |
4133 | switch(gcReason) |
4134 | { |
4135 | default: |
4136 | return COR_PRF_SUSPEND_OTHER; |
4137 | case ThreadSuspend::SUSPEND_FOR_GC: |
4138 | return COR_PRF_SUSPEND_FOR_GC; |
4139 | case ThreadSuspend::SUSPEND_FOR_APPDOMAIN_SHUTDOWN: |
4140 | return COR_PRF_SUSPEND_FOR_APPDOMAIN_SHUTDOWN; |
4141 | case ThreadSuspend::SUSPEND_FOR_REJIT: |
4142 | return COR_PRF_SUSPEND_FOR_REJIT; |
4143 | case ThreadSuspend::SUSPEND_FOR_SHUTDOWN: |
4144 | return COR_PRF_SUSPEND_FOR_SHUTDOWN; |
4145 | case ThreadSuspend::SUSPEND_FOR_DEBUGGER: |
4146 | return COR_PRF_SUSPEND_FOR_INPROC_DEBUGGER; |
4147 | case ThreadSuspend::SUSPEND_FOR_GC_PREP: |
4148 | return COR_PRF_SUSPEND_FOR_GC_PREP; |
4149 | } |
4150 | } |
4151 | #endif // PROFILING_SUPPORTED |
4152 | |
4153 | //************************************************************************************ |
// To support fast application switch (FAS), one requirement is that CPU consumption
// while the CLR is paused should be zero. Given that the process will be suspended
// anyway, this should have been a no-op for the CLR. However, in Mango we ensured
// that no handle timed out and no other such context switch happened during the pause.
// To match that, and also to ensure that no context switch happens due to CLR handles
// (like waits/sleeps from BCL calls) between the pause and the time the process is
// actually suspended (~60 sec), we queue an APC to these threads and make them wait
// on the resume handle.
4162 | void __stdcall PauseAPC(__in ULONG_PTR dwParam) |
4163 | { |
4164 | CONTRACTL |
4165 | { |
4166 | NOTHROW; |
4167 | GC_NOTRIGGER; |
4168 | MODE_ANY; |
4169 | } |
4170 | CONTRACTL_END; |
4171 | |
4172 | if(g_IsPaused && (GetThread()->m_State & Thread::TS_Interruptible)) |
4173 | { |
4174 | _ASSERTE(g_ClrResumeEvent.IsValid()); |
4175 | EX_TRY { |
4176 | g_ClrResumeEvent.Wait(INFINITE, FALSE); |
4177 | } |
4178 | EX_CATCH { |
4179 | // Assert on debug builds |
4180 | _ASSERTE(FALSE); |
4181 | } |
4182 | EX_END_CATCH(SwallowAllExceptions); |
4183 | } |
4184 | } |
4185 | |
4186 | |
4187 | //************************************************************************************ |
4188 | // |
4189 | // SuspendRuntime is responsible for ensuring that all managed threads reach a |
4190 | // "safe point." It returns when all threads are known to be in "preemptive" mode. |
4191 | // This is *only* called by ThreadSuspend::SuspendEE; these two methods should really |
4192 | // be refactored into a separate "managed execution lock." |
4193 | // |
4194 | // Note that we use this method for more than just GC suspension. We also suspend |
4195 | // for debugging, etc. |
4196 | // |
4197 | // The basic algorithm is this: |
4198 | // |
4199 | // while there are threads in cooperative mode: |
4200 | // for each thread in cooprative mode: |
4201 | // suspend the native thread. |
4202 | // if it's still in cooperative mode, and it's running JIT'd code: |
4203 | // Redirect/hijack the thread |
4204 | // |
4205 | // Redirection vs. Hijacking: |
4206 | // |
4207 | // JIT'd code does not generally poll to see if a GC wants to run. Instead, the JIT |
4208 | // records "GC info" describing where the "safe points" are in the code. While we |
4209 | // have a native thread suspended in JIT'd code, we can see which instruction it |
4210 | // is currently executing. If that instruction is a safe point, then the GC may proceed. |
4211 | // Returning from a managed method is *always* a safe point, so if the thread is not |
// currently at a safe point we can "hijack" its return address. Once that is done,
// if/when the method tries to return, the thread will be sent to a hijack routine
4214 | // that will leave cooperative mode and wait for the GC to complete. |
4215 | // |
4216 | // If the thread is already at a safe point, you might think we could simply leave it |
4217 | // suspended and proceed with the GC. In principle, this should be what we do. |
4218 | // However, various historical OS bugs prevent this from working. The problem is that |
4219 | // we are not guaranteed to capture an accurate CONTEXT (register state) for a suspended |
4220 | // thread. So instead, we "redirect" the thread, by overwriting its instruction pointer. |
4221 | // We then resume the thread, and it immediately starts executing our "redirect" routine, |
4222 | // which leaves cooperative mode and waits for the GC to complete. |
4223 | // |
4224 | // See code:Thread#SuspendingTheRuntime for more |
4225 | HRESULT ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) |
4226 | { |
4227 | CONTRACTL { |
4228 | NOTHROW; |
4229 | if (GetThread()) |
4230 | { |
4231 | GC_TRIGGERS; // CLREvent::Wait is GC_TRIGGERS |
4232 | } |
4233 | else |
4234 | { |
4235 | DISABLED(GC_TRIGGERS); |
4236 | } |
4237 | } |
4238 | CONTRACTL_END; |
4239 | |
4240 | // This thread |
4241 | Thread *pCurThread = GetThread(); |
4242 | |
4243 | // The thread we're working on (suspending, etc.) right now. |
4244 | Thread *thread = NULL; |
4245 | |
4246 | // The number of threads we found in COOP mode. |
4247 | LONG countThreads = 0; |
4248 | |
4249 | DWORD res; |
4250 | |
4251 | // Caller is expected to be holding the ThreadStore lock. Also, caller must |
4252 | // have set GcInProgress before coming here, or things will break; |
4253 | _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit()); |
4254 | _ASSERTE(GCHeapUtilities::IsGCInProgress() ); |
4255 | |
4256 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime(reason=0x%x)\n" , reason); |
4257 | |
4258 | |
4259 | #ifdef PROFILING_SUPPORTED |
4260 | // If the profiler desires information about GCs, then let it know that one |
4261 | // is starting. |
4262 | { |
4263 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
4264 | _ASSERTE(reason != ThreadSuspend::SUSPEND_FOR_DEBUGGER); |
4265 | _ASSERTE(reason != ThreadSuspend::SUSPEND_FOR_DEBUGGER_SWEEP); |
4266 | |
4267 | { |
4268 | g_profControlBlock.pProfInterface->RuntimeSuspendStarted( |
4269 | GCSuspendReasonToProfSuspendReason(reason)); |
4270 | } |
4271 | if (pCurThread) |
4272 | { |
4273 | // Notify the profiler that the thread that is actually doing the GC is 'suspended', |
4274 | // meaning that it is doing stuff other than run the managed code it was before the |
4275 | // GC started. |
4276 | g_profControlBlock.pProfInterface->RuntimeThreadSuspended((ThreadID)pCurThread); |
4277 | } |
4278 | END_PIN_PROFILER(); |
4279 | } |
4280 | #endif // PROFILING_SUPPORTED |
4281 | |
4282 | // |
4283 | // If this thread is running at low priority, boost its priority. We remember the old |
4284 | // priority so that we can restore it in ResumeRuntime. |
4285 | // |
4286 | if (pCurThread) // concurrent GC occurs on threads we don't know about |
4287 | { |
4288 | _ASSERTE(pCurThread->m_Priority == INVALID_THREAD_PRIORITY); |
4289 | int priority = pCurThread->GetThreadPriority(); |
4290 | if (priority < THREAD_PRIORITY_NORMAL) |
4291 | { |
4292 | pCurThread->m_Priority = priority; |
4293 | pCurThread->SetThreadPriority(THREAD_PRIORITY_NORMAL); |
4294 | } |
4295 | } |
4296 | |
4297 | // From this point until the end of the function, consider all active thread |
4298 | // suspension to be in progress. This is mainly to give the profiler API a hint |
4299 | // that trying to suspend a thread (in order to walk its stack) could delay the |
4300 | // overall EE suspension. So the profiler API would early-abort the stackwalk |
4301 | // in such a case. |
4302 | SuspendRuntimeInProgressHolder hldSuspendRuntimeInProgress; |
4303 | |
4304 | |
4305 | // Flush the store buffers on all CPUs, to ensure two things: |
4306 | // - we get a reliable reading of the threads' m_fPreemptiveGCDisabled state |
4307 | // - other threads see that g_TrapReturningThreads is set |
4308 | // See VSW 475315 and 488918 for details. |
4309 | |
4310 | ::FlushProcessWriteBuffers(); |
4311 | |
4312 | // |
4313 | // Make a pass through all threads. We do a couple of things here: |
4314 | // 1) we count the number of threads that are observed to be in cooperative mode. |
4315 | // 2) for threads currently running managed code, we try to redirect/jihack them. |
4316 | // |
4317 | // Later we will make more passes where we do roughly the same thing. We should combine the two loops. |
4318 | // |
4319 | |
4320 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4321 | { |
4322 | if (thread->HasThreadState(Thread::TS_GCSuspendPending)) |
4323 | { |
4324 | thread->ResetThreadState(Thread::TS_GCSuspendPending); |
4325 | } |
4326 | |
4327 | if (thread == pCurThread) |
4328 | continue; |
4329 | |
4330 | STRESS_LOG3(LF_SYNC, LL_INFO10000, " Inspecting thread 0x%x ID 0x%x coop mode = %d\n" , |
4331 | thread, thread->GetThreadId(), thread->m_fPreemptiveGCDisabled.Load()); |
4332 | |
4333 | // Nothing confusing left over from last time. |
4334 | _ASSERTE((thread->m_State & Thread::TS_GCSuspendPending) == 0); |
4335 | |
4336 | // Threads can be in Preemptive or Cooperative GC mode. Threads cannot switch |
4337 | // to Cooperative mode without special treatment when a GC is happening. |
4338 | if (thread->m_fPreemptiveGCDisabled) |
4339 | { |
4340 | // Check a little more carefully. Threads might sneak out without telling |
4341 | // us, because of inlined PInvoke which doesn't go through RareEnablePreemptiveGC. |
4342 | |
4343 | #ifdef DISABLE_THREADSUSPEND |
4344 | // On platforms that do not support safe thread suspension, we do one of two things: |
4345 | // |
4346 | // - If we're on a Unix platform where hijacking is enabled, we attempt |
4347 | // to inject a GC suspension which will try to redirect or hijack the |
4348 | // thread to get it to a safe point. |
4349 | // |
4350 | // - Otherwise, we rely on the GCPOLL mechanism enabled by |
4351 | // TrapReturningThreads. |
4352 | |
4353 | // When reading shared state we need to erect appropriate memory barriers. |
4354 | // The interlocked operation below ensures that any future reads on this |
4355 | // thread will happen after any earlier writes on a different thread. |
4356 | // |
4357 | // <TODO> Need more careful review of this </TODO> |
4358 | // |
4359 | FastInterlockOr(&thread->m_fPreemptiveGCDisabled, 0); |
4360 | |
4361 | if (thread->m_fPreemptiveGCDisabled) |
4362 | { |
4363 | FastInterlockOr((ULONG *) &thread->m_State, Thread::TS_GCSuspendPending); |
4364 | countThreads++; |
4365 | |
4366 | #if defined(FEATURE_HIJACK) && defined(PLATFORM_UNIX) |
4367 | bool gcSuspensionSignalSuccess = thread->InjectGcSuspension(); |
4368 | if (!gcSuspensionSignalSuccess) |
4369 | { |
4370 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to raise GC suspension signal for thread %p.\n" , thread); |
4371 | } |
4372 | #endif // FEATURE_HIJACK && PLATFORM_UNIX |
4373 | } |
4374 | |
4375 | #else // DISABLE_THREADSUSPEND |
4376 | |
4377 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
4378 | DWORD dwSwitchCount = 0; |
4379 | RetrySuspension: |
4380 | #endif |
4381 | |
4382 | // We can not allocate memory after we suspend a thread. |
4383 | // Otherwise, we may deadlock the process, because the thread we just suspended |
4384 | // might hold locks we would need to acquire while allocating. |
4385 | ThreadStore::AllocateOSContext(); |
4386 | |
4387 | #ifdef TIME_SUSPEND |
4388 | DWORD startSuspend = g_SuspendStatistics.GetTime(); |
4389 | #endif |
4390 | |
4391 | // |
4392 | // Suspend the native thread. |
4393 | // |
4394 | Thread::SuspendThreadResult str = thread->SuspendThread(); |
4395 | |
4396 | // We should just always build with this TIME_SUSPEND stuff, and report the results via ETW. |
4397 | #ifdef TIME_SUSPEND |
4398 | g_SuspendStatistics.osSuspend.Accumulate( |
4399 | SuspendStatistics::GetElapsed(startSuspend, |
4400 | g_SuspendStatistics.GetTime())); |
4401 | |
4402 | if (str == Thread::STR_Success) |
4403 | g_SuspendStatistics.cntOSSuspendResume++; |
4404 | else |
4405 | g_SuspendStatistics.cntFailedSuspends++; |
4406 | #endif |
4407 | |
4408 | if (str == Thread::STR_NoStressLog) |
4409 | { |
4410 | STRESS_LOG2(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d\n" , thread, str); |
4411 | } |
4412 | else |
4413 | if (thread->m_fPreemptiveGCDisabled) |
4414 | { |
4415 | // We now know for sure that the thread is still in cooperative mode. If it's in JIT'd code, here |
4416 | // is where we try to hijack/redirect the thread. If it's in VM code, we have to just let the VM |
4417 | // finish what it's doing. |
4418 | |
4419 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
4420 | // Only check for HandledJITCase if we actually suspended the thread. |
4421 | if (str == Thread::STR_Success) |
4422 | { |
4423 | Thread::WorkingOnThreadContextHolder workingOnThreadContext(thread); |
4424 | |
4425 | // |
4426 | // Note that thread->HandledJITCase is not a simple predicate - it actually will hijack the thread if that's possible. |
4427 | // So HandledJITCase can do one of these: |
4428 | // |
4429 | // - Return TRUE, in which case it's our responsibility to redirect the thread |
4430 | // - Return FALSE after hijacking the thread - we shouldn't try to redirect |
4431 | // - Return FALSE but not hijack the thread - there's nothing we can do either |
4432 | // |
4433 | // Here is another great opportunity for refactoring :) |
4434 | // |
4435 | if (workingOnThreadContext.Acquired() && thread->HandledJITCase()) |
4436 | { |
4437 | // Redirect thread so we can capture a good thread context |
4438 | // (GetThreadContext is not sufficient, due to an OS bug). |
4439 | if (!thread->CheckForAndDoRedirectForGC()) |
4440 | { |
4441 | #ifdef TIME_SUSPEND |
4442 | g_SuspendStatistics.cntFailedRedirections++; |
4443 | #endif |
4444 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Failed to CheckForAndDoRedirectForGC(). Retry suspension for thread %p\n" , thread); |
4445 | thread->ResumeThread(); |
4446 | __SwitchToThread(0, ++dwSwitchCount); |
4447 | goto RetrySuspension; |
4448 | } |
4449 | #ifdef TIME_SUSPEND |
4450 | else |
4451 | g_SuspendStatistics.cntRedirections++; |
4452 | #endif |
4453 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Thread %p redirected().\n" , thread); |
4454 | } |
4455 | } |
4456 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
4457 | |
4458 | FastInterlockOr((ULONG *) &thread->m_State, Thread::TS_GCSuspendPending); |
4459 | |
4460 | countThreads++; |
4461 | |
4462 | // Only resume if we actually suspended the thread above. |
4463 | if (str == Thread::STR_Success) |
4464 | thread->ResumeThread(); |
4465 | |
4466 | STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread 0x%x is in cooperative needs to rendezvous\n" , thread); |
4467 | } |
4468 | else |
4469 | if (str == Thread::STR_Success) |
4470 | { |
4471 | STRESS_LOG1(LF_SYNC, LL_WARNING, " Inspecting thread 0x%x was in cooperative, but now is not\n" , thread); |
4472 | // Oops. |
4473 | thread->ResumeThread(); |
4474 | } |
4475 | else |
4476 | if (str == Thread::STR_SwitchedOut) { |
4477 | STRESS_LOG1(LF_SYNC, LL_WARNING, " Inspecting thread 0x%x was in cooperative, but now is switched out\n" , thread); |
4478 | } |
4479 | else { |
4480 | _ASSERTE(str == Thread::STR_Failure || str == Thread::STR_UnstartedOrDead); |
4481 | STRESS_LOG3(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d, lastError = 0x%x\n" , thread, str, GetLastError()); |
4482 | } |
4483 | |
4484 | #endif // DISABLE_THREADSUSPEND |
4485 | |
4486 | } |
4487 | else |
4488 | { |
            // To ensure 0 CPU utilization for FAS (see implementation of PauseAPC)
            // we queue the APC to all interruptible threads.
4491 | if(g_IsPaused && (thread->m_State & Thread::TS_Interruptible)) |
4492 | { |
4493 | HANDLE handle = thread->GetThreadHandle(); |
4494 | QueueUserAPC((PAPCFUNC)PauseAPC, handle, APC_Code); |
4495 | } |
4496 | } |
4497 | } |
4498 | |
4499 | #ifdef _DEBUG |
4500 | |
4501 | { |
4502 | int countCheck = 0; |
4503 | Thread *InnerThread = NULL; |
4504 | |
4505 | while ((InnerThread = ThreadStore::GetThreadList(InnerThread)) != NULL) |
4506 | { |
4507 | if (InnerThread != pCurThread && |
4508 | (InnerThread->m_State & Thread::TS_GCSuspendPending) != 0) |
4509 | { |
4510 | countCheck++; |
4511 | } |
4512 | } |
4513 | _ASSERTE(countCheck == countThreads); |
4514 | } |
4515 | |
4516 | #endif |
4517 | |
4518 | // |
4519 | // Now we keep retrying until we find that no threads are in cooperative mode. This should be merged into |
4520 | // the first loop. |
4521 | // |
4522 | while (countThreads) |
4523 | { |
4524 | _ASSERTE (thread == NULL); |
4525 | STRESS_LOG1(LF_SYNC, LL_INFO1000, " A total of %d threads need to rendezvous\n" , countThreads); |
4526 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4527 | { |
4528 | if (thread == pCurThread) |
4529 | continue; |
4530 | |
4531 | if (thread->HasThreadState(Thread::TS_BlockGCForSO)) |
4532 | { |
4533 | // The thread is trying to block for GC. But we don't have enough stack to do |
4534 | // this operation. |
4535 | // We will let the thread switch back to cooperative mode, and continue running. |
4536 | if (thread->m_fPreemptiveGCDisabled.Load() == 0) |
4537 | { |
4538 | if (!thread->HasThreadState(Thread::TS_GCSuspendPending)) |
4539 | { |
4540 | thread->SetThreadState(Thread::TS_GCSuspendPending); |
4541 | countThreads ++; |
4542 | } |
4543 | thread->ResetThreadState(Thread::TS_BlockGCForSO); |
4544 | FastInterlockOr (&thread->m_fPreemptiveGCDisabled, 1); |
4545 | } |
4546 | continue; |
4547 | } |
4548 | if ((thread->m_State & Thread::TS_GCSuspendPending) == 0) |
4549 | continue; |
4550 | |
4551 | if (!thread->m_fPreemptiveGCDisabled) |
4552 | { |
4553 | // Inlined N/Direct can sneak out to preemptive without actually checking. |
4554 | // If we find one, we can consider it suspended (since it can't get back in). |
4555 | STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive it is at a GC safe point\n" , thread); |
4556 | countThreads--; |
4557 | thread->ResetThreadState(Thread::TS_GCSuspendPending); |
4558 | |
                // To ensure 0 CPU utilization for FAS (see implementation of PauseAPC)
                // we queue the APC to all interruptible threads.
4561 | if(g_IsPaused && (thread->m_State & Thread::TS_Interruptible)) |
4562 | { |
4563 | HANDLE handle = thread->GetThreadHandle(); |
4564 | QueueUserAPC((PAPCFUNC)PauseAPC, handle, APC_Code); |
4565 | } |
4566 | } |
4567 | } |
4568 | |
4569 | if (countThreads == 0) |
4570 | { |
4571 | break; |
4572 | } |
4573 | |
4574 | #ifdef _DEBUG |
4575 | DWORD dbgStartTimeout = GetTickCount(); |
4576 | #endif |
4577 | |
4578 | // If another thread is trying to do a GC, there is a chance of deadlock |
4579 | // because this thread holds the threadstore lock and the GC thread is stuck |
4580 | // trying to get it, so this thread must bail and do a retry after the GC completes. |
4581 | // |
4582 | // <REVISIT> Shouldn't we do this only if *this* thread isn't attempting a GC? We're mostly |
4583 | // done suspending the EE at this point - why give up just because another thread wants |
4584 | // to do exactly the same thing? Note that GetGCThreadAttemptingSuspend will never (AFAIK) |
4585 | // return the current thread here, because we NULL it out after obtaining the thread store lock. </REVISIT> |
4586 | // |
4587 | if (m_pThreadAttemptingSuspendForGC != NULL && m_pThreadAttemptingSuspendForGC != pCurThread) |
4588 | { |
4589 | #ifdef PROFILING_SUPPORTED |
4590 | // Must let the profiler know that this thread is aborting its attempt at suspending |
4591 | { |
4592 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
4593 | g_profControlBlock.pProfInterface->RuntimeSuspendAborted(); |
4594 | END_PIN_PROFILER(); |
4595 | } |
4596 | #endif // PROFILING_SUPPORTED |
4597 | |
4598 | STRESS_LOG0(LF_SYNC, LL_ALWAYS, "Thread::SuspendRuntime() - Timing out.\n" ); |
4599 | return (ERROR_TIMEOUT); |
4600 | } |
4601 | |
4602 | #ifdef TIME_SUSPEND |
4603 | DWORD startWait = g_SuspendStatistics.GetTime(); |
4604 | #endif |
4605 | |
4606 | // |
        // Wait for at least one thread to tell us it's left cooperative mode.
        // We do this by waiting on g_pGCSuspendEvent. We cannot simply wait forever, because we
        // might have done return-address hijacking on a thread, and that thread might not
        // return from the method we hijacked (maybe it calls into some other managed code that
        // executes a long loop, for example). So we wait with a timeout, and retry hijacking/redirection.
4612 | // |
4613 | // This is unfortunate, because it means that in some cases we wait for PING_JIT_TIMEOUT |
4614 | // milliseconds, causing long GC pause times. |
4615 | // |
4616 | // We should fix this, by calling SwitchToThread/Sleep(0) a few times before waiting on the event. |
4617 | // This will not fix it 100% of the time (we may still have to wait on the event), but |
4618 | // the event is needed to work around limitations of SwitchToThread/Sleep(0). |
4619 | // |
4620 | // For now, we simply wait. |
4621 | // |
4622 | |
4623 | res = g_pGCSuspendEvent->Wait(PING_JIT_TIMEOUT, FALSE); |
4624 | |
4625 | |
4626 | #ifdef TIME_SUSPEND |
4627 | g_SuspendStatistics.wait.Accumulate( |
4628 | SuspendStatistics::GetElapsed(startWait, |
4629 | g_SuspendStatistics.GetTime())); |
4630 | |
4631 | g_SuspendStatistics.cntWaits++; |
4632 | if (res == WAIT_TIMEOUT) |
4633 | g_SuspendStatistics.cntWaitTimeouts++; |
4634 | #endif |
4635 | |
4636 | if (res == WAIT_TIMEOUT || res == WAIT_IO_COMPLETION) |
4637 | { |
4638 | STRESS_LOG1(LF_SYNC, LL_INFO1000, " Timed out waiting for rendezvous event %d threads remaining\n" , countThreads); |
4639 | #ifdef _DEBUG |
4640 | DWORD dbgEndTimeout = GetTickCount(); |
4641 | |
4642 | if ((dbgEndTimeout > dbgStartTimeout) && |
4643 | (dbgEndTimeout - dbgStartTimeout > g_pConfig->SuspendDeadlockTimeout())) |
4644 | { |
4645 | // Do not change this to _ASSERTE. |
4646 | // We want to catch the state of the machine at the |
4647 | // time when we cannot suspend some threads.
4648 | // It takes too long for _ASSERTE to stop the process. |
4649 | DebugBreak(); |
4650 | _ASSERTE(!"Timed out trying to suspend EE due to thread" ); |
4651 | char message[256]; |
4652 | _ASSERTE (thread == NULL); |
4653 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4654 | { |
4655 | if (thread == pCurThread) |
4656 | continue; |
4657 | |
4658 | if ((thread->m_State & Thread::TS_GCSuspendPending) == 0) |
4659 | continue; |
4660 | |
4661 | if (thread->m_fPreemptiveGCDisabled) |
4662 | { |
4663 | DWORD id = thread->m_OSThreadId; |
4664 | if (id == 0xbaadf00d) |
4665 | { |
4666 | sprintf_s (message, COUNTOF(message), "Thread CLR ID=%x cannot be suspended" , |
4667 | thread->GetThreadId()); |
4668 | } |
4669 | else |
4670 | { |
4671 | sprintf_s (message, COUNTOF(message), "Thread OS ID=%x cannot be suspended" , |
4672 | id); |
4673 | } |
4674 | DbgAssertDialog(__FILE__, __LINE__, message); |
4675 | } |
4676 | } |
4677 | // if we continue from the assert we'll reset the time |
4678 | dbgStartTimeout = GetTickCount(); |
4679 | } |
4680 | #endif |
4681 | |
4682 | #if defined(FEATURE_HIJACK) && defined(PLATFORM_UNIX) |
4683 | _ASSERTE (thread == NULL); |
4684 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4685 | { |
4686 | if (thread == pCurThread) |
4687 | continue; |
4688 | |
4689 | if ((thread->m_State & Thread::TS_GCSuspendPending) == 0) |
4690 | continue; |
4691 | |
4692 | if (!thread->m_fPreemptiveGCDisabled) |
4693 | continue; |
4694 | |
4695 | // When we tried to inject the suspension before, we may have been in a place |
4696 | // where it wasn't possible. Try one more time. |
4697 | bool gcSuspensionSignalSuccess = thread->InjectGcSuspension(); |
4698 | if (!gcSuspensionSignalSuccess) |
4699 | { |
4700 | // If we failed to raise the signal for some reason, just log it and move on. |
4701 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to raise GC suspension signal for thread %p.\n" , thread); |
4702 | } |
4703 | } |
4704 | #endif |
4705 | |
4706 | #ifndef DISABLE_THREADSUSPEND |
4707 | // all these threads should be in cooperative mode unless they have |
4708 | // set their SafeEvent on the way out. But there's a race between |
4709 | // when we time out and when they toggle their mode, so sometimes |
4710 | // we will suspend a thread that has just left. |
4711 | _ASSERTE (thread == NULL); |
4712 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4713 | { |
4714 | if (thread == pCurThread) |
4715 | continue; |
4716 | |
4717 | if ((thread->m_State & Thread::TS_GCSuspendPending) == 0) |
4718 | continue; |
4719 | |
4720 | if (!thread->m_fPreemptiveGCDisabled) |
4721 | continue; |
4722 | |
4723 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
4724 | RetrySuspension2: |
4725 | #endif |
4726 | // We cannot allocate memory after we suspend a thread.
4727 | // Otherwise, we may deadlock the process when the CLR is hosted.
4728 | ThreadStore::AllocateOSContext(); |
4729 | |
4730 | #ifdef TIME_SUSPEND |
4731 | DWORD startSuspend = g_SuspendStatistics.GetTime(); |
4732 | #endif |
4733 | |
4734 | Thread::SuspendThreadResult str = thread->SuspendThread(); |
4735 | |
4736 | #ifdef TIME_SUSPEND |
4737 | g_SuspendStatistics.osSuspend.Accumulate( |
4738 | SuspendStatistics::GetElapsed(startSuspend, |
4739 | g_SuspendStatistics.GetTime())); |
4740 | |
4741 | if (str == Thread::STR_Success) |
4742 | g_SuspendStatistics.cntOSSuspendResume++; |
4743 | else |
4744 | g_SuspendStatistics.cntFailedSuspends++; |
4745 | #endif |
4746 | |
4747 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
4748 | // Only check HandledJITCase if we actually suspended the thread, and |
4749 | // the thread is in cooperative mode. |
4750 | // See comment at the previous invocation of HandledJITCase - it does |
4751 | // more than you think! |
4752 | if (str == Thread::STR_Success && thread->m_fPreemptiveGCDisabled) |
4753 | { |
4754 | Thread::WorkingOnThreadContextHolder workingOnThreadContext(thread); |
4755 | if (workingOnThreadContext.Acquired() && thread->HandledJITCase()) |
4756 | { |
4757 | // Redirect thread so we can capture a good thread context |
4758 | // (GetThreadContext is not sufficient, due to an OS bug). |
4759 | if (!thread->CheckForAndDoRedirectForGC()) |
4760 | { |
4761 | #ifdef TIME_SUSPEND |
4762 | g_SuspendStatistics.cntFailedRedirections++; |
4763 | #endif |
4764 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Failed to CheckForAndDoRedirectForGC(). Retry suspension 2 for thread %p\n" , thread); |
4765 | thread->ResumeThread(); |
4766 | goto RetrySuspension2; |
4767 | } |
4768 | #ifdef TIME_SUSPEND |
4769 | else |
4770 | g_SuspendStatistics.cntRedirections++; |
4771 | #endif |
4772 | } |
4773 | } |
4774 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
4775 | |
4776 | if (str == Thread::STR_Success) |
4777 | thread->ResumeThread(); |
4778 | } |
4779 | #endif // DISABLE_THREADSUSPEND |
4780 | } |
4781 | else if (res == WAIT_OBJECT_0)
4783 | { |
4784 | g_pGCSuspendEvent->Reset(); |
4785 | continue; |
4786 | } |
4787 | else |
4788 | { |
4789 | // No WAIT_FAILED, WAIT_ABANDONED, etc. |
4790 | _ASSERTE(!"unexpected wait termination during gc suspension" ); |
4791 | } |
4792 | } |
4793 | |
4794 | #ifdef PROFILING_SUPPORTED |
4795 | // If a profiler is keeping track of GC events, notify it |
4796 | { |
4797 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
4798 | g_profControlBlock.pProfInterface->RuntimeSuspendFinished(); |
4799 | END_PIN_PROFILER(); |
4800 | } |
4801 | #endif // PROFILING_SUPPORTED |
4802 | |
4803 | #ifdef _DEBUG |
4804 | if (reason == ThreadSuspend::SUSPEND_FOR_GC) { |
4805 | thread = NULL; |
4806 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4807 | { |
4808 | thread->DisableStressHeap(); |
4809 | _ASSERTE (!thread->HasThreadState(Thread::TS_GCSuspendPending)); |
4810 | } |
4811 | } |
4812 | #endif |
4813 | |
4814 | // We know all threads are in preemptive mode, so go ahead and reset the event. |
4815 | g_pGCSuspendEvent->Reset(); |
4816 | |
4817 | #ifdef HAVE_GCCOVER |
4818 | // |
4819 | // Now that the EE has been suspended, let's see if any outstanding
4820 | // gcstress instruction updates need to occur. Each thread can |
4821 | // have only one pending at a time. |
4822 | // |
4823 | thread = NULL; |
4824 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
4825 | { |
4826 | thread->CommitGCStressInstructionUpdate(); |
4827 | } |
4828 | #endif // HAVE_GCCOVER |
4829 | |
4830 | STRESS_LOG0(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Success\n" ); |
4831 | return S_OK; |
4832 | } |
4833 | |
4834 | #ifdef HAVE_GCCOVER |
4835 | |
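// If this thread has a pending GC-stress instruction update, TryClearGCStressInstructionUpdate
// yields the source and destination addresses; we then copy the replacement instruction bytes
// over the destination code and flush the instruction cache. The copy size follows the target
// architecture: one byte on x86/amd64, 2 or 4 bytes on ARM (depending on the instruction
// length), and 4 bytes on ARM64.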
4836 | void Thread::CommitGCStressInstructionUpdate() |
4837 | { |
4838 | CONTRACTL |
4839 | { |
4840 | NOTHROW; |
4841 | GC_NOTRIGGER; |
4842 | MODE_ANY; |
4843 | } |
4844 | CONTRACTL_END; |
4845 | |
4846 | BYTE* pbDestCode = NULL; |
4847 | BYTE* pbSrcCode = NULL; |
4848 | |
4849 | if (TryClearGCStressInstructionUpdate(&pbDestCode, &pbSrcCode)) |
4850 | { |
4851 | assert(pbDestCode != NULL); |
4852 | assert(pbSrcCode != NULL); |
4853 | |
4854 | #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) |
4855 | |
4856 | *pbDestCode = *pbSrcCode; |
4857 | |
4858 | #elif defined(_TARGET_ARM_) |
4859 | |
4860 | if (GetARMInstructionLength(pbDestCode) == 2) |
4861 | *(WORD*)pbDestCode = *(WORD*)pbSrcCode; |
4862 | else |
4863 | *(DWORD*)pbDestCode = *(DWORD*)pbSrcCode; |
4864 | |
4865 | #elif defined(_TARGET_ARM64_) |
4866 | |
4867 | *(DWORD*)pbDestCode = *(DWORD*)pbSrcCode; |
4868 | |
4869 | #else |
4870 | |
4871 | *pbDestCode = *pbSrcCode; |
4872 | |
4873 | #endif |
4874 | |
4875 | FlushInstructionCache(GetCurrentProcess(), (LPCVOID)pbDestCode, 4); |
4876 | } |
4877 | } |
4878 | |
4879 | #endif // HAVE_GCCOVER |
4880 | |
4881 | |
4882 | #ifdef _DEBUG |
4883 | void EnableStressHeapHelper() |
4884 | { |
4885 | WRAPPER_NO_CONTRACT; |
4886 | ENABLESTRESSHEAP(); |
4887 | } |
4888 | #endif |
4889 | |
4890 | // We're done with our GC. Let all the threads run again. |
4891 | // By this point we've already unblocked most threads. This just releases the ThreadStore lock. |
4892 | void ThreadSuspend::ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceded) |
4893 | { |
4894 | CONTRACTL { |
4895 | NOTHROW; |
4896 | if (GetThread()) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);} |
4897 | } |
4898 | CONTRACTL_END; |
4899 | |
4900 | Thread *pCurThread = GetThread(); |
4901 | |
4902 | // Caller is expected to be holding the ThreadStore lock. But they must have |
4903 | // reset GcInProgress, or threads will continue to suspend themselves and won't |
4904 | // be resumed until the next GC. |
4905 | _ASSERTE(IsGCSpecialThread() || ThreadStore::HoldingThreadStore()); |
4906 | _ASSERTE(!GCHeapUtilities::IsGCInProgress() ); |
4907 | |
4908 | STRESS_LOG2(LF_SYNC, LL_INFO1000, "Thread::ResumeRuntime(finishedGC=%d, SuspendSucceeded=%d) - Start\n" , bFinishedGC, SuspendSucceded); |
4909 | |
4910 | // |
4911 | // Notify everyone who cares that this suspension is over and that this thread is going to go do other things.
4912 | // |
4913 | |
4914 | |
4915 | #ifdef PROFILING_SUPPORTED |
4916 | // Need to give resume event for the GC thread |
4917 | { |
4918 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
4919 | if (pCurThread) |
4920 | { |
4921 | g_profControlBlock.pProfInterface->RuntimeThreadResumed((ThreadID)pCurThread); |
4922 | } |
4923 | END_PIN_PROFILER(); |
4924 | } |
4925 | #endif // PROFILING_SUPPORTED |
4926 | |
4927 | #ifdef TIME_SUSPEND |
4928 | DWORD startRelease = g_SuspendStatistics.GetTime(); |
4929 | #endif |
4930 | |
4931 | // |
4932 | // Unlock the thread store. At this point, all threads should be allowed to run. |
4933 | // |
4934 | ThreadSuspend::UnlockThreadStore(); |
4935 | |
4936 | #ifdef TIME_SUSPEND |
4937 | g_SuspendStatistics.releaseTSL.Accumulate(SuspendStatistics::GetElapsed(startRelease, |
4938 | g_SuspendStatistics.GetTime())); |
4939 | #endif |
4940 | |
4941 | #ifdef PROFILING_SUPPORTED |
4942 | // |
4943 | // This thread is logically "resuming" from a GC now. Tell the profiler. |
4944 | // |
4945 | { |
4946 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
4947 | GCX_PREEMP(); |
4948 | g_profControlBlock.pProfInterface->RuntimeResumeFinished(); |
4949 | END_PIN_PROFILER(); |
4950 | } |
4951 | #endif // PROFILING_SUPPORTED |
4952 | |
4953 | // |
4954 | // If we raised this thread's priority in SuspendRuntime, we restore it here. |
4955 | // |
4956 | if (pCurThread) |
4957 | { |
4958 | if (pCurThread->m_Priority != INVALID_THREAD_PRIORITY) |
4959 | { |
4960 | pCurThread->SetThreadPriority(pCurThread->m_Priority); |
4961 | pCurThread->m_Priority = INVALID_THREAD_PRIORITY; |
4962 | } |
4963 | } |
4964 | |
4965 | STRESS_LOG0(LF_SYNC, LL_INFO1000, "Thread::ResumeRuntime() - End\n" ); |
4966 | } |
4967 | |
4968 | #ifndef FEATURE_PAL |
4969 | #ifdef _TARGET_X86_ |
4970 | //**************************************************************************************** |
4971 | // This will resume the thread at the location of redirection. |
4972 | // |
4973 | int RedirectedThrowControlExceptionFilter( |
4974 | PEXCEPTION_POINTERS pExcepPtrs // Exception data |
4975 | ) |
4976 | { |
4977 | // !!! Do not use a non-static contract here. |
4978 | // !!! Contract may insert an exception handling record. |
4979 | // !!! This function assumes that GetCurrentSEHRecord() returns the exception record set up in |
4980 | // !!! ThrowControlForThread |
4981 | STATIC_CONTRACT_NOTHROW; |
4982 | STATIC_CONTRACT_GC_NOTRIGGER; |
4983 | STATIC_CONTRACT_MODE_ANY; |
4984 | |
4985 | if (pExcepPtrs->ExceptionRecord->ExceptionCode == STATUS_STACK_OVERFLOW) |
4986 | { |
4987 | return EXCEPTION_CONTINUE_SEARCH; |
4988 | } |
4989 | |
4990 | // Get the thread handle |
4991 | Thread *pThread = GetThread(); |
4992 | _ASSERTE(pThread); |
4993 | |
4994 | |
4995 | STRESS_LOG0(LF_SYNC, LL_INFO100, "In RedirectedThrowControlExceptionFilter\n" ); |
4996 | |
4997 | // If we get here via a COM+ exception, the GC mode is unknown. We need it to
4998 | // be cooperative for this function. |
4999 | _ASSERTE (pThread->PreemptiveGCDisabled()); |
5000 | |
5001 | _ASSERTE(pExcepPtrs->ExceptionRecord->ExceptionCode == BOOTUP_EXCEPTION_COMPLUS); |
5002 | |
5003 | // Copy the saved context record into the EH context; |
5004 | CONTEXT *pCtx = pThread->m_OSContext; |
5005 | ReplaceExceptionContextRecord(pExcepPtrs->ContextRecord, pCtx); |
5006 | |
5007 | ///////////////////////////////////////////////////////////////////////////// |
5008 | // NOTE: Ugly, ugly workaround. |
5009 | // We need to resume the thread into the managed code where it was redirected, |
5010 | // and the corresponding ESP is below the current one. But C++ expects that |
5011 | // on an EXCEPTION_CONTINUE_EXECUTION the ESP will be above where it has
5012 | // installed the SEH handler. To solve this, we need to remove all handlers |
5013 | // that reside above the resumed ESP, but we must leave the OS-installed |
5014 | // handler at the top, so we grab the top SEH handler, call |
5015 | // PopSEHRecords which will remove all SEH handlers above the target ESP and |
5016 | // then link the OS handler back in with SetCurrentSEHRecord. |
5017 | |
5018 | // Get the special OS handler and save it until PopSEHRecords is done |
5019 | EXCEPTION_REGISTRATION_RECORD *pCurSEH = GetCurrentSEHRecord(); |
5020 | |
5021 | // Unlink all records above the target resume ESP |
5022 | PopSEHRecords((LPVOID)(size_t)pCtx->Esp); |
5023 | |
5024 | // Link the special OS handler back in to the top |
5025 | pCurSEH->Next = GetCurrentSEHRecord(); |
5026 | |
5027 | // Register the special OS handler as the top handler with the OS |
5028 | SetCurrentSEHRecord(pCurSEH); |
5029 | |
5030 | // Resume execution at point where thread was originally redirected |
5031 | return (EXCEPTION_CONTINUE_EXECUTION); |
5032 | } |
5033 | #endif |
5034 | #endif // !FEATURE_PAL |
5035 | |
5036 | // Resume a thread at this location, to persuade it to throw a ThreadStop. The |
5037 | // exception handler needs a reasonable idea of how large this method is, so don't |
5038 | // add lots of arbitrary code here. |
5039 | void |
5040 | ThrowControlForThread( |
5041 | #ifdef WIN64EXCEPTIONS |
5042 | FaultingExceptionFrame *pfef |
5043 | #endif // WIN64EXCEPTIONS |
5044 | ) |
5045 | { |
5046 | STATIC_CONTRACT_THROWS; |
5047 | STATIC_CONTRACT_GC_NOTRIGGER; |
5048 | |
5049 | Thread *pThread = GetThread(); |
5050 | _ASSERTE(pThread); |
5051 | _ASSERTE(pThread->m_OSContext); |
5052 | |
5053 | _ASSERTE(pThread->PreemptiveGCDisabled()); |
5054 | |
5055 | #ifdef FEATURE_STACK_PROBE |
5056 | if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain) |
5057 | { |
5058 | RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT), pThread); |
5059 | } |
5060 | #endif |
5061 | |
5062 | // Check if we can start the abort.
5063 | // We use InducedThreadRedirect as a marker to tell the stackwalker that a thread is redirected from JIT code.
5064 | // This is to distinguish a thread that is in Preemptive mode and still in JIT code.
5065 | // After the stack crawl, we change to InducedThreadStop.
5066 | if (pThread->ThrewControlForThread() == Thread::InducedThreadRedirect || |
5067 | pThread->ThrewControlForThread() == Thread::InducedThreadRedirectAtEndOfCatch) |
5068 | { |
5069 | _ASSERTE((pThread->m_OSContext->ContextFlags & CONTEXT_ALL) == CONTEXT_ALL); |
5070 | if (!pThread->ReadyForAbort()) |
5071 | { |
5072 | STRESS_LOG0(LF_SYNC, LL_INFO100, "ThrowControlForThread resume\n" ); |
5073 | pThread->ResetThrowControlForThread(); |
5074 | // Thread abort is not allowed at this point |
5075 | #ifndef WIN64EXCEPTIONS |
5076 | __try{ |
5077 | RaiseException(BOOTUP_EXCEPTION_COMPLUS,0,0,NULL); |
5078 | } |
5079 | __except(RedirectedThrowControlExceptionFilter(GetExceptionInformation())) |
5080 | { |
5081 | _ASSERTE(!"Should not reach here" ); |
5082 | } |
5083 | #else // WIN64EXCEPTIONS |
5084 | RtlRestoreContext(pThread->m_OSContext, NULL); |
5085 | #endif // !WIN64EXCEPTIONS |
5086 | _ASSERTE(!"Should not reach here" ); |
5087 | } |
5088 | pThread->SetThrowControlForThread(Thread::InducedThreadStop); |
5089 | } |
5090 | |
5091 | #if defined(WIN64EXCEPTIONS) |
5092 | *(TADDR*)pfef = FaultingExceptionFrame::GetMethodFrameVPtr(); |
5093 | *pfef->GetGSCookiePtr() = GetProcessGSCookie(); |
5094 | #else // WIN64EXCEPTIONS |
5095 | FrameWithCookie<FaultingExceptionFrame> fef; |
5096 | FaultingExceptionFrame *pfef = &fef; |
5097 | #endif // WIN64EXCEPTIONS |
5098 | pfef->InitAndLink(pThread->m_OSContext); |
5099 | |
5100 | // !!! Cannot assert here. Sometimes our EHInfo for a catch clause extends beyond
5101 | // !!! Jit_EndCatch. Not sure if we have a guarantee on the catch clause.
5102 | //_ASSERTE (pThread->ReadyForAbort()); |
5103 | |
5104 | STRESS_LOG0(LF_SYNC, LL_INFO100, "ThrowControlForThread Aborting\n" ); |
5105 | |
5106 | // Here we raise an exception. |
5107 | RaiseComPlusException(); |
5108 | } |
5109 | |
5110 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
5111 | // This function is called by UserAbort and StopEEAndUnwindThreads. |
5112 | // It forces a thread to abort if allowed and the thread is running managed code. |
5113 | BOOL Thread::HandleJITCaseForAbort() |
5114 | { |
5115 | CONTRACTL { |
5116 | NOTHROW; |
5117 | GC_NOTRIGGER; |
5118 | } |
5119 | CONTRACTL_END; |
5120 | |
5121 | _ASSERTE(ThreadStore::HoldingThreadStore()); |
5122 | |
5123 | WorkingOnThreadContextHolder workingOnThreadContext(this); |
5124 | if (!workingOnThreadContext.Acquired()) |
5125 | { |
5126 | return FALSE; |
5127 | } |
5128 | |
5129 | _ASSERTE (m_fPreemptiveGCDisabled); |
5130 | |
5131 | CONTEXT ctx; |
5132 | ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_DEBUG_REGISTERS | CONTEXT_EXCEPTION_REQUEST; |
5133 | BOOL success = EEGetThreadContext(this, &ctx); |
5134 | _ASSERTE(success && "Thread::HandleJITCaseForAbort : Failed to get thread context" ); |
5135 | |
5136 | if (success) |
5137 | { |
5138 | success = IsContextSafeToRedirect(&ctx); |
5139 | } |
5140 | |
5141 | if (success) |
5142 | { |
5143 | PCODE curIP = GetIP(&ctx); |
5144 | |
5145 | // check if this is code managed by the code manager (ie. in the code heap) |
5146 | if (ExecutionManager::IsManagedCode(curIP)) |
5147 | { |
5148 | return ResumeUnderControl(&ctx); |
5149 | } |
5150 | } |
5151 | |
5152 | return FALSE; |
5153 | } |
5154 | |
5155 | // Threads suspended by the Win32 ::SuspendThread() are resumed in two ways. If we |
5156 | // suspended them in error, they are resumed via the Win32 ::ResumeThread(). But if |
5157 | // this is the HandledJIT() case and the thread is in fully interruptible code, we |
5158 | // can resume them under special control. ResumeRuntime and UserResume are cases |
5159 | // of this. |
5160 | // |
5161 | // The suspension has done its work (e.g. GC or user thread suspension). But during |
5162 | // the resumption we may have more that we want to do with this thread. For example, |
5163 | // there may be a pending ThreadAbort request. Instead of resuming the thread at its |
5164 | // current EIP, we tweak its resumption point via the thread context. Then it starts |
5165 | // executing at a new spot where we can have our way with it. |
5166 | |
5167 | BOOL Thread::ResumeUnderControl(CONTEXT *pCtx) |
5168 | { |
5169 | CONTRACTL { |
5170 | NOTHROW; |
5171 | GC_NOTRIGGER; |
5172 | } |
5173 | CONTRACTL_END; |
5174 | |
5175 | BOOL fSuccess = FALSE; |
5176 | |
5177 | LOG((LF_APPDOMAIN, LL_INFO100, "ResumeUnderControl %x\n" , GetThreadId())); |
5178 | |
5179 | BOOL fSucceeded; |
5180 | |
5181 | m_OSContext->ContextFlags = CONTEXT_ALL | CONTEXT_EXCEPTION_REQUEST; |
5182 | fSucceeded = EEGetThreadContext(this, m_OSContext); |
5183 | |
5184 | if (fSucceeded) |
5185 | { |
5186 | if (GetIP(pCtx) != GetIP(m_OSContext)) |
5187 | { |
5188 | return FALSE; |
5189 | } |
5190 | fSucceeded = IsContextSafeToRedirect(m_OSContext); |
5191 | } |
5192 | |
5193 | if (fSucceeded) |
5194 | { |
5195 | PCODE resumePC = GetIP(m_OSContext); |
5196 | SetIP(m_OSContext, GetEEFuncEntryPoint(THROW_CONTROL_FOR_THREAD_FUNCTION)); |
5197 | SetThrowControlForThread(InducedThreadRedirect); |
5198 | STRESS_LOG1(LF_SYNC, LL_INFO100, "ResumeUnderControl for Thread %p\n" , this); |
5199 | |
5200 | #ifdef _TARGET_AMD64_ |
5201 | // We need to establish the return value on the stack in the redirection stub, to |
5202 | // achieve crawlability. We use 'rcx' as the way to communicate the return value. |
5203 | // However, we are going to crawl in ReadyForAbort and we are going to resume in |
5204 | // ThrowControlForThread using m_OSContext. It's vital that the original correct |
5205 | // Rcx is present at those times, or we will have corrupted Rcx at the point of |
5206 | // resumption. |
5207 | UINT_PTR keepRcx = m_OSContext->Rcx; |
5208 | |
5209 | m_OSContext->Rcx = (UINT_PTR)resumePC; |
5210 | #endif // _TARGET_AMD64_ |
5211 | |
5212 | #if defined(_TARGET_ARM_) |
5213 | // We save the original ControlPC in LR on ARM. |
5214 | UINT_PTR originalLR = m_OSContext->Lr; |
5215 | m_OSContext->Lr = (UINT_PTR)resumePC; |
5216 | |
5217 | // Since we have set a new IP, we have to clear conditional execution flags too. |
5218 | UINT_PTR originalCpsr = m_OSContext->Cpsr; |
5219 | ClearITState(m_OSContext); |
5220 | #endif // _TARGET_ARM_ |
5221 | |
5222 | EESetThreadContext(this, m_OSContext); |
5223 | |
5224 | #ifdef _TARGET_ARM_ |
5225 | // Restore the original LR now that the OS context has been updated to resume @ redirection function. |
5226 | m_OSContext->Lr = originalLR; |
5227 | m_OSContext->Cpsr = originalCpsr; |
5228 | #endif // _TARGET_ARM_ |
5229 | |
5230 | #ifdef _TARGET_AMD64_ |
5231 | // and restore. |
5232 | m_OSContext->Rcx = keepRcx; |
5233 | #endif // _TARGET_AMD64_ |
5234 | |
5235 | SetIP(m_OSContext, resumePC); |
5236 | |
5237 | fSuccess = TRUE; |
5238 | } |
5239 | #if _DEBUG |
5240 | else |
5241 | _ASSERTE(!"Couldn't obtain thread context -- StopRequest delayed" ); |
5242 | #endif |
5243 | return fSuccess; |
5244 | } |
5245 | |
5246 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
5247 | |
5248 | |
5249 | PCONTEXT Thread::GetAbortContext () |
5250 | { |
5251 | LIMITED_METHOD_CONTRACT; |
5252 | |
5253 | LOG((LF_EH, LL_INFO100, "Returning abort context: %p\n" , m_OSContext)); |
5254 | return m_OSContext; |
5255 | } |
5256 | |
5257 | |
5258 | //**************************************************************************** |
5259 | // Return true if we've Suspended the runtime, |
5260 | // False if we still need to sweep. |
5261 | //**************************************************************************** |
5262 | bool Thread::SysStartSuspendForDebug(AppDomain *pAppDomain) |
5263 | { |
5264 | CONTRACTL { |
5265 | NOTHROW; |
5266 | GC_NOTRIGGER; |
5267 | } |
5268 | CONTRACTL_END; |
5269 | |
5270 | Thread *pCurThread = GetThread(); |
5271 | Thread *thread = NULL; |
5272 | |
5273 | if (IsAtProcessExit()) |
5274 | { |
5275 | LOG((LF_CORDB, LL_INFO1000, |
5276 | "SUSPEND: skipping suspend due to process detach.\n" )); |
5277 | return true; |
5278 | } |
5279 | |
5280 | LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: starting suspend. Trap count: %d\n" , |
5281 | pCurThread ? pCurThread->GetThreadId() : (DWORD) -1, g_TrapReturningThreads.Load())); |
5282 | |
5283 | // Caller is expected to be holding the ThreadStore lock |
5284 | _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit()); |
5285 | |
5286 | |
5287 | // NOTE::NOTE::NOTE::NOTE::NOTE |
5288 | // This function has parallel logic in SuspendRuntime. Please make |
5289 | // sure to make appropriate changes there as well. |
5290 | |
5291 | _ASSERTE(m_DebugWillSyncCount == -1); |
5292 | m_DebugWillSyncCount++; |
5293 | |
5294 | // From this point until the end of the function, consider all active thread |
5295 | // suspension to be in progress. This is mainly to give the profiler API a hint |
5296 | // that trying to suspend a thread (in order to walk its stack) could delay the |
5297 | // overall EE suspension. So the profiler API would early-abort the stackwalk |
5298 | // in such a case. |
5299 | ThreadSuspend::SuspendRuntimeInProgressHolder hldSuspendRuntimeInProgress; |
5300 | |
5301 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
5302 | { |
5303 | #if 0 |
5304 | //<REVISIT_TODO> @todo APPD This needs to be finished, replaced, or yanked --MiPanitz</REVISIT_TODO> |
5305 | if (m_DebugAppDomainTarget != NULL && |
5306 | thread->GetDomain() != m_DebugAppDomainTarget) |
5307 | { |
5308 | continue; |
5309 | } |
5310 | #endif |
5311 | |
5312 | // Don't try to suspend threads that you've left suspended. |
5313 | if (thread->m_StateNC & TSNC_DebuggerUserSuspend) |
5314 | continue; |
5315 | |
5316 | if (thread == pCurThread) |
5317 | { |
5318 | LOG((LF_CORDB, LL_INFO1000, |
5319 | "[0x%x] SUSPEND: marking current thread.\n" , |
5320 | thread->GetThreadId())); |
5321 | |
5322 | _ASSERTE(!thread->m_fPreemptiveGCDisabled); |
5323 | |
5324 | // Mark this thread so it trips when it tries to re-enter |
5325 | // after completing this call. |
5326 | thread->SetupForSuspension(TS_DebugSuspendPending); |
5327 | thread->MarkForSuspension(TS_DebugSuspendPending); |
5328 | continue; |
5329 | } |
5330 | |
5331 | thread->SetupForSuspension(TS_DebugSuspendPending); |
5332 | |
5333 | // Threads can be in Preemptive or Cooperative GC mode. |
5334 | // Threads cannot switch to Cooperative mode without special |
5335 | // treatment when a GC is happening. But they can certainly |
5336 | // switch back and forth during a debug suspension -- until we |
5337 | // can get their Pending bit set. |
5338 | |
5339 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
5340 | DWORD dwSwitchCount = 0; |
5341 | RetrySuspension: |
5342 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
5343 | |
5344 | // We cannot allocate memory after we suspend a thread.
5345 | // Otherwise, we may deadlock the process when the CLR is hosted.
5346 | ThreadStore::AllocateOSContext(); |
5347 | |
5348 | #ifdef DISABLE_THREADSUSPEND |
5349 | // On platforms that do not support safe thread suspension we have |
5350 | // to rely on the GCPOLL mechanism. |
5351 | |
5352 | // When we do not suspend the target thread we rely on the GCPOLL |
5353 | // mechanism enabled by TrapReturningThreads. However when reading |
5354 | // shared state we need to erect appropriate memory barriers. So |
5355 | // the interlocked operation below ensures that any future reads on |
5356 | // this thread will happen after any earlier writes on a different |
5357 | // thread. |
5358 | SuspendThreadResult str = STR_Success; |
5359 | FastInterlockOr(&thread->m_fPreemptiveGCDisabled, 0); |
5360 | #else |
5361 | SuspendThreadResult str = thread->SuspendThread(); |
5362 | #endif // DISABLE_THREADSUSPEND |
5363 | |
5364 | if (thread->m_fPreemptiveGCDisabled && str == STR_Success) |
5365 | { |
5366 | |
5367 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
5368 | WorkingOnThreadContextHolder workingOnThreadContext(thread); |
5369 | if (workingOnThreadContext.Acquired() && thread->HandledJITCase()) |
5370 | { |
5371 | // Redirect thread so we can capture a good thread context |
5372 | // (GetThreadContext is not sufficient, due to an OS bug). |
5373 | // If we don't succeed (should only happen on Win9X, due to |
5374 | // a different OS bug), we must resume the thread and try |
5375 | // again. |
5376 | if (!thread->CheckForAndDoRedirectForDbg()) |
5377 | { |
5378 | thread->ResumeThread(); |
5379 | __SwitchToThread(0, ++dwSwitchCount); |
5380 | goto RetrySuspension; |
5381 | } |
5382 | } |
5383 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
5384 | |
5385 | // Remember that this thread will be running to a safe point |
5386 | FastInterlockIncrement(&m_DebugWillSyncCount); |
5387 | |
5388 | // When the thread reaches a safe place, it will wait |
5389 | // on the DebugSuspendEvent which clients can set when they |
5390 | // want to release us. |
5391 | thread->MarkForSuspension(TS_DebugSuspendPending | |
5392 | TS_DebugWillSync |
5393 | ); |
5394 | |
5395 | #ifdef DISABLE_THREADSUSPEND |
5396 | // There's a race above between the moment we first check m_fPreemptiveGCDisabled
5397 | // and the moment we enable TrapReturningThreads in MarkForSuspension. However, |
5398 | // nothing bad happens if the thread has transitioned to preemptive before marking |
5399 | // the thread for suspension; the thread will later be identified as Synced in |
5400 | // SysSweepThreadsForDebug |
5401 | #else // DISABLE_THREADSUSPEND |
5402 | // Resume the thread and let it run to a safe point |
5403 | thread->ResumeThread(); |
5404 | #endif // DISABLE_THREADSUSPEND |
5405 | |
5406 | LOG((LF_CORDB, LL_INFO1000, |
5407 | "[0x%x] SUSPEND: gc disabled - will sync.\n" , |
5408 | thread->GetThreadId())); |
5409 | } |
5410 | else if (!thread->m_fPreemptiveGCDisabled) |
5411 | { |
5412 | // Mark threads that are outside the Runtime so that if |
5413 | // they attempt to re-enter they will trip. |
5414 | thread->MarkForSuspension(TS_DebugSuspendPending); |
5415 | |
5416 | #ifdef DISABLE_THREADSUSPEND |
5417 | // There's a race above between the moment we first check m_fPreemptiveGCDisabled
5418 | // and the moment we enable TrapReturningThreads in MarkForSuspension. To account
5419 | // for that, we check whether the thread moved into cooperative mode, and if it has,
5420 | // we mark it as a DebugWillSync thread, which will be handled later in
5421 | // SysSweepThreadsForDebug.
5422 | if (thread->m_fPreemptiveGCDisabled) |
5423 | { |
5424 | // Remember that this thread will be running to a safe point |
5425 | FastInterlockIncrement(&m_DebugWillSyncCount); |
5426 | thread->SetThreadState(TS_DebugWillSync); |
5427 | } |
5428 | #else // DISABLE_THREADSUSPEND |
5429 | if (str == STR_Success) { |
5430 | thread->ResumeThread(); |
5431 | } |
5432 | #endif // DISABLE_THREADSUSPEND |
5433 | |
5434 | LOG((LF_CORDB, LL_INFO1000, |
5435 | "[0x%x] SUSPEND: gc enabled.\n" , thread->GetThreadId())); |
5436 | } |
5437 | } |
5438 | |
5439 | // |
5440 | // Return true if all threads are synchronized now, otherwise the |
5441 | // debugger must wait for the SuspendComplete, called from the last
5442 | // thread to sync. |
5443 | // |
5444 | |
5445 | if (FastInterlockDecrement(&m_DebugWillSyncCount) < 0) |
5446 | { |
5447 | LOG((LF_CORDB, LL_INFO1000, |
5448 | "SUSPEND: all threads sync before return.\n" )); |
5449 | return true; |
5450 | } |
5451 | else |
5452 | return false; |
5453 | } |
5454 | |
5455 | // |
5456 | // This method is called by the debugger helper thread when it times out waiting for a set of threads to |
5457 | // synchronize. It's used to chase down threads that are not synchronizing quickly. It returns true if all the threads are
5458 | // now synchronized. This also means that we own the thread store lock. |
5459 | // |
5460 | // This can be safely called if we're already suspended. |
5461 | bool Thread::SysSweepThreadsForDebug(bool forceSync) |
5462 | { |
5463 | CONTRACT(bool) { |
5464 | NOTHROW; |
5465 | DISABLED(GC_TRIGGERS); // WaitUntilConcurrentGCComplete toggles GC mode; disabled because this is called by an unmanaged thread
5466 | |
5467 | // We assume that only the "real" helper thread ever calls this (not somebody doing helper thread duty). |
5468 | PRECONDITION(ThreadStore::HoldingThreadStore()); |
5469 | PRECONDITION(IsDbgHelperSpecialThread()); |
5470 | PRECONDITION(GetThread() == NULL); |
5471 | |
5472 | // Iff we return true, then we have the TSL (or the aux lock used in workarounds). |
5473 | POSTCONDITION(ThreadStore::HoldingThreadStore()); |
5474 | } |
5475 | CONTRACT_END; |
5476 | |
5477 | _ASSERTE(!forceSync); // deprecated parameter |
5478 | |
5479 | Thread *thread = NULL; |
5480 | |
5481 | // NOTE::NOTE::NOTE::NOTE::NOTE |
5482 | // This function has parallel logic in SuspendRuntime. Please make |
5483 | // sure to make appropriate changes there as well. |
5484 | |
5485 | // From this point until the end of the function, consider all active thread |
5486 | // suspension to be in progress. This is mainly to give the profiler API a hint |
5487 | // that trying to suspend a thread (in order to walk its stack) could delay the |
5488 | // overall EE suspension. So the profiler API would early-abort the stackwalk |
5489 | // in such a case. |
5490 | ThreadSuspend::SuspendRuntimeInProgressHolder hldSuspendRuntimeInProgress; |
5491 | |
5492 | // Loop over the threads... |
5493 | while (((thread = ThreadStore::GetThreadList(thread)) != NULL) && (m_DebugWillSyncCount >= 0)) |
5494 | { |
5495 | // Skip threads that we aren't waiting for to sync. |
5496 | if ((thread->m_State & TS_DebugWillSync) == 0) |
5497 | continue; |
5498 | |
5499 | #ifdef DISABLE_THREADSUSPEND |
5500 | |
5501 | // On platforms that do not support safe thread suspension we have |
5502 | // to rely on the GCPOLL mechanism. |
5503 | |
5504 | // When we do not suspend the target thread we rely on the GCPOLL |
5505 | // mechanism enabled by TrapReturningThreads. However when reading |
5506 | // shared state we need to erect appropriate memory barriers. So |
5507 | // the interlocked operation below ensures that any future reads on |
5508 | // this thread will happen after any earlier writes on a different |
5509 | // thread. |
5510 | FastInterlockOr(&thread->m_fPreemptiveGCDisabled, 0); |
5511 | if (!thread->m_fPreemptiveGCDisabled) |
5512 | { |
5513 | // If the thread toggled to preemptive mode, then it's synced. |
5514 | goto Label_MarkThreadAsSynced; |
5515 | } |
5516 | else |
5517 | { |
5518 | continue; |
5519 | } |
5520 | |
5521 | #else // DISABLE_THREADSUSPEND |
5522 | // Suspend the thread |
5523 | |
5524 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
5525 | DWORD dwSwitchCount = 0; |
5526 | #endif |
5527 | |
5528 | RetrySuspension: |
5529 | // We cannot allocate memory after we suspend a thread.
5530 | // Otherwise, we may deadlock the process when the CLR is hosted.
5531 | ThreadStore::AllocateOSContext(); |
5532 | |
5533 | SuspendThreadResult str = thread->SuspendThread(); |
5534 | |
5535 | if (str == STR_Failure || str == STR_UnstartedOrDead) |
5536 | { |
5537 | // The thread cannot actually be unstarted - if it was, we would not |
5538 | // have marked it with TS_DebugWillSync in the first phase. |
5539 | _ASSERTE(!(thread->m_State & TS_Unstarted)); |
5540 | |
5541 | // If the thread has gone, we can't wait on it. |
5542 | goto Label_MarkThreadAsSynced; |
5543 | } |
5544 | else if (str == STR_SwitchedOut) |
5545 | { |
5546 | // The thread was switched b/c of fiber-mode stuff. |
5547 | if (!thread->m_fPreemptiveGCDisabled) |
5548 | { |
5549 | goto Label_MarkThreadAsSynced; |
5550 | } |
5551 | else |
5552 | { |
5553 | goto RetrySuspension; |
5554 | } |
5555 | } |
5556 | else if (str == STR_NoStressLog) |
5557 | { |
5558 | goto RetrySuspension; |
5559 | } |
5560 | else if (!thread->m_fPreemptiveGCDisabled) |
5561 | { |
5562 | // If the thread toggled to preemptive mode, then it's synced. |
5563 | |
5564 | // We can safely resume the thread here b/c it's in PreemptiveMode and the |
5565 | // EE will trap anybody trying to re-enter cooperative. So letting it run free |
5566 | // won't hurt the runtime. |
5567 | _ASSERTE(str == STR_Success); |
5568 | thread->ResumeThread(); |
5569 | |
5570 | goto Label_MarkThreadAsSynced; |
5571 | } |
5572 | #if defined(FEATURE_HIJACK) && !defined(PLATFORM_UNIX) |
5573 | // If the thread is in jitted code, HandledJITCase will try to hijack it; and the hijack
5574 | // will toggle the GC mode.
5575 | else |
5576 | { |
5577 | _ASSERTE(str == STR_Success); |
5578 | WorkingOnThreadContextHolder workingOnThreadContext(thread); |
5579 | if (workingOnThreadContext.Acquired() && thread->HandledJITCase()) |
5580 | { |
5581 | // Redirect thread so we can capture a good thread context |
5582 | // (GetThreadContext is not sufficient, due to an OS bug). |
5583 | // If we don't succeed (should only happen on Win9X, due to |
5584 | // a different OS bug), we must resume the thread and try |
5585 | // again. |
5586 | if (!thread->CheckForAndDoRedirectForDbg()) |
5587 | { |
5588 | thread->ResumeThread(); |
5589 | __SwitchToThread(0, ++dwSwitchCount); |
5590 | goto RetrySuspension; |
5591 | } |
5592 | |
5593 | // The hijack will toggle our GC mode, and thus we could wait for the next sweep, |
5594 | // and the GC-mode check above would catch and sync us. But there's no reason to wait:
5595 | // if the thread is hijacked, it's as good as synced, so mark it now.
5596 | thread->ResumeThread(); |
5597 | goto Label_MarkThreadAsSynced; |
5598 | } |
5599 | } |
5600 | #endif // FEATURE_HIJACK && !PLATFORM_UNIX |
5601 | |
5602 | // If we didn't take the thread out of the set, then resume it and give it another chance to reach a safe |
5603 | // point. |
5604 | thread->ResumeThread(); |
5605 | continue; |
5606 | |
5607 | #endif // DISABLE_THREADSUSPEND |
5608 | |
5609 | // The thread is synced. Remove the sync bits and dec the sync count. |
5610 | Label_MarkThreadAsSynced: |
5611 | FastInterlockAnd((ULONG *) &thread->m_State, ~TS_DebugWillSync); |
5612 | if (FastInterlockDecrement(&m_DebugWillSyncCount) < 0) |
5613 | { |
5614 | // If that was the last thread, then the CLR is synced. |
5615 | // We return while still owning the thread store lock. Returning true indicates this to the caller.
5616 | RETURN true; |
5617 | } |
5618 | continue; |
5619 | |
5620 | } // end looping through Thread Store |
5621 | |
5622 | if (m_DebugWillSyncCount < 0) |
5623 | { |
5624 | RETURN true; |
5625 | } |
5626 | |
5627 | // The CLR is not yet synced. We release the threadstore lock and return false. |
5628 | hldSuspendRuntimeInProgress.Release(); |
5629 | |
5630 | RETURN false; |
5631 | } |
5632 | |
5633 | void Thread::SysResumeFromDebug(AppDomain *pAppDomain) |
5634 | { |
5635 | CONTRACTL { |
5636 | NOTHROW; |
5637 | GC_NOTRIGGER; |
5638 | } |
5639 | CONTRACTL_END; |
5640 | |
5641 | Thread *thread = NULL; |
5642 | |
5643 | if (IsAtProcessExit()) |
5644 | { |
5645 | LOG((LF_CORDB, LL_INFO1000, |
5646 | "RESUME: skipping resume due to process detach.\n" )); |
5647 | return; |
5648 | } |
5649 | |
5650 | LOG((LF_CORDB, LL_INFO1000, "RESUME: starting resume AD:0x%x.\n" , pAppDomain)); |
5651 | |
5652 | |
5653 | // Make sure we completed the previous sync |
5654 | _ASSERTE(m_DebugWillSyncCount == -1); |
5655 | |
5656 | // Caller is expected to be holding the ThreadStore lock |
5657 | _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit()); |
5658 | |
5659 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
5660 | { |
5661 | // Only consider resuming threads if they're in the correct appdomain |
5662 | if (pAppDomain != NULL && thread->GetDomain() != pAppDomain) |
5663 | { |
5664 | LOG((LF_CORDB, LL_INFO1000, "RESUME: Not resuming thread 0x%x, since it's " |
5665 | "in appdomain 0x%x.\n" , thread, pAppDomain)); |
5666 | continue; |
5667 | } |
5668 | |
5669 | // If the user wants to keep the thread suspended, then |
5670 | // don't release the thread. |
5671 | if (!(thread->m_StateNC & TSNC_DebuggerUserSuspend)) |
5672 | { |
5673 | // If we are still trying to suspend this thread, forget about it. |
5674 | if (thread->m_State & TS_DebugSuspendPending) |
5675 | { |
5676 | LOG((LF_CORDB, LL_INFO1000, |
5677 | "[0x%x] RESUME: TS_DebugSuspendPending was set, but will be removed\n" , |
5678 | thread->GetThreadId())); |
5679 | |
5680 | #ifdef _TARGET_ARM_ |
5681 | if (thread->IsSingleStepEnabled()) |
5682 | { |
5683 | if (ISREDIRECTEDTHREAD(thread)) |
5684 | thread->ApplySingleStep(GETREDIRECTEDCONTEXT(thread)); |
5685 | } |
5686 | #endif |
5687 | // Note: we unmark for suspension _then_ set the suspend event. |
5688 | thread->ReleaseFromSuspension(TS_DebugSuspendPending); |
5689 | } |
5690 | |
5691 | } |
5692 | else |
5693 | { |
5694 | // Thread will remain suspended due to a request from the debugger. |
5695 | |
5696 | LOG((LF_CORDB,LL_INFO10000,"Didn't unsuspend thread 0x%x" |
5697 | "(ID:0x%x)\n" , thread, thread->GetThreadId())); |
5698 | LOG((LF_CORDB,LL_INFO10000,"Suspending:0x%x\n" , |
5699 | thread->m_State & TS_DebugSuspendPending)); |
5700 | _ASSERTE((thread->m_State & TS_DebugWillSync) == 0); |
5701 | |
5702 | } |
5703 | } |
5704 | |
5705 | LOG((LF_CORDB, LL_INFO1000, "RESUME: resume complete. Trap count: %d\n" , g_TrapReturningThreads.Load())); |
5706 | } |
5707 | |
5708 | /* |
5709 | * |
5710 | * WaitSuspendEventsHelper |
5711 | * |
5712 | * This function is a simple helper function for WaitSuspendEvents. It is needed |
5713 | * because of the EX_TRY macro. This macro does an alloca(), which allocates space |
5714 | * off the stack without freeing it. Thus, doing an EX_TRY in a loop can easily result
5715 | * in a stack overflow error. By factoring out the EX_TRY into a separate function, |
5716 | * we recover that stack space. |
5717 | * |
5718 | * Parameters: |
5719 | * None. |
5720 | * |
5721 | * Return: |
5722 | * true if meant to continue, else false. |
5723 | * |
5724 | */ |
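// An illustrative sketch of the pattern this helper avoids (hypothetical shape of the macro
// expansion; the real EX_TRY machinery is more involved):
//
//     while (suspended)
//     {
//         EX_TRY {            // roughly: pState = alloca(sizeof(exception state)); ...
//             Wait();
//         } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions)
//     }                       // the alloca'd block is never released inside the loop
//
// Calling WaitSuspendEventsHelper once per loop iteration instead confines the alloca to this
// helper's frame, which is reclaimed on every return.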
5725 | BOOL Thread::WaitSuspendEventsHelper(void) |
5726 | { |
5727 | STATIC_CONTRACT_NOTHROW; |
5728 | STATIC_CONTRACT_GC_NOTRIGGER; |
5729 | |
5730 | DWORD result = WAIT_FAILED; |
5731 | |
5732 | EX_TRY { |
5733 | |
5734 | // CoreCLR does not support user-requested thread suspension |
5735 | _ASSERTE(!(m_State & TS_UserSuspendPending)); |
5736 | |
5737 | if (m_State & TS_DebugSuspendPending) { |
5738 | |
5739 | ThreadState oldState = m_State; |
5740 | |
5741 | while (oldState & TS_DebugSuspendPending) { |
5742 | |
5743 | ThreadState newState = (ThreadState)(oldState | TS_SyncSuspended); |
5744 | if (FastInterlockCompareExchange((LONG *)&m_State, newState, oldState) == (LONG)oldState) |
5745 | { |
5746 | result = m_DebugSuspendEvent.Wait(INFINITE,FALSE); |
5747 | #if _DEBUG |
5748 | newState = m_State; |
5749 | _ASSERTE(!(newState & TS_SyncSuspended) || (newState & TS_UserSuspendPending)); |
5750 | #endif |
5751 | break; |
5752 | } |
5753 | |
5754 | oldState = m_State; |
5755 | } |
5756 | } |
5757 | } |
5758 | EX_CATCH { |
5759 | } |
5760 | EX_END_CATCH(SwallowAllExceptions) |
5761 | |
5762 | return result != WAIT_OBJECT_0; |
5763 | } |
5764 | |
5765 | |
5766 | // There's a bit of a workaround here |
5767 | void Thread::WaitSuspendEvents(BOOL fDoWait) |
5768 | { |
5769 | STATIC_CONTRACT_NOTHROW; |
5770 | STATIC_CONTRACT_GC_NOTRIGGER; |
5771 | |
5772 | _ASSERTE(!PreemptiveGCDisabled()); |
5773 | _ASSERTE((m_State & TS_SyncSuspended) == 0); |
5774 | |
5775 | // Let us do some useful work before suspending ourselves. |
5776 | |
5777 | // If we're required to perform a wait, do so. Typically, this is |
5778 | // skipped if this thread is a Debugger Special Thread. |
5779 | if (fDoWait) |
5780 | { |
5781 | while (TRUE) |
5782 | { |
5783 | WaitSuspendEventsHelper(); |
5784 | |
5785 | ThreadState oldState = m_State; |
5786 | |
5787 | // CoreCLR does not support user-requested thread suspension |
5788 | _ASSERTE(!(oldState & TS_UserSuspendPending)); |
5789 | |
5790 | // |
5791 | // If all reasons to suspend are off, we think we can exit |
5792 | // this loop, but we need to check atomically. |
5793 | // |
5794 | if ((oldState & (TS_UserSuspendPending | TS_DebugSuspendPending)) == 0) |
5795 | { |
5796 | // |
5797 | // Construct the destination state we desire - all suspension bits turned off. |
5798 | // |
5799 | ThreadState newState = (ThreadState)(oldState & ~(TS_UserSuspendPending | |
5800 | TS_DebugSuspendPending | |
5801 | TS_SyncSuspended)); |
5802 | |
5803 | if (FastInterlockCompareExchange((LONG *)&m_State, newState, oldState) == (LONG)oldState) |
5804 | { |
5805 | // |
5806 | // We are done. |
5807 | // |
5808 | break; |
5809 | } |
5810 | } |
5811 | } |
5812 | } |
5813 | } |
5814 | |
5815 | #ifdef FEATURE_HIJACK |
5816 | // Hijacking JITted calls |
5817 | // ====================== |
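//
// In brief: with the target thread suspended, we locate the return address slot of the frame
// it is executing in and overwrite it with the address of a hijack stub (see HijackThread
// below). The original return address is saved in m_pvHJRetAddr. When the thread later returns
// from that frame it "trips" into the stub, which builds a HijackFrame, lets the pending
// operation (GC, debugger suspension, thread abort) run at this now-safe point via
// CommonTripThread, and finally resumes at the saved return address (see OnHijackWorker and
// UnhijackThread).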
5818 | |
5819 | // State of execution when we suspend a thread |
5820 | struct ExecutionState |
5821 | { |
5822 | BOOL m_FirstPass; |
5823 | BOOL m_IsJIT; // are we executing JITted code? |
5824 | MethodDesc *m_pFD; // current function/method we're executing |
5825 | VOID **m_ppvRetAddrPtr; // pointer to return address in frame |
5826 | DWORD m_RelOffset; // relative offset at which we're currently executing in this fcn |
5827 | IJitManager *m_pJitManager; |
5828 | METHODTOKEN m_MethodToken; |
5829 | BOOL m_IsInterruptible; // is this code interruptible? |
5830 | |
5831 | ExecutionState() : m_FirstPass(TRUE) {LIMITED_METHOD_CONTRACT; } |
5832 | }; |
5833 | |
5834 | // Client is responsible for suspending the thread before calling |
5835 | void Thread::HijackThread(VOID *pvHijackAddr, ExecutionState *esb) |
5836 | { |
5837 | CONTRACTL { |
5838 | NOTHROW; |
5839 | GC_NOTRIGGER; |
5840 | } |
5841 | CONTRACTL_END; |
5842 | |
5843 | // Don't hijack if we are in the first level of running a filter/finally/catch.
5844 | // This is because they share ebp with their containing function further down the
5845 | // stack, and we would hijack their containing function incorrectly.
5846 | if (IsInFirstFrameOfHandler(this, esb->m_pJitManager, esb->m_MethodToken, esb->m_RelOffset)) |
5847 | { |
5848 | STRESS_LOG3(LF_SYNC, LL_INFO100, "Thread::HijackThread(%p to %p): Early out - IsInFirstFrameOfHandler. State=%x.\n" , this, pvHijackAddr, (ThreadState)m_State); |
5849 | return; |
5850 | } |
5851 | |
5852 | // Don't hijack if a profiler stackwalk is in progress |
5853 | HijackLockHolder hijackLockHolder(this); |
5854 | if (!hijackLockHolder.Acquired()) |
5855 | { |
5856 | STRESS_LOG3(LF_SYNC, LL_INFO100, "Thread::HijackThread(%p to %p): Early out - !hijackLockHolder.Acquired. State=%x.\n" , this, pvHijackAddr, (ThreadState)m_State); |
5857 | return; |
5858 | } |
5859 | |
5860 | IS_VALID_CODE_PTR((FARPROC) pvHijackAddr); |
5861 | |
5862 | if (m_State & TS_Hijacked) |
5863 | UnhijackThread(); |
5864 | |
5865 | // Make sure that the location of the return address is on the stack |
5866 | _ASSERTE(IsAddressInStack(esb->m_ppvRetAddrPtr)); |
5867 | |
5868 | // Obtain the location of the return address in the currently executing stack frame |
5869 | m_ppvHJRetAddrPtr = esb->m_ppvRetAddrPtr; |
5870 | |
5871 | // Remember the place that the return would have gone |
5872 | m_pvHJRetAddr = *esb->m_ppvRetAddrPtr; |
5873 | |
5874 | IS_VALID_CODE_PTR((FARPROC) (TADDR)m_pvHJRetAddr); |
5875 | // TODO [DAVBR]: For the full fix for VsWhidbey 450273, the below |
5876 | // may be uncommented once isLegalManagedCodeCaller works properly |
5877 | // with non-return address inputs, and with non-DEBUG builds |
5878 | //_ASSERTE(isLegalManagedCodeCaller((TADDR)m_pvHJRetAddr)); |
5879 | STRESS_LOG2(LF_SYNC, LL_INFO100, "Hijacking return address 0x%p for thread %p\n" , m_pvHJRetAddr, this); |
5880 | |
5881 | // Remember the method we're executing |
5882 | m_HijackedFunction = esb->m_pFD; |
5883 | |
5884 | // Bash the stack to return to one of our stubs |
5885 | *esb->m_ppvRetAddrPtr = pvHijackAddr; |
5886 | FastInterlockOr((ULONG *) &m_State, TS_Hijacked); |
5887 | } |
5888 | |
5889 | // If we are unhijacking another thread (not the current thread), then the caller is responsible for |
5890 | // suspending that thread. |
5891 | // It's legal to unhijack the current thread without special treatment. |
5892 | void Thread::UnhijackThread() |
5893 | { |
5894 | CONTRACTL { |
5895 | NOTHROW; |
5896 | GC_NOTRIGGER; |
5897 | SO_TOLERANT; |
5898 | CANNOT_TAKE_LOCK; |
5899 | } |
5900 | CONTRACTL_END; |
5901 | |
5902 | if (m_State & TS_Hijacked) |
5903 | { |
5904 | IS_VALID_WRITE_PTR(m_ppvHJRetAddrPtr, sizeof(void*)); |
5905 | IS_VALID_CODE_PTR((FARPROC) m_pvHJRetAddr); |
5906 | |
5907 | // Can't make the following assertion, because sometimes we unhijack after
5908 | // the hijack has tripped (i.e. in the case where we actually got some value
5909 | // from it).
5910 | // _ASSERTE(*m_ppvHJRetAddrPtr == OnHijackTripThread); |
5911 | |
5912 | STRESS_LOG2(LF_SYNC, LL_INFO100, "Unhijacking return address 0x%p for thread %p\n" , m_pvHJRetAddr, this); |
5913 | // restore the return address and clear the flag |
5914 | *m_ppvHJRetAddrPtr = m_pvHJRetAddr; |
5915 | FastInterlockAnd((ULONG *) &m_State, ~TS_Hijacked); |
5916 | |
5917 | // But don't touch m_pvHJRetAddr. We may need that to resume a thread that |
5918 | // is currently hijacked! |
5919 | } |
5920 | } |
5921 | |
5922 | // Get the ExecutionState for the specified *SUSPENDED* thread. Note that this is |
5923 | // a 'StackWalk' callback (PSTACKWALKFRAMESCALLBACK).
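// The caller passes an ExecutionState as the pData cookie. On the first pass this callback
// records whether the suspended thread is at the top of JITted code and whether that code is
// fully interruptible; for non-interruptible frames it determines where the return address
// lives (m_ppvRetAddrPtr) so the caller can hijack it, using a second pass on x86 where the
// callee's frame must be peeled off first.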
5924 | StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) |
5925 | { |
5926 | CONTRACTL { |
5927 | NOTHROW; |
5928 | GC_NOTRIGGER; |
5929 | } |
5930 | CONTRACTL_END; |
5931 | |
5932 | ExecutionState *pES = (ExecutionState *) pData; |
5933 | StackWalkAction action = SWA_ABORT; |
5934 | |
5935 | if (pES->m_FirstPass) |
5936 | { |
5937 | // This will help factor out some repeated code. |
5938 | bool notJittedCase = false; |
5939 | |
5940 | // If we're in jitted code at the top of the stack, grab everything
5941 | if (pCF->IsFrameless() && pCF->IsActiveFunc()) |
5942 | { |
5943 | pES->m_IsJIT = TRUE; |
5944 | pES->m_pFD = pCF->GetFunction(); |
5945 | pES->m_MethodToken = pCF->GetMethodToken(); |
5946 | pES->m_ppvRetAddrPtr = 0; |
5947 | pES->m_IsInterruptible = pCF->IsGcSafe(); |
5948 | pES->m_RelOffset = pCF->GetRelOffset(); |
5949 | pES->m_pJitManager = pCF->GetJitManager(); |
5950 | |
5951 | STRESS_LOG3(LF_SYNC, LL_INFO1000, "Stopped in Jitted code at pc = %p sp = %p fullyInt=%d\n" , |
5952 | GetControlPC(pCF->GetRegisterSet()), GetRegdisplaySP(pCF->GetRegisterSet()), pES->m_IsInterruptible); |
5953 | |
5954 | #if defined(FEATURE_CONSERVATIVE_GC) && !defined(USE_GC_INFO_DECODER) |
5955 | if (g_pConfig->GetGCConservative()) |
5956 | { |
5957 | // Conservative GC enabled; behave as if HIJACK_NONINTERRUPTIBLE_THREADS had not been |
5958 | // set above: |
5959 | // |
5960 | notJittedCase = true; |
5961 | } |
5962 | else |
5963 | #endif // FEATURE_CONSERVATIVE_GC |
5964 | { |
5965 | #ifndef HIJACK_NONINTERRUPTIBLE_THREADS |
5966 | if (!pES->m_IsInterruptible) |
5967 | { |
5968 | notJittedCase = true; |
5969 | } |
5970 | #else // HIJACK_NONINTERRUPTIBLE_THREADS |
5971 | // if we're not interruptible right here, we need to determine the |
5972 | // return address for hijacking. |
5973 | if (!pES->m_IsInterruptible) |
5974 | { |
5975 | #ifdef WIN64EXCEPTIONS |
5976 | PREGDISPLAY pRDT = pCF->GetRegisterSet(); |
5977 | _ASSERTE(pRDT != NULL); |
5978 | |
5979 | // For simplicity, don't hijack in funclets |
5980 | bool fIsFunclet = pCF->IsFunclet(); |
5981 | if (fIsFunclet) |
5982 | { |
5983 | notJittedCase = true; |
5984 | } |
5985 | else |
5986 | { |
5987 | // We already have the caller context available at this point |
5988 | _ASSERTE(pRDT->IsCallerContextValid); |
5989 | #if defined(_TARGET_ARM_) || defined(_TARGET_ARM64_) |
5990 | |
5991 | // Why do we use CallerContextPointers below? |
5992 | // |
5993 | // Assume the following callstack, growing from left->right: |
5994 | // |
5995 | // C -> B -> A |
5996 | // |
5997 | // Assuming A is a non-interruptible function that pushes LR on the stack,
5998 | // when we get the stackwalk callback for A, the CallerContext would
5999 | // contain non-volatile register state for B and CallerContextPtrs would
6000 | // contain the location where the caller's (B's) non-volatiles were restored
6001 | // from. This would be the stack location in A where they were pushed. Thus, |
6002 | // CallerContextPtrs->Lr would contain the stack location in A where LR (representing an address in B) |
6003 | // was pushed and thus, contains the return address in B. |
6004 | |
6005 | // Note that the JIT always pushes LR even for leaf methods to make hijacking |
6006 | // work for them. See comment in code:Compiler::genPushCalleeSavedRegisters. |
6007 | |
6008 | if(pRDT->pCallerContextPointers->Lr == &pRDT->pContext->Lr) |
6009 | { |
6010 | // This is the case when we are either: |
6011 | // |
6012 | // 1) In a leaf method that does not push LR on stack, OR |
6013 | // 2) In the prolog/epilog of a non-leaf method that has not yet pushed LR on stack |
6014 | // or has LR already popped off. |
6015 | // |
6016 | // The remaining case for a non-leaf method is that of the IP being in the body of the
6017 | // function. In such a case, LR would have been pushed on the stack and thus,
6018 | // we wouldn't be here but in the "else" clause below.
6019 | // |
6020 | // For (1) we could use CallerContext->ControlPC as the return address,
6021 | // since we know that leaf frames will return to their caller.
6022 | // However, we may need JIT support to do so.
6023 | notJittedCase = true; |
6024 | } |
6025 | else if (pCF->HasTailCalls()) |
6026 | { |
6027 | // Do not hijack functions that have tail calls, since there are two problems: |
6028 | // 1. When a function that tail calls another one is hijacked, the LR may be |
6029 | // stored at a different location in the stack frame of the tail call target. |
6030 | //    So just by performing the tail call, the hijacked location becomes invalid and
6031 | //    unhijacking would corrupt the stack by writing to that location.
6032 | // 2. There is a small window after the caller pops LR from the stack in its |
6033 | //    epilog and before the tail-called function pushes LR in its prolog, when
6034 | //    the hijacked return address would not be on the stack and so we would
6035 | // not be able to unhijack. |
6036 | notJittedCase = true; |
6037 | } |
6038 | else |
6039 | { |
6040 | // This is the case of IP being inside the method body and LR is |
6041 | // pushed on the stack. We get it to determine the return address |
6042 | // in the caller of the current non-interruptible frame. |
6043 | pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Lr; |
6044 | } |
6045 | #elif defined(_TARGET_X86_) || defined(_TARGET_AMD64_) |
6046 | pES->m_ppvRetAddrPtr = (void **) (EECodeManager::GetCallerSp(pRDT) - sizeof(void*)); |
6047 | #else // _TARGET_X86_ || _TARGET_AMD64_ |
6048 | PORTABILITY_ASSERT("Platform NYI" ); |
6049 | #endif // _TARGET_???_ |
6050 | } |
6051 | #else // WIN64EXCEPTIONS |
6052 | // peel off the next frame to expose the return address on the stack |
6053 | pES->m_FirstPass = FALSE; |
6054 | action = SWA_CONTINUE; |
6055 | #endif // !WIN64EXCEPTIONS |
6056 | } |
6057 | #endif // HIJACK_NONINTERRUPTIBLE_THREADS |
6058 | } |
6059 | // else we are successfully out of here with SWA_ABORT |
6060 | } |
6061 | else |
6062 | { |
6063 | #ifdef _TARGET_X86_ |
6064 | STRESS_LOG2(LF_SYNC, LL_INFO1000, "Not in Jitted code at EIP = %p, &EIP = %p\n" , GetControlPC(pCF->GetRegisterSet()), pCF->GetRegisterSet()->PCTAddr); |
6065 | #else |
6066 | STRESS_LOG1(LF_SYNC, LL_INFO1000, "Not in Jitted code at pc = %p\n" , GetControlPC(pCF->GetRegisterSet())); |
6067 | #endif |
6068 | notJittedCase = true; |
6069 | } |
6070 | |
6071 | // Cases above may have set "notJittedCase", which we handle as follows:
6072 | if (notJittedCase) |
6073 | { |
6074 | pES->m_IsJIT = FALSE; |
6075 | #ifdef _DEBUG |
6076 | pES->m_pFD = (MethodDesc *)POISONC; |
6077 | pES->m_ppvRetAddrPtr = (void **)POISONC; |
6078 | pES->m_IsInterruptible = FALSE; |
6079 | #endif |
6080 | } |
6081 | } |
6082 | else |
6083 | { |
6084 | #if defined(_TARGET_X86_) && !defined(WIN64EXCEPTIONS) |
6085 | // Second pass, looking for the address of the return address so we can |
6086 | // hijack: |
6087 | |
6088 | PREGDISPLAY pRDT = pCF->GetRegisterSet(); |
6089 | |
6090 | if (pRDT != NULL) |
6091 | { |
6092 | // pPC points to the return address sitting on the stack, as our |
6093 | // current EIP for the penultimate stack frame. |
6094 | pES->m_ppvRetAddrPtr = (void **) pRDT->PCTAddr; |
6095 | |
6096 | STRESS_LOG2(LF_SYNC, LL_INFO1000, "Partially Int case hijack address = 0x%x val = 0x%x\n" , pES->m_ppvRetAddrPtr, *pES->m_ppvRetAddrPtr); |
6097 | } |
6098 | #else |
6099 | PORTABILITY_ASSERT("Platform NYI" ); |
6100 | #endif |
6101 | } |
6102 | |
6103 | return action; |
6104 | } |
6105 | |
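// A HijackFrame links itself onto the thread's Frame chain, capturing the hijacked return
// address and the HijackArgs, so that a stack crawl started while the thread is parked in the
// hijack stub can proceed correctly past the point the hijack displaced.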
6106 | HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args) |
6107 | : m_ReturnAddress((TADDR)returnAddress), |
6108 | m_Thread(thread), |
6109 | m_Args(args) |
6110 | { |
6111 | CONTRACTL { |
6112 | NOTHROW; |
6113 | GC_NOTRIGGER; |
6114 | } |
6115 | CONTRACTL_END; |
6116 | |
6117 | _ASSERTE(m_Thread == GetThread()); |
6118 | |
6119 | m_Next = m_Thread->GetFrame(); |
6120 | m_Thread->SetFrame(this); |
6121 | } |
6122 | |
6123 | void STDCALL OnHijackWorker(HijackArgs * pArgs) |
6124 | { |
6125 | CONTRACTL{ |
6126 | THROWS; |
6127 | GC_TRIGGERS; |
6128 | SO_TOLERANT; |
6129 | } |
6130 | CONTRACTL_END; |
6131 | |
6132 | #ifdef HIJACK_NONINTERRUPTIBLE_THREADS |
6133 | Thread *thread = GetThread(); |
6134 | |
6135 | #ifdef FEATURE_STACK_PROBE |
6136 | if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain) |
6137 | { |
// Make sure the default domain does not see a stack overflow.
// Probe for our entry-point amount and throw if there is not enough stack.
6140 | RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT), thread); |
6141 | } |
6142 | #endif // FEATURE_STACK_PROBE |
6143 | |
6144 | CONTRACT_VIOLATION(SOToleranceViolation); |
6145 | |
6146 | thread->ResetThreadState(Thread::TS_Hijacked); |
6147 | |
6148 | // Fix up our caller's stack, so it can resume from the hijack correctly |
6149 | pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; |
6150 | |
6151 | // Build a frame so that stack crawling can proceed from here back to where |
6152 | // we will resume execution. |
6153 | FrameWithCookie<HijackFrame> frame((void *)pArgs->ReturnAddress, thread, pArgs); |
6154 | |
6155 | #ifdef _DEBUG |
6156 | BOOL GCOnTransition = FALSE; |
6157 | if (g_pConfig->FastGCStressLevel()) { |
6158 | GCOnTransition = GC_ON_TRANSITIONS(FALSE); |
6159 | } |
6160 | #endif // _DEBUG |
6161 | |
6162 | #ifdef TIME_SUSPEND |
6163 | g_SuspendStatistics.cntHijackTrap++; |
6164 | #endif // TIME_SUSPEND |
6165 | |
6166 | CommonTripThread(); |
6167 | |
6168 | #ifdef _DEBUG |
6169 | if (g_pConfig->FastGCStressLevel()) { |
6170 | GC_ON_TRANSITIONS(GCOnTransition); |
6171 | } |
6172 | #endif // _DEBUG |
6173 | |
6174 | frame.Pop(); |
6175 | #else |
6176 | PORTABILITY_ASSERT("OnHijackWorker not implemented on this platform." ); |
6177 | #endif // HIJACK_NONINTERRUPTIBLE_THREADS |
6178 | } |
6179 | |
6180 | ReturnKind GetReturnKindFromMethodTable(Thread *pThread, EECodeInfo *codeInfo) |
6181 | { |
6182 | #ifdef _WIN64 |
6183 | // For simplicity, we don't hijack in funclets, but if you ever change that, |
// be sure to choose the OnHijack... callback type to match that of the FUNCLET,
// not the main method (it would probably be Scalar).
6186 | #endif // _WIN64 |
6187 | |
6188 | ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE(); |
// Mark that we are performing a stackwalker-like operation on the current thread.
// This is necessary to allow the signature parsing functions to work without triggering any loads.
6191 | ClrFlsValueSwitch threadStackWalking(TlsIdx_StackWalkerWalkingThread, pThread); |
6192 | |
6193 | MethodDesc *methodDesc = codeInfo->GetMethodDesc(); |
6194 | _ASSERTE(methodDesc != nullptr); |
6195 | |
6196 | #ifdef _TARGET_X86_ |
6197 | MetaSig msig(methodDesc); |
6198 | if (msig.HasFPReturn()) |
6199 | { |
6200 | // Figuring out whether the function returns FP or not is hard to do |
6201 | // on-the-fly, so we use a different callback helper on x86 where this |
6202 | // piece of information is needed in order to perform the right save & |
6203 | // restore of the return value around the call to OnHijackScalarWorker. |
6204 | return RT_Float; |
6205 | } |
6206 | #endif // _TARGET_X86_ |
6207 | |
6208 | MethodTable* pMT = NULL; |
6209 | MetaSig::RETURNTYPE type = methodDesc->ReturnsObject(INDEBUG_COMMA(false) &pMT); |
6210 | if (type == MetaSig::RETOBJ) |
6211 | { |
6212 | return RT_Object; |
6213 | } |
6214 | |
6215 | if (type == MetaSig::RETBYREF) |
6216 | { |
6217 | return RT_ByRef; |
6218 | } |
6219 | |
6220 | #ifdef UNIX_AMD64_ABI |
// The multi-reg return case using the class handle is only implemented for the AMD64 SystemV ABI.
// On other platforms, multi-reg return is not supported with GcInfo v1.
// So, the relevant information must be obtained from the GcInfo tables (which requires GcInfo v2).
6224 | if (type == MetaSig::RETVALUETYPE) |
6225 | { |
6226 | EEClass *eeClass = pMT->GetClass(); |
6227 | ReturnKind regKinds[2] = { RT_Unset, RT_Unset }; |
6228 | int orefCount = 0; |
6229 | for (int i = 0; i < 2; i++) |
6230 | { |
6231 | if (eeClass->GetEightByteClassification(i) == SystemVClassificationTypeIntegerReference) |
6232 | { |
6233 | regKinds[i] = RT_Object; |
6234 | } |
6235 | else if (eeClass->GetEightByteClassification(i) == SystemVClassificationTypeIntegerByRef) |
6236 | { |
6237 | regKinds[i] = RT_ByRef; |
6238 | } |
6239 | else |
6240 | { |
6241 | regKinds[i] = RT_Scalar; |
6242 | } |
6243 | } |
6244 | ReturnKind structReturnKind = GetStructReturnKind(regKinds[0], regKinds[1]); |
6245 | return structReturnKind; |
6246 | } |
6247 | #endif // UNIX_AMD64_ABI |
6248 | |
6249 | return RT_Scalar; |
6250 | } |
6251 | |
6252 | ReturnKind GetReturnKind(Thread *pThread, EECodeInfo *codeInfo) |
6253 | { |
6254 | GCInfoToken gcInfoToken = codeInfo->GetGCInfoToken(); |
6255 | ReturnKind returnKind = codeInfo->GetCodeManager()->GetReturnKind(gcInfoToken); |
6256 | |
6257 | if (!IsValidReturnKind(returnKind)) |
6258 | { |
6259 | returnKind = GetReturnKindFromMethodTable(pThread, codeInfo); |
6260 | } |
6261 | else |
6262 | { |
6263 | #if !defined(FEATURE_MULTIREG_RETURN) || defined(UNIX_AMD64_ABI) |
6264 | // For ARM64 struct-return, GetReturnKindFromMethodTable() is not supported |
6265 | _ASSERTE(returnKind == GetReturnKindFromMethodTable(pThread, codeInfo)); |
6266 | #endif // !FEATURE_MULTIREG_RETURN || UNIX_AMD64_ABI |
6267 | } |
6268 | |
6269 | _ASSERTE(IsValidReturnKind(returnKind)); |
6270 | return returnKind; |
6271 | } |
6272 | |
6273 | VOID * GetHijackAddr(Thread *pThread, EECodeInfo *codeInfo) |
6274 | { |
6275 | ReturnKind returnKind = GetReturnKind(pThread, codeInfo); |
6276 | pThread->SetHijackReturnKind(returnKind); |
6277 | |
6278 | #ifdef _TARGET_X86_ |
6279 | if (returnKind == RT_Float) |
6280 | { |
6281 | return reinterpret_cast<VOID *>(OnHijackFPTripThread); |
6282 | } |
6283 | #endif // _TARGET_X86_ |
6284 | |
6285 | return reinterpret_cast<VOID *>(OnHijackTripThread); |
6286 | } |
6287 | |
6288 | #ifndef PLATFORM_UNIX |
6289 | |
6290 | // Get the ExecutionState for the specified SwitchIn thread. Note that this is |
// a 'StackWalk' callback (PSTACKWALKFRAMESCALLBACK).
6292 | StackWalkAction SWCB_GetExecutionStateForSwitchIn(CrawlFrame *pCF, VOID *pData) |
6293 | { |
6294 | CONTRACTL { |
6295 | NOTHROW; |
6296 | GC_NOTRIGGER; |
6297 | } |
6298 | CONTRACTL_END; |
6299 | |
6300 | ExecutionState *pES = (ExecutionState *) pData; |
6301 | StackWalkAction action = SWA_CONTINUE; |
6302 | |
6303 | if (pES->m_FirstPass) { |
6304 | if (pCF->IsFrameless()) { |
6305 | #ifdef _TARGET_X86_ |
6306 | pES->m_FirstPass = FALSE; |
6307 | #else |
6308 | _ASSERTE(!"Platform NYI" ); |
6309 | #endif |
6310 | |
6311 | pES->m_IsJIT = TRUE; |
6312 | pES->m_pFD = pCF->GetFunction(); |
6313 | pES->m_MethodToken = pCF->GetMethodToken(); |
6314 | // We do not care if the code is interruptible |
6315 | pES->m_IsInterruptible = FALSE; |
6316 | pES->m_RelOffset = pCF->GetRelOffset(); |
6317 | pES->m_pJitManager = pCF->GetJitManager(); |
6318 | } |
6319 | } |
6320 | else { |
6321 | #ifdef _TARGET_X86_ |
6322 | if (pCF->IsFrameless()) { |
6323 | PREGDISPLAY pRDT = pCF->GetRegisterSet(); |
6324 | if (pRDT) { |
6325 | // pPC points to the return address sitting on the stack, as our |
6326 | // current EIP for the penultimate stack frame. |
6327 | pES->m_ppvRetAddrPtr = (void **) pRDT->PCTAddr; |
6328 | action = SWA_ABORT; |
6329 | } |
6330 | } |
6331 | #else |
6332 | _ASSERTE(!"Platform NYI" ); |
6333 | #endif |
6334 | } |
6335 | return action; |
6336 | } |
6337 | |
6338 | // |
6339 | // The function below, ThreadCaughtInKernelModeExceptionHandling, exists to detect and work around a very subtle |
6340 | // race that we have when we suspend a thread while that thread is in the kernel handling an exception. |
6341 | // |
6342 | // When a user-mode thread takes an exception, the OS must get involved to handle that exception before user-mode |
6343 | // exception handling takes place. The exception causes the thread to enter kernel-mode. To handle the exception, |
6344 | // the kernel does the following: 1) pushes a CONTEXT, then an EXCEPTION_RECORD, and finally an EXCEPTION_POINTERS |
6345 | // struct onto the thread's user-mode stack. 2) the Esp value in the thread's user-mode context is updated to |
6346 | // reflect the fact that these structures have just been pushed. 3) some segment registers in the user-mode context |
6347 | // are modified. 4) the Eip value in the user-mode context is changed to point to the user-mode exception dispatch |
6348 | // routine. 5) the kernel resumes user-mode execution with the altered context. |
6349 | // |
// Note that during this entire process: 1) the thread can be suspended by another user-mode thread, and 2)
6351 | // Get/SetThreadContext all operate on the user-mode context. |
6352 | // |
6353 | // There are two important races to consider here: a race with attempting to hijack the thread in HandledJITCase, |
6354 | // and a race attempting to trace the thread's stack in HandledJITCase. |
6355 | // |
6356 | // |
6357 | // Race #1: failure to hijack a thread in HandledJITCase. |
6358 | // |
// In HandledJITCase, if we see that a thread's Eip is in managed code at an interruptible point, we will attempt
// to move the thread to a hijack in order to stop its execution for a variety of reasons (GC, debugger, user-mode
// suspension, etc.). We do this by suspending the thread, inspecting Eip, changing Eip to the address of the hijack
6362 | // routine, and resuming the thread. |
6363 | // |
6364 | // The problem here is that in step #4 above, the kernel is going to change Eip in the thread's context to point to |
6365 | // the user-mode exception dispatch routine. If we suspend a thread when it has taken an exception in managed code, |
6366 | // we may see Eip pointing to managed code and attempt to hijack the thread. When we resume the thread, step #4 |
6367 | // will eventually execute and the thread will go to the user-mode exception dispatch routine instead of to our |
6368 | // hijack. |
6369 | // |
// We tolerate this by recognizing that this has happened when we arrive in our exception handler
6371 | // (COMPlusFrameHandler), and we fix up the IP in the context passed to the handler. |
6372 | // |
6373 | // |
6374 | // Race #2: inability to trace a managed call stack |
6375 | // |
6376 | // If we suspend a thread after step #2 above, but before step #4, then we will see an Eip pointing to managed |
6377 | // code, but an Esp that points to the newly pushed exception structures. If we are in a managed function that does |
6378 | // not have an Ebp frame, the return address will be relative to Esp and we will not be able to resolve the return |
6379 | // address properly. Since we will attempt to place a return address hijack (as part of our heroic efforts to trap |
6380 | // the thread quickly), we may end up writing over random memory with our hijack. This is obviously extremely |
// bad. Realistically, any attempt to trace a thread's stack in this case is suspect, even if the managed function
// has an EBP frame.
6383 | // |
6384 | // The solution is to attempt to detect this race and abandon the hijack attempt. We have developed the following |
6385 | // heuristic to detect this case. Basically, we look to see if Esp points to an EXCEPTION_POINTERS structure, and |
6386 | // that this structure points to valid EXCEPTION_RECORD and CONTEXT structures. They must be ordered on the stack, |
6387 | // and the faulting address in the EXCEPTION_RECORD should be the thread's current Eip, and the Eip in the CONTEXT |
6388 | // should be the thread's current Eip. |
6389 | // |
6390 | // This is the heuristic codified. Given Eip and Esp from the thread's current context: |
6391 | // |
6392 | // 1. if Eip points to a managed function, and... |
6393 | // 2. the pointer at Esp is equal to Esp + sizeof(EXCEPTION_POINTERS), and... |
6394 | // 3. the faulting address in the EXCEPTION_RECORD at that location is equal to the current Eip, and... |
6395 | // 4. the NumberParameters field in the EXCEPTION_RECORD is valid (between 0 and EXCEPTION_MAXIMUM_PARAMETERS), and... |
6396 | // 5. the pointer at Esp + 4 is equal to Esp + sizeof(EXCEPTION_POINTERS) + the dynamic size of the EXCEPTION_RECORD, and... |
// 6. the Eip value of the CONTEXT at that location is equal to the current Eip, then we have recognized the race.
6398 | // |
6399 | // The validation of Eip in both places, combined with ensuring that the pointer values are on the thread's stack |
6400 | // make this a safe heuristic to evaluate. Even if one could end up in a function with the stack looking exactly |
6401 | // like this, and even if we are trying to suspend such a thread and we catch it at the Eip that matches the values |
6402 | // at the proper places on the stack, then the worst that will happen is we won't attempt to hijack the thread at |
6403 | // that point. We'll resume it and try again later. There will be at least one other instruction in the function |
6404 | // that is not at the Eip value on the stack, and we'll be able to trace the thread's stack from that instruction |
6405 | // and place the return address hijack. |
6406 | // |
6407 | // As races go, race #1 above is very, very easy to hit. We hit it in the wild before we shipped V1, and a simple |
6408 | // test program with one thread constantly AV'ing and another thread attempting to suspend the first thread every |
// half second hits the race almost instantly.
6410 | // |
6411 | // Race #2 is extremely rare in comparison. The same program properly instrumented only hits the race about 5 times |
6412 | // every 2000 attempts or so. We did not hit this even in very stressful exception tests and |
6413 | // it's never been seen in the wild. |
6414 | // |
6415 | // Note: a new feature has been added in recent OS's that allows us to detect both of these races with a simple |
6416 | // call to GetThreadContext. This feature exists on all Win64 platforms, so this change is only for 32-bit |
6417 | // platforms. We've asked for this fix to be applied to future 32-bit OS's, so we can remove this on those |
6418 | // platforms when that happens. Furthermore, once we stop supporting the older 32-bit OS versions that don't have |
6419 | // the new feature, we can remove these altogether. |
6420 | // |
6421 | // WARNING: Interrupts (int 3) immediately increment the IP whereas traps (AVs) do not. |
6422 | // So this heuristic only works for trap, but not for interrupts. As a result, the race |
6423 | // is still a problem for interrupts. This means that the race can cause a process crash |
6424 | // if the managed debugger puts an "int 3" in order to do a stepping operation, |
6425 | // and GC or a sampling profiler tries to suspend the thread. This can be handled |
6426 | // by modifying the code below to additionally check if the instruction just before |
6427 | // the IP is an "int 3". |
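//
// A minimal sketch of that additional check (hypothetical, not wired into the code below):
// "int 3" encodes as the single byte 0xCC on x86, and for interrupts the reported Eip points
// just past it, so the ExceptionAddress comparison could be relaxed to something like
//
//     BOOL looksLikeInt3 = (*(BYTE*)(ctx->Eip - 1) == 0xCC);   // assumes the code byte is readable
//     if ((UINT_PTR)pER->ExceptionAddress != ctx->Eip &&
//         !(looksLikeInt3 && (UINT_PTR)pER->ExceptionAddress == ctx->Eip - 1))
//     {
//         return FALSE;
//     }
//
// so that the race is also recognized when the thread was stopped on a breakpoint.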
6428 | // |
6429 | |
6430 | #ifdef _TARGET_X86_ |
6431 | |
6432 | #ifndef FEATURE_PAL |
6433 | #define WORKAROUND_RACES_WITH_KERNEL_MODE_EXCEPTION_HANDLING |
6434 | #endif // !FEATURE_PAL |
6435 | |
6436 | #ifdef WORKAROUND_RACES_WITH_KERNEL_MODE_EXCEPTION_HANDLING |
6437 | BOOL ThreadCaughtInKernelModeExceptionHandling(Thread *pThread, CONTEXT *ctx) |
6438 | { |
6439 | CONTRACTL |
6440 | { |
6441 | NOTHROW; |
6442 | GC_NOTRIGGER; |
6443 | MODE_ANY; |
6444 | PRECONDITION(pThread != NULL); |
6445 | PRECONDITION(ctx != NULL); |
6446 | } |
6447 | CONTRACTL_END; |
6448 | |
6449 | // Validate that Esp plus all of our maximum structure sizes is on the thread's stack. We use the cached bounds |
6450 | // on the Thread object. If we're that close to the top of the thread's stack, then we can't possibly have hit |
6451 | // the race. If we pass this test, we can assume all memory accesses below are legal, since they're all on the |
6452 | // thread's stack. |
6453 | if ((ctx->Esp + sizeof(EXCEPTION_POINTERS) + sizeof(EXCEPTION_RECORD) + sizeof(CONTEXT)) >= |
6454 | (UINT_PTR)pThread->GetCachedStackBase()) |
6455 | { |
6456 | return FALSE; |
6457 | } |
6458 | |
6459 | // The calculations below assume that a DWORD is the same size as a pointer. Since this is only needed on |
6460 | // 32-bit platforms, this should be fine. |
6461 | _ASSERTE(sizeof(DWORD) == sizeof(void*)); |
6462 | |
// There are cases where ESP is just decremented but the page is not touched, so the page is not committed or
// still has the page guard bit set. We can't hit the race in such a case, so we just leave. Besides, we can't access
// memory that has the page guard flag set or is not committed.
6466 | MEMORY_BASIC_INFORMATION mbi; |
6467 | #undef VirtualQuery |
6468 | // This code can run below YieldTask, which means that it must not call back into the host. |
6469 | // The reason is that YieldTask is invoked by the host, and the host needs not be reentrant. |
6470 | if (VirtualQuery((LPCVOID)(UINT_PTR)ctx->Esp, &mbi, sizeof(mbi)) == sizeof(mbi)) |
6471 | { |
6472 | if (!(mbi.State & MEM_COMMIT) || (mbi.Protect & PAGE_GUARD)) |
6473 | return FALSE; |
6474 | } |
6475 | else |
6476 | STRESS_LOG0 (LF_SYNC, ERROR, "VirtualQuery failed!" ); |
6477 | #define VirtualQuery(lpAddress, lpBuffer, dwLength) Dont_Use_VirtualQuery(lpAddress, lpBuffer, dwLength) |
6478 | |
6479 | // The first two values on the stack should be a pointer to the EXCEPTION_RECORD and a pointer to the CONTEXT. |
6480 | UINT_PTR Esp = (UINT_PTR)ctx->Esp; |
6481 | UINT_PTR ER = *((UINT_PTR*)Esp); |
6482 | UINT_PTR CTX = *((UINT_PTR*)(Esp + sizeof(EXCEPTION_RECORD*))); |
6483 | |
6484 | // The EXCEPTION_RECORD should be at Esp + sizeof(EXCEPTION_POINTERS)... if it's not, then we haven't hit the race. |
6485 | if (ER != (Esp + sizeof(EXCEPTION_POINTERS))) |
6486 | { |
6487 | return FALSE; |
6488 | } |
6489 | |
6490 | // Assume we have an EXCEPTION_RECORD at Esp + sizeof(EXCEPTION_POINTERS) and look at values within that. |
6491 | EXCEPTION_RECORD *pER = (EXCEPTION_RECORD*)ER; |
6492 | |
6493 | // Make sure the faulting address in the EXCEPTION_RECORD matches the thread's current Eip. |
6494 | if ((UINT_PTR)pER->ExceptionAddress != ctx->Eip) |
6495 | { |
6496 | return FALSE; |
6497 | } |
6498 | |
6499 | // Validate the number of exception parameters. |
6500 | if ((pER->NumberParameters > EXCEPTION_MAXIMUM_PARAMETERS)) |
6501 | { |
6502 | return FALSE; |
6503 | } |
6504 | |
// We have a plausible number of exception parameters, so compute the exact size of this exception
// record. Remember, an EXCEPTION_RECORD has a variable-sized array of optional information at the end called
// the ExceptionInformation. It's an array of pointers up to EXCEPTION_MAXIMUM_PARAMETERS in length.
6508 | DWORD exceptionRecordSize = sizeof(EXCEPTION_RECORD) - |
6509 | ((EXCEPTION_MAXIMUM_PARAMETERS - pER->NumberParameters) * sizeof(pER->ExceptionInformation[0])); |
6510 | |
6511 | // On Vista WOW on X64, the OS pushes the maximum number of parameters onto the stack. |
6512 | DWORD exceptionRecordMaxSize = sizeof(EXCEPTION_RECORD); |
6513 | |
6514 | // The CONTEXT pointer should be pointing right after the EXCEPTION_RECORD. |
6515 | if ((CTX != (ER + exceptionRecordSize)) && |
6516 | (CTX != (ER + exceptionRecordMaxSize))) |
6517 | { |
6518 | return FALSE; |
6519 | } |
6520 | |
6521 | // Assume we have a CONTEXT at Esp + 8 + exceptionRecordSize and look at values within that. |
6522 | CONTEXT *pCTX = (CONTEXT*)CTX; |
6523 | |
6524 | // Make sure the Eip in the CONTEXT on the stack matches the current Eip value. |
6525 | if (pCTX->Eip != ctx->Eip) |
6526 | { |
6527 | return FALSE; |
6528 | } |
6529 | |
// If none of the checks above bailed out, then we've hit race #2 described in the text before this function.
6531 | STRESS_LOG3(LF_SYNC, LL_INFO100, |
6532 | "ThreadCaughtInKernelModeExceptionHandling returning TRUE. Eip=%p, Esp=%p, ExceptionCode=%p\n" , |
6533 | ctx->Eip, ctx->Esp, pER->ExceptionCode); |
6534 | |
6535 | return TRUE; |
6536 | } |
6537 | #endif //WORKAROUND_RACES_WITH_KERNEL_MODE_EXCEPTION_HANDLING |
6538 | #endif //_TARGET_X86_ |
6539 | |
6540 | //--------------------------------------------------------------------------------------- |
6541 | // |
6542 | // Helper used by HandledJITCase and others (like the profiling API) who need an |
6543 | // absolutely reliable register context. |
6544 | // |
6545 | // Arguments: |
6546 | // * dwOptions - [in] Combination of flags from enum |
6547 | // GetSafelyRedirectableThreadContextOptions to customize the checks performed by |
6548 | // this function. |
6549 | // * pCtx - [out] This Thread's current context. Callers may rely on this only if nonzero |
6550 | // is returned |
6551 | // * pRD - [out] Matching REGDISPLAY filled from the pCtx found by this function. |
6552 | // Callers may rely on this only if nonzero is returned |
6553 | // |
6554 | // Return Value: |
6555 | // Nonzero iff all requested checks have succeeded, which would imply that it is |
6556 | // a reliable time to use this Thread's context. |
6557 | // |
6558 | BOOL Thread::GetSafelyRedirectableThreadContext(DWORD dwOptions, CONTEXT * pCtx, REGDISPLAY * pRD) |
6559 | { |
6560 | CONTRACTL { |
6561 | NOTHROW; |
6562 | GC_NOTRIGGER; |
6563 | } |
6564 | CONTRACTL_END; |
6565 | |
6566 | _ASSERTE(pCtx != NULL); |
6567 | _ASSERTE(pRD != NULL); |
6568 | |
// We are never in interruptible code if there is a filter context put in place by the debugger.
6570 | if (GetFilterContext() != NULL) |
6571 | return FALSE; |
6572 | |
6573 | #ifdef DEBUGGING_SUPPORTED |
6574 | if ((dwOptions & kCheckDebuggerBreakpoints) != 0) |
6575 | { |
6576 | // If we are running under the control of a managed debugger that may have placed breakpoints in the code stream, |
6577 | // then there is a special case that we need to check. See the comments in debugger.cpp for more information. |
6578 | if (CORDebuggerAttached() && (g_pDebugInterface->IsThreadContextInvalid(this))) |
6579 | return FALSE; |
6580 | } |
6581 | #endif // DEBUGGING_SUPPORTED |
6582 | |
6583 | // Make sure we specify CONTEXT_EXCEPTION_REQUEST to detect "trap frame reporting". |
6584 | _ASSERTE(GetFilterContext() == NULL); |
6585 | |
6586 | ZeroMemory(pCtx, sizeof(*pCtx)); |
6587 | pCtx->ContextFlags = CONTEXT_FULL | CONTEXT_EXCEPTION_REQUEST; |
6588 | if (!EEGetThreadContext(this, pCtx)) |
6589 | { |
6590 | return FALSE; |
6591 | } |
6592 | |
6593 | // |
// Workaround for WOW64 problems. Only do this workaround if a) this is x86, and b) the OS does not support trap frame reporting.
// If the OS *does* support trap frame reporting, then the call to IsContextSafeToRedirect below will return FALSE if we run
6596 | // into this race. |
6597 | // |
6598 | #ifdef _TARGET_X86_ |
6599 | if (!(pCtx->ContextFlags & CONTEXT_EXCEPTION_REPORTING) && |
6600 | ((dwOptions & kPerfomLastRedirectIPCheck) != 0)) |
6601 | { |
6602 | // This code fixes a race between GetThreadContext and NtContinue. If we redirect managed code |
6603 | // at the same place twice in a row, we run the risk of reading a bogus CONTEXT when we redirect |
6604 | // the second time. This leads to access violations on x86 machines. To fix the problem, we |
6605 | // never redirect at the same instruction pointer that we redirected at on the previous GC. |
6606 | if (GetIP(pCtx) == m_LastRedirectIP) |
6607 | { |
6608 | // We need to test for an infinite loop in assembly, as this will break the heuristic we |
6609 | // are using. |
6610 | const BYTE short_jmp = 0xeb; // Machine code for a short jump. |
6611 | const BYTE self = 0xfe; // -2. Short jumps are calculated as [ip]+2+[second_byte]. |
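// Together (EB FE) these encode "jmp $", i.e. a jump to itself.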
6612 | |
6613 | // If we find that we are in an infinite loop, we'll set the last redirected IP to 0 so that we will |
// redirect the next time we attempt it. Delaying one iteration allows us to narrow the window of
6615 | // the race we are working around in this corner case. |
6616 | BYTE *ip = (BYTE *)m_LastRedirectIP; |
6617 | if (ip[0] == short_jmp && ip[1] == self) |
6618 | m_LastRedirectIP = 0; |
6619 | |
6620 | // We set a hard limit of 5 times we will spin on this to avoid any tricky race which we have not |
6621 | // accounted for. |
6622 | m_SpinCount++; |
6623 | if (m_SpinCount >= 5) |
6624 | m_LastRedirectIP = 0; |
6625 | |
6626 | STRESS_LOG0(LF_GC, LL_INFO10000, "GetSafelyRedirectableThreadContext() - Cannot redirect at the same IP as the last redirection.\n" ); |
6627 | return FALSE; |
6628 | } |
6629 | } |
6630 | #endif |
6631 | |
6632 | if (!IsContextSafeToRedirect(pCtx)) |
6633 | { |
6634 | STRESS_LOG0(LF_GC, LL_INFO10000, "GetSafelyRedirectableThreadContext() - trap frame reporting an invalid CONTEXT\n" ); |
6635 | return FALSE; |
6636 | } |
6637 | |
6638 | ZeroMemory(pRD, sizeof(*pRD)); |
6639 | if (!InitRegDisplay(pRD, pCtx, true)) |
6640 | return FALSE; |
6641 | |
6642 | return TRUE; |
6643 | } |
6644 | |
6645 | // Called while the thread is suspended. If we aren't in JITted code, this isn't |
6646 | // a JITCase and we return FALSE. If it is a JIT case and we are in interruptible |
6647 | // code, then we are handled. Our caller has found a good spot and can keep us |
6648 | // suspended. If we aren't in interruptible code, then we aren't handled. So we |
6649 | // pick a spot to hijack the return address and our caller will wait to get us |
6650 | // somewhere safe. |
6651 | BOOL Thread::HandledJITCase(BOOL ForTaskSwitchIn) |
6652 | { |
6653 | CONTRACTL { |
6654 | NOTHROW; |
6655 | GC_NOTRIGGER; |
6656 | } |
6657 | CONTRACTL_END; |
6658 | |
6659 | BOOL ret = FALSE; |
6660 | ExecutionState esb; |
6661 | StackWalkAction action; |
6662 | |
6663 | CONTEXT ctx; |
6664 | REGDISPLAY rd; |
6665 | if (!GetSafelyRedirectableThreadContext( |
6666 | kPerfomLastRedirectIPCheck | kCheckDebuggerBreakpoints, |
6667 | &ctx, |
6668 | &rd)) |
6669 | { |
6670 | STRESS_LOG0(LF_GC, LL_INFO10000, "HandledJITCase() - GetSafelyRedirectableThreadContext() returned FALSE\n" ); |
6671 | return FALSE; |
6672 | } |
6673 | |
6674 | PCODE ip = GetIP(&ctx); |
6675 | if (!ExecutionManager::IsManagedCode(ip)) |
6676 | { |
6677 | return FALSE; |
6678 | } |
6679 | |
6680 | #ifdef WORKAROUND_RACES_WITH_KERNEL_MODE_EXCEPTION_HANDLING |
6681 | if (ThreadCaughtInKernelModeExceptionHandling(this, &ctx)) |
6682 | { |
6683 | return FALSE; |
6684 | } |
6685 | #endif //WORKAROUND_RACES_WITH_KERNEL_MODE_EXCEPTION_HANDLING |
6686 | |
6687 | #ifdef _DEBUG |
6688 | // We know IP is in managed code, mark current thread as safe for calls into host |
6689 | Thread * pCurThread = GetThread(); |
6690 | if (pCurThread != NULL) |
6691 | { |
6692 | pCurThread->dbg_m_cSuspendedThreadsWithoutOSLock ++; |
6693 | _ASSERTE(pCurThread->dbg_m_cSuspendedThreadsWithoutOSLock <= pCurThread->dbg_m_cSuspendedThreads); |
6694 | } |
6695 | #endif //_DEBUG |
6696 | |
6697 | // Walk one or two frames of the stack... |
6698 | if (ForTaskSwitchIn) { |
6699 | action = StackWalkFramesEx(&rd,SWCB_GetExecutionStateForSwitchIn, &esb, QUICKUNWIND | DISABLE_MISSING_FRAME_DETECTION | THREAD_IS_SUSPENDED | ALLOW_ASYNC_STACK_WALK, NULL); |
6700 | } |
6701 | else { |
6702 | #ifdef TIME_SUSPEND |
6703 | DWORD startCrawl = g_SuspendStatistics.GetTime(); |
6704 | #endif |
6705 | action = StackWalkFramesEx(&rd,SWCB_GetExecutionState, &esb, |
6706 | QUICKUNWIND | DISABLE_MISSING_FRAME_DETECTION | |
6707 | THREAD_IS_SUSPENDED | ALLOW_ASYNC_STACK_WALK, NULL); |
6708 | |
6709 | #ifdef TIME_SUSPEND |
6710 | g_SuspendStatistics.crawl.Accumulate( |
6711 | SuspendStatistics::GetElapsed(startCrawl, |
6712 | g_SuspendStatistics.GetTime())); |
6713 | |
6714 | g_SuspendStatistics.cntHijackCrawl++; |
6715 | #endif |
6716 | } |
6717 | |
6718 | // |
6719 | // action should either be SWA_ABORT, in which case we properly walked |
6720 | // the stack frame and found out whether this is a JIT case, or |
6721 | // SWA_FAILED, in which case the walk couldn't even be started because |
6722 | // there are no stack frames, which also isn't a JIT case. |
6723 | // |
6724 | if (action == SWA_ABORT && esb.m_IsJIT) |
6725 | { |
6726 | // If we are interruptible and we are in cooperative mode, our caller can |
6727 | // just leave us suspended. |
6728 | if (esb.m_IsInterruptible && m_fPreemptiveGCDisabled) |
6729 | { |
6730 | _ASSERTE(!ThreadStore::HoldingThreadStore(this)); |
6731 | ret = TRUE; |
6732 | } |
6733 | else |
6734 | if (esb.m_ppvRetAddrPtr) |
6735 | { |
// We need to hijack the return address. Base this on whether or not
// the method returns an object reference, so we know whether to protect
// it or not.
6739 | EECodeInfo codeInfo(ip); |
6740 | VOID *pvHijackAddr = GetHijackAddr(this, &codeInfo); |
6741 | |
6742 | #ifdef FEATURE_ENABLE_GCPOLL |
6743 | // On platforms that support both hijacking and GC polling |
6744 | // decide whether to hijack based on a configuration value. |
6745 | // COMPlus_GCPollType = 1 is the setting that enables hijacking |
6746 | // in GCPOLL enabled builds. |
6747 | EEConfig::GCPollType pollType = g_pConfig->GetGCPollType(); |
6748 | if (EEConfig::GCPOLL_TYPE_HIJACK == pollType || EEConfig::GCPOLL_TYPE_DEFAULT == pollType) |
6749 | #endif // FEATURE_ENABLE_GCPOLL |
6750 | { |
6751 | HijackThread(pvHijackAddr, &esb); |
6752 | } |
6753 | } |
6754 | } |
6755 | // else it's not even a JIT case |
6756 | |
6757 | #ifdef _DEBUG |
// Restore the count of suspended threads without the OS lock.
6759 | if (pCurThread != NULL) |
6760 | { |
6761 | pCurThread->dbg_m_cSuspendedThreadsWithoutOSLock--; |
6762 | } |
6763 | #endif //_DEBUG |
6764 | |
6765 | STRESS_LOG1(LF_SYNC, LL_INFO10000, " HandledJitCase returning %d\n" , ret); |
6766 | return ret; |
6767 | } |
6768 | |
6769 | #endif // !PLATFORM_UNIX |
6770 | |
6771 | #endif // FEATURE_HIJACK |
6772 | |
6773 | // Some simple helpers to keep track of the threads we are waiting for |
6774 | void Thread::MarkForSuspension(ULONG bit) |
6775 | { |
6776 | CONTRACTL { |
6777 | NOTHROW; |
6778 | GC_NOTRIGGER; |
6779 | } |
6780 | CONTRACTL_END; |
6781 | |
6782 | // CoreCLR does not support user-requested thread suspension |
6783 | _ASSERTE(bit == TS_DebugSuspendPending || |
6784 | bit == (TS_DebugSuspendPending | TS_DebugWillSync)); |
6785 | |
6786 | _ASSERTE(IsAtProcessExit() || ThreadStore::HoldingThreadStore()); |
6787 | |
6788 | _ASSERTE((m_State & bit) == 0); |
6789 | |
6790 | FastInterlockOr((ULONG *) &m_State, bit); |
6791 | ThreadStore::TrapReturningThreads(TRUE); |
6792 | } |
6793 | |
6794 | void Thread::UnmarkForSuspension(ULONG mask) |
6795 | { |
6796 | CONTRACTL { |
6797 | NOTHROW; |
6798 | GC_NOTRIGGER; |
6799 | } |
6800 | CONTRACTL_END; |
6801 | |
6802 | // CoreCLR does not support user-requested thread suspension |
6803 | _ASSERTE(mask == ~TS_DebugSuspendPending); |
6804 | |
6805 | _ASSERTE(IsAtProcessExit() || ThreadStore::HoldingThreadStore()); |
6806 | |
6807 | _ASSERTE((m_State & ~mask) != 0); |
6808 | |
6809 | // we decrement the global first to be able to satisfy the assert from DbgFindThread |
6810 | ThreadStore::TrapReturningThreads(FALSE); |
6811 | FastInterlockAnd((ULONG *) &m_State, mask); |
6812 | } |
6813 | |
6814 | //---------------------------------------------------------------------------- |
6815 | |
6816 | void ThreadSuspend::RestartEE(BOOL bFinishedGC, BOOL SuspendSucceded) |
6817 | { |
6818 | #ifdef TIME_SUSPEND |
6819 | g_SuspendStatistics.StartRestart(); |
6820 | #endif //TIME_SUSPEND |
6821 | |
6822 | FireEtwGCRestartEEBegin_V1(GetClrInstanceId()); |
6823 | |
6824 | // |
6825 | // SyncClean holds a list of things to be cleaned up when it's possible. |
6826 | // SyncClean uses the GC mode to synchronize access to this list. Threads must be |
6827 | // in COOP mode to add things to the list, and the list can only be cleaned up |
6828 | // while no threads are adding things. |
6829 | // Since we know that no threads are in COOP mode at this point (because the EE is |
6830 | // suspended), we clean up the list here. |
6831 | // |
6832 | SyncClean::CleanUp(); |
6833 | |
6834 | #ifdef PROFILING_SUPPORTED |
// If a profiler is keeping track of suspend events, notify it. This notification
// must happen before we set TrapReturningThreads to FALSE because as soon as
// we remove the return trap, threads can start "running" managed code again as
6838 | // they return from unmanaged. (Whidbey Bug #7505) |
6839 | // Also must notify before setting GcInProgress = FALSE. |
6840 | // |
6841 | // It's very odd that we do this here, in ThreadSuspend::RestartEE, while the |
6842 | // corresponding call to RuntimeSuspendStarted is done at a lower architectural layer, |
6843 | // in ThreadSuspend::SuspendRuntime. |
6844 | { |
6845 | BEGIN_PIN_PROFILER(CORProfilerTrackSuspends()); |
6846 | g_profControlBlock.pProfInterface->RuntimeResumeStarted(); |
6847 | END_PIN_PROFILER(); |
6848 | } |
6849 | #endif // PROFILING_SUPPORTED |
6850 | |
6851 | // |
6852 | // Unhijack all threads, and reset their "suspend pending" flags. Why isn't this in |
6853 | // Thread::ResumeRuntime? |
6854 | // |
6855 | Thread *thread = NULL; |
6856 | while ((thread = ThreadStore::GetThreadList(thread)) != NULL) |
6857 | { |
6858 | thread->PrepareForEERestart(SuspendSucceded); |
6859 | } |
6860 | |
6861 | // |
6862 | // Revert to being a normal thread |
6863 | // |
6864 | ClrFlsClearThreadType (ThreadType_DynamicSuspendEE); |
6865 | GCHeapUtilities::GetGCHeap()->SetGCInProgress(false); |
6866 | |
6867 | // |
6868 | // Allow threads to enter COOP mode (though we still need to wake the ones |
6869 | // that we hijacked). |
6870 | // |
6871 | // Note: this is the last barrier that keeps managed threads |
6872 | // from entering cooperative mode. If the sequence changes, |
6873 | // you may have to change routine GCHeapUtilities::SafeToRestartManagedThreads |
6874 | // as well. |
6875 | // |
6876 | ThreadStore::TrapReturningThreads(FALSE); |
6877 | g_pSuspensionThread = 0; |
6878 | |
6879 | // |
6880 | // Any threads that are waiting in WaitUntilGCComplete will continue now. |
6881 | // |
6882 | GCHeapUtilities::GetGCHeap()->SetWaitForGCEvent(); |
6883 | _ASSERTE(IsGCSpecialThread() || ThreadStore::HoldingThreadStore()); |
6884 | |
6885 | ResumeRuntime(bFinishedGC, SuspendSucceded); |
6886 | |
6887 | FireEtwGCRestartEEEnd_V1(GetClrInstanceId()); |
6888 | |
6889 | #ifdef TIME_SUSPEND |
6890 | g_SuspendStatistics.EndRestart(); |
6891 | #endif //TIME_SUSPEND |
6892 | } |
6893 | |
6894 | // The contract between GC and the EE, for starting and finishing a GC is as follows: |
6895 | // |
6896 | // SuspendEE: |
6897 | // LockThreadStore |
6898 | // SetGCInProgress |
6899 | // SuspendRuntime |
6900 | // |
6901 | // ... perform the GC ... |
6902 | // |
6903 | // RestartEE: |
6904 | // SetGCDone |
6905 | // ResumeRuntime |
6906 | // calls UnlockThreadStore |
6907 | // |
6908 | // Note that this is intentionally *not* symmetrical. The EE will assert that the |
6909 | // GC does most of this stuff in the correct sequence. |
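//
// Schematically, a collection drives this contract roughly as follows (the real call sites
// live in the GC and debugger code):
//
//     ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_FOR_GC);
//     // ... perform the GC ...
//     ThreadSuspend::RestartEE(TRUE /* bFinishedGC */, TRUE /* SuspendSucceded */);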
6910 | |
6911 | // |
6912 | // This is the only way to call ThreadSuspend::SuspendRuntime, and that method is |
6913 | // so tightly coupled to this one, with intermingled responsibilities, that we don't |
6914 | // understand why we have a separation at all. At some point we should refactor all of |
6915 | // the suspension code into a separate abstraction, which we would like to call the |
6916 | // "managed execution lock." The current "layering" of this stuff has it mixed |
6917 | // randomly into the Thread and GC code, and split into almost completely arbitrary |
6918 | // layers. |
6919 | // |
6920 | void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) |
6921 | { |
6922 | #ifdef TIME_SUSPEND |
6923 | g_SuspendStatistics.StartSuspend(); |
6924 | #endif //TIME_SUSPEND |
6925 | |
6926 | BOOL gcOnTransitions; |
6927 | |
6928 | ETW::GCLog::ETW_GC_INFO Info; |
6929 | Info.SuspendEE.Reason = reason; |
6930 | Info.SuspendEE.GcCount = (((reason == SUSPEND_FOR_GC) || (reason == SUSPEND_FOR_GC_PREP)) ? |
6931 | (ULONG)GCHeapUtilities::GetGCHeap()->GetGcCount() : (ULONG)-1); |
6932 | |
6933 | FireEtwGCSuspendEEBegin_V1(Info.SuspendEE.Reason, Info.SuspendEE.GcCount, GetClrInstanceId()); |
6934 | |
6935 | LOG((LF_SYNC, INFO3, "Suspending the runtime for reason %d\n" , reason)); |
6936 | |
gcOnTransitions = GC_ON_TRANSITIONS(FALSE); // don't do GC for GCStress 3
6938 | |
6939 | Thread* pCurThread = GetThread(); |
6940 | |
6941 | DWORD dwSwitchCount = 0; |
6942 | |
6943 | // Note: we need to make sure to re-set m_pThreadAttemptingSuspendForGC when we retry |
6944 | // due to the debugger case below! |
6945 | retry_for_debugger: |
6946 | |
6947 | // |
// Set a variable to indicate that this thread is performing a true GC.
6949 | // This gives this thread priority over other threads that are trying to acquire the ThreadStore Lock |
6950 | // for other reasons. |
6951 | // |
6952 | if (reason == ThreadSuspend::SUSPEND_FOR_GC || reason == ThreadSuspend::SUSPEND_FOR_GC_PREP) |
6953 | { |
6954 | m_pThreadAttemptingSuspendForGC = pCurThread; |
6955 | |
6956 | // |
6957 | // also unblock any thread waiting around for this thread to suspend. This prevents us from completely |
6958 | // starving other suspension clients, such as the debugger, which we otherwise would do because of |
6959 | // the priority we just established. |
6960 | // |
6961 | g_pGCSuspendEvent->Set(); |
6962 | } |
6963 | |
6964 | #ifdef TIME_SUSPEND |
6965 | DWORD startAcquire = g_SuspendStatistics.GetTime(); |
6966 | #endif |
6967 | |
6968 | // |
// Acquire the TSL. We will hold this until we restart the EE.
6970 | // |
6971 | ThreadSuspend::LockThreadStore(reason); |
6972 | |
6973 | #ifdef TIME_SUSPEND |
6974 | g_SuspendStatistics.acquireTSL.Accumulate(SuspendStatistics::GetElapsed(startAcquire, |
6975 | g_SuspendStatistics.GetTime())); |
6976 | #endif |
6977 | |
6978 | // |
6979 | // If we've blocked other threads that are waiting for the ThreadStore lock, unblock them now |
6980 | // (since we already got it). This allows them to get the TSL after we release it. |
6981 | // |
6982 | if ( s_hAbortEvtCache != NULL && |
6983 | (reason == ThreadSuspend::SUSPEND_FOR_GC || reason == ThreadSuspend::SUSPEND_FOR_GC_PREP)) |
6984 | { |
6985 | LOG((LF_SYNC, INFO3, "GC thread is backing out the suspend abort event.\n" )); |
6986 | s_hAbortEvt = NULL; |
6987 | |
6988 | LOG((LF_SYNC, INFO3, "GC thread is signalling the suspend abort event.\n" )); |
6989 | s_hAbortEvtCache->Set(); |
6990 | } |
6991 | |
6992 | // |
6993 | // Also null-out m_pThreadAttemptingSuspendForGC since it should only matter if s_hAbortEvt is |
6994 | // in play. |
6995 | // |
6996 | if (reason == ThreadSuspend::SUSPEND_FOR_GC || reason == ThreadSuspend::SUSPEND_FOR_GC_PREP) |
6997 | { |
6998 | m_pThreadAttemptingSuspendForGC = NULL; |
6999 | } |
7000 | |
7001 | { |
7002 | // |
7003 | // Now we're going to acquire an exclusive lock on managed code execution (including |
7004 | // "maunally managed" code in GCX_COOP regions). |
7005 | // |
7006 | // First, we reset the event that we're about to tell other threads to wait for. |
7007 | // |
7008 | GCHeapUtilities::GetGCHeap()->ResetWaitForGCEvent(); |
7009 | |
7010 | // |
7011 | // Remember that we're the one doing the GC. Actually, maybe we're not doing a GC - |
7012 | // what this really indicates is that we are trying to acquire the "managed execution lock." |
7013 | // |
7014 | { |
7015 | g_pSuspensionThread = pCurThread; |
7016 | |
7017 | // |
7018 | // Tell all threads, globally, to wait for WaitForGCEvent. |
7019 | // |
7020 | ThreadStore::TrapReturningThreads(TRUE); |
7021 | |
7022 | // |
7023 | // Remember why we're doing this. |
7024 | // |
7025 | m_suspendReason = reason; |
7026 | |
7027 | // |
7028 | // There's a GC in progress. (again, not necessarily - we suspend the EE for other reasons. |
7029 | // I wonder how much confusion this has caused....) |
7030 | // It seems like much of the above is redundant. We should investigate reducing the number |
7031 | // of mechanisms we use to indicate that a suspension is in progress. |
7032 | // |
7033 | GCHeapUtilities::GetGCHeap()->SetGCInProgress(true); |
7034 | |
7035 | // |
7036 | // Gratuitous memory barrier. (may be needed - but I'm not sure why.) |
7037 | // |
7038 | MemoryBarrier(); |
7039 | |
7040 | ClrFlsSetThreadType (ThreadType_DynamicSuspendEE); |
7041 | } |
7042 | |
7043 | HRESULT hr; |
7044 | { |
7045 | _ASSERTE(ThreadStore::HoldingThreadStore() || g_fProcessDetach); |
7046 | |
7047 | // |
7048 | // Now that we've instructed all threads to please stop, |
7049 | // go interrupt the ones that are running managed code and force them to stop. |
7050 | // This does not return successfully until all threads have acknowledged that they |
7051 | // will not run managed code. |
7052 | // |
7053 | hr = SuspendRuntime(reason); |
7054 | ASSERT( hr == S_OK || hr == ERROR_TIMEOUT); |
7055 | |
7056 | #ifdef TIME_SUSPEND |
7057 | if (hr == ERROR_TIMEOUT) |
7058 | g_SuspendStatistics.cntCollideRetry++; |
7059 | #endif |
7060 | } |
7061 | |
7062 | if (hr == ERROR_TIMEOUT) |
STRESS_LOG0(LF_SYNC, LL_INFO1000, "SysSuspension collision");
7064 | |
// If the debugging services are attached, then it's possible
7066 | // that there is a thread which appears to be stopped at a gc |
7067 | // safe point, but which really is not. If that is the case, |
7068 | // back off and try again. |
7069 | |
7070 | // If this is not the GC thread and another thread has triggered |
7071 | // a GC, then we may have bailed out of SuspendRuntime, so we |
7072 | // must resume all of the threads and tell the GC that we are |
7073 | // at a safepoint - since this is the exact same behaviour |
// that the debugger needs, just use its code.
7075 | if ((hr == ERROR_TIMEOUT) |
7076 | || Thread::ThreadsAtUnsafePlaces() |
7077 | #ifdef DEBUGGING_SUPPORTED // seriously? When would we want to disable debugging support? :) |
7078 | || (CORDebuggerAttached() && |
7079 | // When the debugger is synchronizing, trying to perform a GC could deadlock. The GC has the |
7080 | // threadstore lock and synchronization cannot complete until the debugger can get the |
7081 | // threadstore lock. However the GC can not complete until it sends the BeforeGarbageCollection |
7082 | // event, and the event can not be sent until the debugger is synchronized. In order to break |
7083 | // this deadlock cycle the GC must give up the threadstore lock, allow the debugger to synchronize, |
7084 | // then try again. |
7085 | (g_pDebugInterface->ThreadsAtUnsafePlaces() || g_pDebugInterface->IsSynchronizing())) |
7086 | #endif // DEBUGGING_SUPPORTED |
7087 | ) |
7088 | { |
7089 | // In this case, the debugger has stopped at least one |
7090 | // thread at an unsafe place. The debugger will usually |
7091 | // have already requested that we stop. If not, it will |
7092 | // usually either do so shortly, or resume the thread that is |
7093 | // at the unsafe place. Either way, we have to wait for the |
7094 | // debugger to decide what it wants to do. |
7095 | // |
7096 | // In some rare cases, the end-user debugger may have frozen |
7097 | // a thread at a gc-unsafe place, and so we'll loop forever |
7098 | // here and never resolve the deadlock. Unfortunately we can't |
7099 | // easily abort a GC |
// and so for now we just wait for the debugger to time out and
7101 | // hopefully thaw that thread. Maybe instead we should try to |
7102 | // detect this situation sooner (when thread abort is possible) |
7103 | // and notify the debugger with NotifyOfCrossThreadDependency, giving |
7104 | // it the chance to thaw other threads or abort us before getting |
7105 | // wedged in the GC. |
7106 | // |
7107 | // Note: we've still got the ThreadStore lock held. |
7108 | // |
7109 | // <REVISIT>The below manipulation of two static variables (s_hAbortEvtCache and s_hAbortEvt) |
7110 | // is protected by the ThreadStore lock, which we are still holding. But we access these |
7111 | // in ThreadSuspend::LockThreadStore, prior to obtaining the lock. </REVISIT> |
7112 | // |
7113 | LOG((LF_GCROOTS | LF_GC | LF_CORDB, |
7114 | LL_INFO10, |
7115 | "***** Giving up on current GC suspension due " |
7116 | "to debugger or timeout *****\n" )); |
7117 | |
7118 | if (s_hAbortEvtCache == NULL) |
7119 | { |
7120 | LOG((LF_SYNC, INFO3, "Creating suspend abort event.\n" )); |
7121 | |
7122 | CLREvent * pEvent = NULL; |
7123 | |
7124 | EX_TRY |
7125 | { |
7126 | pEvent = new CLREvent(); |
7127 | pEvent->CreateManualEvent(FALSE); |
7128 | s_hAbortEvtCache = pEvent; |
7129 | } |
7130 | EX_CATCH |
7131 | { |
// Bummer... couldn't init the abort event. It's a shame, but not fatal. We'll simply not use it
7133 | // on this iteration and try again next time. |
7134 | if (pEvent) { |
7135 | _ASSERTE(!pEvent->IsValid()); |
7136 | pEvent->CloseEvent(); |
7137 | delete pEvent; |
7138 | } |
7139 | } |
7140 | EX_END_CATCH(SwallowAllExceptions) |
7141 | } |
7142 | |
7143 | if (s_hAbortEvtCache != NULL) |
7144 | { |
7145 | LOG((LF_SYNC, INFO3, "Using suspend abort event.\n" )); |
7146 | s_hAbortEvt = s_hAbortEvtCache; |
7147 | s_hAbortEvt->Reset(); |
7148 | } |
7149 | |
7150 | // Mark that we're done with the gc, so that the debugger can proceed. |
7151 | RestartEE(FALSE, FALSE); |
7152 | |
7153 | LOG((LF_GCROOTS | LF_GC | LF_CORDB, |
7154 | LL_INFO10, "The EE is free now...\n" )); |
7155 | |
// If someone's trying to suspend *this* thread, this is a good opportunity.
// <REVISIT>This call to CatchAtSafePoint is redundant - PulseGCMode already checks this.</REVISIT>
7158 | if (pCurThread && pCurThread->CatchAtSafePoint()) |
7159 | { |
// <REVISIT> This assert fires on the BGC thread because we
// got a timeout.</REVISIT>
7162 | //_ASSERTE((pCurThread->PreemptiveGCDisabled()) || IsGCSpecialThread()); |
7163 | pCurThread->PulseGCMode(); // Go suspend myself. |
7164 | } |
7165 | else |
7166 | { |
7167 | // otherwise, just yield so the debugger can finish what it's doing. |
7168 | __SwitchToThread (0, ++dwSwitchCount); |
7169 | } |
7170 | |
7171 | goto retry_for_debugger; |
7172 | } |
7173 | } |
7174 | GC_ON_TRANSITIONS(gcOnTransitions); |
7175 | |
7176 | FireEtwGCSuspendEEEnd_V1(GetClrInstanceId()); |
7177 | |
7178 | #ifdef TIME_SUSPEND |
7179 | g_SuspendStatistics.EndSuspend(reason == SUSPEND_FOR_GC || reason == SUSPEND_FOR_GC_PREP); |
7180 | #endif //TIME_SUSPEND |
7181 | } |
7182 | |
7183 | #if defined(FEATURE_HIJACK) && defined(PLATFORM_UNIX) |
7184 | |
7185 | // This function is called by PAL to check if the specified instruction pointer |
7186 | // is in a function where we can safely inject activation. |
7187 | BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread) |
7188 | { |
7189 | Thread *pThread = GetThread(); |
7190 | // It is safe to call the ExecutionManager::IsManagedCode only if we are making the check for |
7191 | // a thread different from the current one or if the current thread is in the cooperative mode. |
7192 | // Otherwise ExecutionManager::IsManagedCode could deadlock if the activation happened when the |
7193 | // thread was holding the ExecutionManager's writer lock. |
7194 | // When the thread is in preemptive mode, we know for sure that it is not executing managed code. |
7195 | BOOL checkForManagedCode = !checkingCurrentThread || (pThread != NULL && pThread->PreemptiveGCDisabled()); |
7196 | return checkForManagedCode && ExecutionManager::IsManagedCode(ip); |
7197 | } |
7198 | |
7199 | // This function is called when a GC is pending. It tries to ensure that the current |
7200 | // thread is taken to a GC-safe place as quickly as possible. It does this by doing |
7201 | // one of the following: |
7202 | // |
7203 | // - If the thread is in native code or preemptive GC is not disabled, there's |
7204 | // nothing to do, so we return. |
7205 | // |
7206 | // - If the thread is in interruptible managed code, we will push a frame that |
7207 | // has information about the context that was interrupted and then switch to |
7208 | // preemptive GC mode so that the pending GC can proceed, and then switch back. |
7209 | // |
7210 | // - If the thread is in uninterruptible managed code, we will patch the return |
7211 | // address to take the thread to the appropriate stub (based on the return |
7212 | // type of the method) which will then handle preparing the thread for GC. |
7213 | // |
7214 | void HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext) |
7215 | { |
7216 | Thread *pThread = GetThread(); |
7217 | |
7218 | if (pThread->PreemptiveGCDisabled() != TRUE) |
7219 | return; |
7220 | |
7221 | #ifdef FEATURE_PERFTRACING |
7222 | // Mark that the thread is currently in managed code. |
7223 | pThread->SaveGCModeOnSuspension(); |
7224 | #endif // FEATURE_PERFTRACING |
7225 | |
7226 | PCODE ip = GetIP(interruptedContext); |
7227 | |
7228 | // This function can only be called when the interrupted thread is in |
7229 | // an activation safe point. |
7230 | _ASSERTE(CheckActivationSafePoint(ip, /* checkingCurrentThread */ TRUE)); |
7231 | |
7232 | Thread::WorkingOnThreadContextHolder workingOnThreadContext(pThread); |
7233 | if (!workingOnThreadContext.Acquired()) |
7234 | return; |
7235 | |
7236 | EECodeInfo codeInfo(ip); |
7237 | if (!codeInfo.IsValid()) |
7238 | return; |
7239 | |
7240 | DWORD addrOffset = codeInfo.GetRelOffset(); |
7241 | |
7242 | ICodeManager *pEECM = codeInfo.GetCodeManager(); |
7243 | _ASSERTE(pEECM != NULL); |
7244 | |
7245 | bool isAtSafePoint = pEECM->IsGcSafe(&codeInfo, addrOffset); |
7246 | if (isAtSafePoint) |
7247 | { |
7248 | // If the thread is at a GC safe point, push a RedirectedThreadFrame with |
7249 | // the interrupted context and pulse the GC mode so that GC can proceed. |
7250 | FrameWithCookie<RedirectedThreadFrame> frame(interruptedContext); |
7251 | pThread->SetSavedRedirectContext(NULL); |
7252 | |
7253 | frame.Push(pThread); |
7254 | |
7255 | pThread->PulseGCMode(); |
7256 | |
7257 | frame.Pop(pThread); |
7258 | } |
7259 | else |
7260 | { |
7261 | // The thread is in non-interruptible code. |
7262 | ExecutionState executionState; |
7263 | StackWalkAction action; |
7264 | REGDISPLAY regDisplay; |
7265 | pThread->InitRegDisplay(®Display, interruptedContext, true /* validContext */); |
7266 | |
7267 | BOOL unused; |
7268 | |
7269 | if (IsIPInEpilog(interruptedContext, &codeInfo, &unused)) |
7270 | return; |
7271 | |
7272 | // Use StackWalkFramesEx to find the location of the return address. This will locate the |
7273 | // return address by checking relative to the caller frame's SP, which is preferable to |
7274 | // checking next to the current RBP because we may have interrupted the function prior to |
7275 | // the point where RBP is updated. |
7276 | action = pThread->StackWalkFramesEx( |
7277 | ®Display, |
7278 | SWCB_GetExecutionState, |
7279 | &executionState, |
7280 | QUICKUNWIND | DISABLE_MISSING_FRAME_DETECTION | ALLOW_ASYNC_STACK_WALK); |
7281 | |
7282 | if (action != SWA_ABORT || !executionState.m_IsJIT) |
7283 | return; |
7284 | |
7285 | if (executionState.m_ppvRetAddrPtr == NULL) |
7286 | return; |
7287 | |
7288 | |
7289 | // Calling this turns off the GC_TRIGGERS/THROWS/INJECT_FAULT contract in LoadTypeHandle. |
7290 | // We should not trigger any loads for unresolved types. |
7291 | ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE(); |
7292 | |
7293 | // Mark that we are performing a stackwalker like operation on the current thread. |
7294 | // This is necessary to allow the signature parsing functions to work without triggering any loads. |
7295 | ClrFlsValueSwitch threadStackWalking(TlsIdx_StackWalkerWalkingThread, pThread); |
7296 | |
7297 | // Hijack the return address to point to the appropriate routine based on the method's return type. |
7298 | void *pvHijackAddr = GetHijackAddr(pThread, &codeInfo); |
7299 | pThread->HijackThread(pvHijackAddr, &executionState); |
7300 | } |
7301 | } |
7302 | |
7303 | bool Thread::InjectGcSuspension() |
7304 | { |
7305 | static ConfigDWORD injectionEnabled; |
7306 | if (injectionEnabled.val(CLRConfig::INTERNAL_ThreadSuspendInjection) == 0) |
7307 | return false; |
7308 | |
7309 | Volatile<HANDLE> hThread; |
7310 | hThread = GetThreadHandle(); |
7311 | if (hThread != INVALID_HANDLE_VALUE && hThread != SWITCHOUT_HANDLE_VALUE) |
7312 | { |
7313 | ::PAL_InjectActivation(hThread); |
7314 | return true; |
7315 | } |
7316 | |
7317 | return false; |
7318 | } |
7319 | |
7320 | #endif // FEATURE_HIJACK && PLATFORM_UNIX |
7321 | |
7322 | // Initialize thread suspension support |
7323 | void ThreadSuspend::Initialize() |
7324 | { |
7325 | #if defined(FEATURE_HIJACK) && defined(PLATFORM_UNIX) |
7326 | ::PAL_SetActivationFunction(HandleGCSuspensionForInterruptedThread, CheckActivationSafePoint); |
7327 | #endif |
7328 | } |
7329 | |
7330 | #ifdef _DEBUG |
7331 | BOOL Debug_IsLockedViaThreadSuspension() |
7332 | { |
7333 | LIMITED_METHOD_CONTRACT; |
7334 | return GCHeapUtilities::IsGCInProgress() && |
7335 | (dbgOnly_IsSpecialEEThread() || |
7336 | IsGCSpecialThread() || |
7337 | GetThread() == ThreadSuspend::GetSuspensionThread()); |
7338 | } |
7339 | #endif |
7340 | |
7341 | #if defined(TIME_SUSPEND) || defined(GC_STATS) |
7342 | |
7343 | DWORD StatisticsBase::secondsToDisplay = 0; |
7344 | |
7345 | DWORD StatisticsBase::GetTime() |
7346 | { |
7347 | LIMITED_METHOD_CONTRACT; |
7348 | LARGE_INTEGER large; |
7349 | |
7350 | if (divisor == 0) |
7351 | { |
7352 | if (QueryPerformanceFrequency(&large) && (large.QuadPart != 0)) |
7353 | divisor = (DWORD)(large.QuadPart / (1000 * 1000)); // microseconds |
7354 | else |
7355 | divisor = 1; |
7356 | } |
7357 | |
7358 | if (QueryPerformanceCounter(&large)) |
7359 | return (DWORD) (large.QuadPart / divisor); |
7360 | else |
7361 | return 0; |
7362 | } |
7363 | |
7364 | DWORD StatisticsBase::GetElapsed(DWORD start, DWORD stop) |
7365 | { |
7366 | LIMITED_METHOD_CONTRACT; |
7367 | if (stop > start) |
7368 | return stop - start; |
7369 | |
7370 | INT64 bigStop = stop; |
7371 | bigStop += 0x100000000ULL; |
7372 | bigStop -= start; |
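// Example: start = 0xFFFFFF00 and stop = 0x00000100 (wrapped) gives bigStop = 0x100000100 - 0xFFFFFF00 = 0x200.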
7373 | |
7374 | // The assert below was seen firing in stress, so comment it out for now |
7375 | //_ASSERTE(((INT64)(DWORD)bigStop) == bigStop); |
7376 | |
7377 | if (((INT64)(DWORD)bigStop) == bigStop) |
7378 | return (DWORD) bigStop; |
7379 | else |
7380 | return 0; |
7381 | } |
7382 | |
7383 | void StatisticsBase::RollOverIfNeeded() |
7384 | { |
7385 | LIMITED_METHOD_CONTRACT; |
7386 | |
// Our counters are 32 bits, so counting microseconds they overflow after about 2^32 us, i.e. roughly 4000 seconds.
// Reset when we get close to overflowing.
7389 | const DWORD RolloverInterval = 3900; |
7390 | |
7391 | // every so often, print a summary of our statistics |
7392 | DWORD ticksNow = GetTickCount(); |
7393 | |
7394 | if (secondsToDisplay == 0) |
7395 | { |
7396 | secondsToDisplay = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_StatsUpdatePeriod); |
7397 | if (secondsToDisplay == 0) |
7398 | secondsToDisplay = 1; |
7399 | else if (secondsToDisplay > RolloverInterval) |
7400 | secondsToDisplay = RolloverInterval; |
7401 | } |
7402 | |
7403 | if (ticksNow - startTick > secondsToDisplay * 1000) |
7404 | { |
7405 | DisplayAndUpdate(); |
7406 | |
7407 | startTick = GetTickCount(); |
7408 | |
// Our counters are 32 bits, so counting microseconds they overflow after about 2^32 us, i.e. roughly 4000 seconds.
// Reset when we get close to overflowing.
7411 | if (++cntDisplay >= (int)(RolloverInterval / secondsToDisplay)) |
7412 | Initialize(); |
7413 | } |
7414 | } |
7415 | |
7416 | #endif // defined(TIME_SUSPEND) || defined(GC_STATS) |
7417 | |
7418 | |
7419 | #ifdef TIME_SUSPEND |
7420 | |
7421 | // There is a current and a prior copy of the statistics. This allows us to display deltas per reporting |
7422 | // interval, as well as running totals. The 'min' and 'max' values require special treatment. They are |
// reset (zeroed) in the current statistics when we begin a new interval and they are updated via a
7424 | // comparison with the global min/max. |
7425 | SuspendStatistics g_SuspendStatistics; |
7426 | SuspendStatistics g_LastSuspendStatistics; |
7427 | |
7428 | WCHAR* SuspendStatistics::logFileName = NULL; |
7429 | |
7430 | // Called whenever our timers start to overflow |
7431 | void SuspendStatistics::Initialize() |
7432 | { |
7433 | LIMITED_METHOD_CONTRACT; |
// For efficiency's sake we're taking a dependency on the layout of a C++ object
// with a vtable. Protect against violations of our premise:
7436 | static_assert(offsetof(SuspendStatistics, cntDisplay) == sizeof(void*), |
7437 | "The first field of SuspendStatistics follows the pointer sized vtable" ); |
7438 | |
7439 | int podOffs = offsetof(SuspendStatistics, cntDisplay); // offset of the first POD field |
7440 | memset((BYTE*)(&g_SuspendStatistics)+podOffs, 0, sizeof(g_SuspendStatistics)-podOffs); |
7441 | memset((BYTE*)(&g_LastSuspendStatistics)+podOffs, 0, sizeof(g_LastSuspendStatistics)-podOffs); |
7442 | } |
7443 | |
7444 | // Top of SuspendEE |
7445 | void SuspendStatistics::StartSuspend() |
7446 | { |
7447 | LIMITED_METHOD_CONTRACT; |
7448 | startSuspend = GetTime(); |
7449 | } |
7450 | |
7451 | // Bottom of SuspendEE |
7452 | void SuspendStatistics::EndSuspend(BOOL bForGC) |
7453 | { |
7454 | LIMITED_METHOD_CONTRACT; |
7455 | DWORD time = GetElapsed(startSuspend, GetTime()); |
7456 | |
7457 | suspend.Accumulate(time); |
7458 | cntSuspends++; |
7459 | // details on suspends... |
7460 | if (!bForGC) |
7461 | cntNonGCSuspends++; |
7462 | if (GCHeapUtilities::GetGCHeap()->IsConcurrentGCInProgress()) |
7463 | { |
7464 | cntSuspendsInBGC++; |
7465 | if (!bForGC) |
7466 | cntNonGCSuspendsInBGC++; |
7467 | } |
7468 | } |
7469 | |
7470 | // Time spent in the current suspend (for pro-active debugging) |
7471 | DWORD SuspendStatistics::CurrentSuspend() |
7472 | { |
7473 | LIMITED_METHOD_CONTRACT; |
7474 | return GetElapsed(startSuspend, GetTime()); |
7475 | } |
7476 | |
7477 | // Top of RestartEE |
7478 | void SuspendStatistics::StartRestart() |
7479 | { |
7480 | LIMITED_METHOD_CONTRACT; |
7481 | startRestart = GetTime(); |
7482 | } |
7483 | |
7484 | // Bottom of RestartEE |
7485 | void SuspendStatistics::EndRestart() |
7486 | { |
7487 | LIMITED_METHOD_CONTRACT; |
7488 | DWORD timeNow = GetTime(); |
7489 | |
7490 | restart.Accumulate(GetElapsed(startRestart, timeNow)); |
7491 | cntRestarts++; |
7492 | |
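    // The overall paused time runs from the top of SuspendEE to the bottom of RestartEE,
    // so it includes the time the EE spent fully suspended in between.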
7493 | paused.Accumulate(SuspendStatistics::GetElapsed(startSuspend, timeNow)); |
7494 | |
7495 | RollOverIfNeeded(); |
7496 | } |
7497 | |
7498 | // Time spent in the current restart |
7499 | DWORD SuspendStatistics::CurrentRestart() |
7500 | { |
7501 | LIMITED_METHOD_CONTRACT; |
7502 | return GetElapsed(startRestart, GetTime()); |
7503 | } |
7504 | |
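// Append a summary of the current interval (deltas) and the running totals to the
// optional log file, snapshot the current statistics into g_LastSuspendStatistics,
// and reset the per-interval timers.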
7505 | void SuspendStatistics::DisplayAndUpdate() |
7506 | { |
7507 | LIMITED_METHOD_CONTRACT; |
7508 | |
7509 | // TODO: this fires at times... |
7510 | // _ASSERTE(cntSuspends == cntRestarts); |
7511 | |
7512 | if (logFileName == NULL) |
7513 | { |
7514 | logFileName = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_SuspendTimeLog); |
7515 | } |
7516 | |
7517 | FILE* logFile; |
7518 | |
7519 | if (logFileName != NULL && (logFile = _wfopen((LPCWSTR)logFileName, W("a" ))) != NULL) |
7520 | { |
7521 | if (cntDisplay == 0) |
7522 | fprintf(logFile, "\nSUSP **** Initialize *****\n\n" ); |
7523 | |
7524 | fprintf(logFile, "SUSP **** Summary ***** %d\n" , cntDisplay); |
7525 | |
7526 | paused.DisplayAndUpdate (logFile, "Paused " , &g_LastSuspendStatistics.paused, cntSuspends, g_LastSuspendStatistics.cntSuspends); |
7527 | suspend.DisplayAndUpdate (logFile, "Suspend" , &g_LastSuspendStatistics.suspend, cntSuspends, g_LastSuspendStatistics.cntSuspends); |
        restart.DisplayAndUpdate (logFile, "Restart" , &g_LastSuspendStatistics.restart, cntRestarts, g_LastSuspendStatistics.cntRestarts);
7529 | acquireTSL.DisplayAndUpdate(logFile, "LockTSL" , &g_LastSuspendStatistics.acquireTSL, cntSuspends, g_LastSuspendStatistics.cntSuspends); |
7530 | releaseTSL.DisplayAndUpdate(logFile, "Unlock " , &g_LastSuspendStatistics.releaseTSL, cntSuspends, g_LastSuspendStatistics.cntSuspends); |
7531 | osSuspend.DisplayAndUpdate (logFile, "OS Susp" , &g_LastSuspendStatistics.osSuspend, cntOSSuspendResume, g_LastSuspendStatistics.cntOSSuspendResume); |
7532 | crawl.DisplayAndUpdate (logFile, "Crawl" , &g_LastSuspendStatistics.crawl, cntHijackCrawl, g_LastSuspendStatistics.cntHijackCrawl); |
7533 | wait.DisplayAndUpdate (logFile, "Wait" , &g_LastSuspendStatistics.wait, cntWaits, g_LastSuspendStatistics.cntWaits); |
7534 | |
7535 | fprintf(logFile, "OS Suspend Failures %d (%d), Wait Timeouts %d (%d), Hijack traps %d (%d)\n" , |
7536 | cntFailedSuspends - g_LastSuspendStatistics.cntFailedSuspends, cntFailedSuspends, |
7537 | cntWaitTimeouts - g_LastSuspendStatistics.cntWaitTimeouts, cntWaitTimeouts, |
7538 | cntHijackTrap - g_LastSuspendStatistics.cntHijackTrap, cntHijackTrap); |
7539 | |
7540 | fprintf(logFile, "Redirected EIP Failures %d (%d), Collided GC/Debugger/ADUnload %d (%d)\n" , |
7541 | cntFailedRedirections - g_LastSuspendStatistics.cntFailedRedirections, cntFailedRedirections, |
7542 | cntCollideRetry - g_LastSuspendStatistics.cntCollideRetry, cntCollideRetry); |
7543 | |
7544 | fprintf(logFile, "Suspend: All %d (%d). NonGC: %d (%d). InBGC: %d (%d). NonGCInBGC: %d (%d)\n\n" , |
7545 | cntSuspends - g_LastSuspendStatistics.cntSuspends, cntSuspends, |
7546 | cntNonGCSuspends - g_LastSuspendStatistics.cntNonGCSuspends, cntNonGCSuspends, |
7547 | cntSuspendsInBGC - g_LastSuspendStatistics.cntSuspendsInBGC, cntSuspendsInBGC, |
7548 | cntNonGCSuspendsInBGC - g_LastSuspendStatistics.cntNonGCSuspendsInBGC, cntNonGCSuspendsInBGC); |
7549 | |
7550 | // close the log file... |
7551 | fclose(logFile); |
7552 | } |
7553 | |
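    // Snapshot the running totals so the next interval can report deltas against them,
    // then clear the per-interval timers (counts such as cntSuspends keep accumulating
    // until RollOverIfNeeded calls Initialize).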
7554 | memcpy(&g_LastSuspendStatistics, this, sizeof(g_LastSuspendStatistics)); |
7555 | |
7556 | suspend.Reset(); |
7557 | restart.Reset(); |
7558 | paused.Reset(); |
7559 | acquireTSL.Reset(); |
7560 | releaseTSL.Reset(); |
7561 | osSuspend.Reset(); |
7562 | crawl.Reset(); |
7563 | wait.Reset(); |
7564 | } |
7565 | |
7566 | #endif // TIME_SUSPEND |
7567 | |
7568 | #if defined(TIME_SUSPEND) || defined(GC_STATS) |
7569 | |
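// Unit labels and factors for scaling the microsecond-based counters to the unit
// requested by the caller of MinMaxTot::DisplayAndUpdate.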
7570 | const char* const str_timeUnit[] = { "usec" , "msec" , "sec" }; |
7571 | const int timeUnitFactor[] = { 1, 1000, 1000000 }; |
7572 | |
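// Print one line for this timer: per-interval and cumulative counts, totals, min/max and
// averages, with the per-interval value first and the running value in parentheses.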
7573 | void MinMaxTot::DisplayAndUpdate(FILE* logFile, __in_z const char *pName, MinMaxTot *pLastOne, int fullCount, int priorCount, timeUnit unit /* = usec */) |
7574 | { |
7575 | LIMITED_METHOD_CONTRACT; |
7576 | |
7577 | int tuf = timeUnitFactor[unit]; |
7578 | int delta = fullCount - priorCount; |
7579 | |
7580 | fprintf(logFile, "%s %u (%u) times for %u (%u) %s. Min %u (%u), Max %u (%u), Avg %u (%u)\n" , |
7581 | pName, |
7582 | delta, fullCount, |
7583 | (totVal - pLastOne->totVal) / tuf, totVal / tuf, |
7584 | str_timeUnit[(int)unit], |
7585 | minVal / tuf, pLastOne->minVal / tuf, |
7586 | maxVal / tuf, pLastOne->maxVal / tuf, |
7587 | (delta == 0 ? 0 : (totVal - pLastOne->totVal) / delta) / tuf, |
7588 | (fullCount == 0 ? 0 : totVal / fullCount) / tuf); |
7589 | |
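    // Fold the prior (running) min/max into the current values so that, after the caller
    // copies this object into its 'prior' statistics and resets it, the running extremes
    // carry into the next interval.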
7590 | if (minVal > pLastOne->minVal && pLastOne->minVal != 0) |
7591 | minVal = pLastOne->minVal; |
7592 | |
7593 | if (maxVal < pLastOne->maxVal) |
7594 | maxVal = pLastOne->maxVal; |
7595 | } |
7596 | |
7597 | #endif // defined(TIME_SUSPEND) || defined(GC_STATS) |
7598 | |