1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // threadsuspend.h |
5 | |
6 | #ifndef _THREAD_SUSPEND_H_ |
7 | #define _THREAD_SUSPEND_H_ |
8 | |
9 | #if defined(TIME_SUSPEND) || defined(GC_STATS) |
10 | |
// Display-unit selector accepted by MinMaxTot::DisplayAndUpdate (which defaults to usec).
// Enumerator values are the implicit 0, 1, 2.
enum timeUnit
{
    usec,
    msec,
    sec
};
12 | |
13 | // running aggregations |
14 | struct MinMaxTot |
15 | { |
16 | DWORD minVal, maxVal, totVal; |
17 | |
18 | void Accumulate(DWORD time) |
19 | { |
20 | LIMITED_METHOD_CONTRACT; |
21 | if (time < minVal || minVal == 0) |
22 | minVal = time; |
23 | |
24 | if (time > maxVal) |
25 | maxVal = time; |
26 | |
27 | // We are supposed to anticipate overflow and clear our totals |
28 | // However we still see this assert and for now, let's ignore it... |
29 | // _ASSERTE(((DWORD) (totVal + time)) > ((DWORD) totVal)); |
30 | if (((DWORD) (totVal + time)) > ((DWORD) totVal)) |
31 | totVal += time; |
32 | } |
33 | |
34 | void Reset() |
35 | { |
36 | LIMITED_METHOD_CONTRACT; |
37 | minVal = maxVal = 0; |
38 | } |
39 | |
40 | void DisplayAndUpdate(FILE* logFile, __in_z const char *pName, MinMaxTot *pLastOne, int fullCount, int priorCount, timeUnit=usec); |
41 | }; |
42 | |
// A note about timings.  We use QueryPerformanceCounter to measure all timings in raw counter
// units.  During initialization, we compute a divisor to convert those timings into microseconds.
// This means that we can accumulate about 4,000 seconds (over one hour) of GC time into 32-bit
// quantities before we must reinitialize.
47 | |
48 | // A note about performance: derived classes have taken a dependency on cntDisplay being the first |
49 | // field of this class, following the vtable*. When this is violated a compile time assert will fire. |
50 | struct StatisticsBase |
51 | { |
52 | // display the statistics every so many seconds. |
53 | static DWORD secondsToDisplay; |
54 | |
55 | // we must re-initialize after an hour of GC time, to avoid overflow. It's more convenient to |
56 | // re-initialize after an hour of wall-clock time, instead |
57 | int cntDisplay; |
58 | |
59 | // convert all timings into microseconds |
60 | DWORD divisor; |
61 | DWORD GetTime(); |
62 | static DWORD GetElapsed(DWORD start, DWORD stop); |
63 | |
64 | // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval. |
65 | DWORD startTick; |
66 | |
67 | // derived classes must call this regularly (from a logical "end of a cycle") |
68 | void RollOverIfNeeded(); |
69 | |
70 | virtual void Initialize() = 0; |
71 | virtual void DisplayAndUpdate() = 0; |
72 | }; |
73 | |
74 | #endif // defined(TIME_SUSPEND) || defined(GC_STATS) |
75 | |
76 | #ifdef TIME_SUSPEND |
77 | |
78 | struct SuspendStatistics |
79 | : public StatisticsBase |
80 | { |
81 | static WCHAR* logFileName; |
82 | |
83 | // number of times we call SuspendEE, RestartEE |
84 | int cntSuspends, cntRestarts; |
85 | |
86 | int cntSuspendsInBGC, cntNonGCSuspends, cntNonGCSuspendsInBGC; |
87 | |
88 | // Times for current suspension & restart |
89 | DWORD startSuspend, startRestart; |
90 | |
91 | // min, max and total time spent performing a Suspend, a Restart, or Paused from the start of |
92 | // a Suspend to the end of a Restart. We can compute 'avg' using 'cnt' and 'tot' values. |
93 | MinMaxTot suspend, restart, paused; |
94 | |
95 | // We know there can be contention on acquiring the ThreadStoreLock. |
96 | MinMaxTot acquireTSL, releaseTSL; |
97 | |
98 | // And if we OS suspend a thread that is blocking or perhaps throwing an exception and is therefore |
99 | // stuck in the kernel, it could take approximately a second. So track the time taken for OS |
100 | // suspends |
101 | MinMaxTot osSuspend; |
102 | |
103 | // And if we place a hijack, we need to crawl a stack to do so. |
104 | MinMaxTot crawl; |
105 | |
106 | // And waiting can be a significant part of the total suspension time. |
107 | MinMaxTot wait; |
108 | |
109 | /////////////////////////////////////////////////////////////////////////////////////////////// |
110 | // There are some interesting events that are worth counting, because they show where the time is going: |
111 | |
112 | // number of times we waited on g_pGCSuspendEvent while trying to suspend the EE |
113 | int cntWaits; |
114 | |
115 | // and the number of times those Waits timed out rather than being signalled by a cooperating thread |
116 | int cntWaitTimeouts; |
117 | |
118 | // number of times we did an OS (or hosted) suspend or resume on a thread |
119 | int cntOSSuspendResume; |
120 | |
121 | // number of times we crawled a stack for a hijack |
122 | int cntHijackCrawl; |
123 | |
124 | // and the number of times the hijack actually trapped a thread for us |
125 | int cntHijackTrap; |
126 | |
127 | // the number of times we redirected a thread in fully interruptible code, by rewriting its EIP |
128 | // so it will throw to a blocking point |
129 | int cntRedirections; |
130 | |
131 | /////////////////////////////////////////////////////////////////////////////////////////////// |
132 | // And there are some "failure" cases that should never or almost never occur. |
133 | |
134 | // number of times we have a collision between e.g. Debugger suspension & GC suspension. |
135 | // In these cases, everyone yields to the GC but at some cost. |
136 | int cntCollideRetry; |
137 | |
138 | // number of times the OS or Host was unable to ::SuspendThread a thread for us. This count should be |
139 | // approximately 0. |
140 | int cntFailedSuspends; |
141 | |
142 | // number of times we were unable to redirect a thread by rewriting its register state in a |
143 | // suspended context. This count should be approximately 0. |
144 | int cntFailedRedirections; |
145 | |
146 | /////////////////////////////////////////////////////////////////////////////////////////////// |
147 | // Internal mechanism: |
148 | |
149 | virtual void Initialize(); |
150 | virtual void DisplayAndUpdate(); |
151 | |
152 | // Public API |
153 | |
154 | void StartSuspend(); |
155 | void EndSuspend(BOOL bForGC); |
156 | DWORD CurrentSuspend(); |
157 | |
158 | void StartRestart(); |
159 | void EndRestart(); |
160 | DWORD CurrentRestart(); |
161 | }; |
162 | |
163 | extern SuspendStatistics g_SuspendStatistics; |
164 | extern SuspendStatistics g_LastSuspendStatistics; |
165 | |
166 | #endif // TIME_SUSPEND |
167 | |
168 | BOOL EEGetThreadContext(Thread *pThread, CONTEXT *pContext); |
169 | BOOL EnsureThreadIsSuspended(HANDLE hThread, Thread* pThread); |
170 | |
171 | class ThreadSuspend |
172 | { |
173 | friend class Thread; |
174 | friend class ThreadStore; |
175 | |
176 | public: |
177 | typedef enum |
178 | { |
179 | SUSPEND_OTHER = 0, |
180 | SUSPEND_FOR_GC = 1, |
181 | SUSPEND_FOR_APPDOMAIN_SHUTDOWN = 2, |
182 | SUSPEND_FOR_REJIT = 3, |
183 | SUSPEND_FOR_SHUTDOWN = 4, |
184 | SUSPEND_FOR_DEBUGGER = 5, |
185 | SUSPEND_FOR_GC_PREP = 6, |
186 | SUSPEND_FOR_DEBUGGER_SWEEP = 7 // This must only be used in Thread::SysSweepThreadsForDebug |
187 | } SUSPEND_REASON; |
188 | |
189 | private: |
190 | static SUSPEND_REASON m_suspendReason; // This contains the reason |
191 | // that the runtime was suspended |
192 | static Thread* m_pThreadAttemptingSuspendForGC; |
193 | |
194 | public: |
195 | static HRESULT SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason); |
196 | static void ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceded); |
197 | |
198 | // Initialize thread suspension support |
199 | static void Initialize(); |
200 | |
201 | private: |
202 | static CLREvent * g_pGCSuspendEvent; |
203 | |
204 | // This is true iff we're currently in the process of suspending threads. Once the |
205 | // threads have been suspended, this is false. This is set via an instance of |
206 | // SuspendRuntimeInProgressHolder placed in SuspendRuntime, SysStartSuspendForDebug, |
207 | // and SysSweepThreadsForDebug. Code outside Thread reads this via |
208 | // Thread::SysIsSuspendInProgress. |
209 | // |
210 | // *** THERE IS NO SYNCHRONIZATION AROUND SETTING OR READING THIS *** |
211 | // This value is only useful for code that can be more efficient if it has a good guess |
212 | // as to whether we're suspending the runtime. This is NOT to be used by code that |
213 | // *requires* this knowledge with 100% accuracy in order to behave correctly, unless |
214 | // you add synchronization yourself. An example of where Thread::SysIsSuspendInProgress |
215 | // is used is by the profiler API, in ProfToEEInterfaceImpl::DoStackSnapshot. The profiler |
216 | // API needs to suspend the target thread whose stack it is about to walk. But the profiler |
217 | // API should avoid this if the runtime is being suspended. Otherwise, the thread trying to |
218 | // suspend the runtime (thread A) might get stuck when it tries to suspend the thread |
219 | // executing ProfToEEInterfaceImpl::DoStackSnapshot (thread B), since thread B will be |
220 | // busy trying to suspend the target of the stack walk (thread C). Bad luck with timing |
221 | // could cause A to try to suspend B over and over again while B is busy suspending C, and |
222 | // then suspending D, etc., assuming the profiler does a lot of stack walks. This, in turn, |
223 | // could cause the deadlock detection assert in Thread::SuspendThread to fire. So the |
224 | // moral here is that, if B realizes the runtime is being suspended, it can just fail the stackwalk |
225 | // immediately without trying to do the suspend. But if B occasionally gets false positives or |
226 | // false negatives from calling Thread::SysIsSuspendInProgress, the worst is we might |
227 | // delay the EE suspension a little bit, or we might too eagerly fail from ProfToEEInterfaceImpl::DoStackSnapshot. |
228 | // But there won't be any corruption or AV. More details on the profiler API scenario in VsWhidbey bug 454936. |
229 | static bool s_fSuspendRuntimeInProgress; |
230 | |
231 | static void SetSuspendRuntimeInProgress(); |
232 | static void ResetSuspendRuntimeInProgress(); |
233 | |
234 | typedef StateHolder<ThreadSuspend::SetSuspendRuntimeInProgress, ThreadSuspend::ResetSuspendRuntimeInProgress> SuspendRuntimeInProgressHolder; |
235 | |
236 | public: |
237 | static bool SysIsSuspendInProgress() { return s_fSuspendRuntimeInProgress; } |
238 | |
239 | public: |
240 | //suspend all threads |
241 | static void SuspendEE(SUSPEND_REASON reason); |
242 | static void RestartEE(BOOL bFinishedGC, BOOL SuspendSucceded); //resume threads. |
243 | |
244 | static void LockThreadStore(ThreadSuspend::SUSPEND_REASON reason); |
245 | static void UnlockThreadStore(BOOL bThreadDestroyed = FALSE, |
246 | ThreadSuspend::SUSPEND_REASON reason = ThreadSuspend::SUSPEND_OTHER); |
247 | |
248 | static Thread * GetSuspensionThread() |
249 | { |
250 | LIMITED_METHOD_CONTRACT; |
251 | return g_pSuspensionThread; |
252 | } |
253 | |
254 | private: |
255 | // This is used to avoid thread starvation if non-GC threads are competing for |
256 | // the thread store lock when there is a real GC-thread waiting to get in. |
257 | // This is initialized lazily when the first non-GC thread backs out because of |
258 | // a waiting GC thread. The s_hAbortEvtCache is used to store the handle when |
259 | // it is not being used. |
260 | static CLREventBase *s_hAbortEvt; |
261 | static CLREventBase *s_hAbortEvtCache; |
262 | |
263 | static LONG m_DebugWillSyncCount; |
264 | }; |
265 | |
266 | class ThreadStoreLockHolderWithSuspendReason |
267 | { |
268 | public: |
269 | ThreadStoreLockHolderWithSuspendReason(ThreadSuspend::SUSPEND_REASON reason) |
270 | { |
271 | ThreadSuspend::LockThreadStore(reason); |
272 | } |
273 | ~ThreadStoreLockHolderWithSuspendReason() |
274 | { |
275 | ThreadSuspend::UnlockThreadStore(); |
276 | } |
277 | private: |
278 | ThreadSuspend::SUSPEND_REASON m_reason; |
279 | }; |
280 | |
281 | #endif // _THREAD_SUSPEND_H_ |
282 | |