1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4// threadsuspend.h
5
6#ifndef _THREAD_SUSPEND_H_
7#define _THREAD_SUSPEND_H_
8
9#if defined(TIME_SUSPEND) || defined(GC_STATS)
10
// Time units: microseconds, milliseconds and seconds. Accepted as the last argument of
// MinMaxTot::DisplayAndUpdate, where it defaults to usec.
enum timeUnit { usec, msec, sec };
12
13// running aggregations
14struct MinMaxTot
15{
16 DWORD minVal, maxVal, totVal;
17
18 void Accumulate(DWORD time)
19 {
20 LIMITED_METHOD_CONTRACT;
21 if (time < minVal || minVal == 0)
22 minVal = time;
23
24 if (time > maxVal)
25 maxVal = time;
26
27 // We are supposed to anticipate overflow and clear our totals
28 // However we still see this assert and for now, let's ignore it...
29 // _ASSERTE(((DWORD) (totVal + time)) > ((DWORD) totVal));
30 if (((DWORD) (totVal + time)) > ((DWORD) totVal))
31 totVal += time;
32 }
33
34 void Reset()
35 {
36 LIMITED_METHOD_CONTRACT;
37 minVal = maxVal = 0;
38 }
39
40 void DisplayAndUpdate(FILE* logFile, __in_z const char *pName, MinMaxTot *pLastOne, int fullCount, int priorCount, timeUnit=usec);
41};
42
// A note about timings.  We use QueryPerformanceCounter to measure all timings, in that counter's
// own units.  During Initialization, we compute a divisor to convert those timings into
// microseconds.  This means that we can accumulate about 4,000 seconds (over one hour) of GC time
// into 32-bit quantities before we must reinitialize.

// A note about performance: derived classes have taken a dependency on cntDisplay being the first
// field of this class, following the vtable*.  When this is violated a compile time assert will fire.
// In practice: do not add or reorder non-static data members ahead of cntDisplay.
//
// Abstract base (Initialize and DisplayAndUpdate are pure virtual) for the statistics structures
// declared in this header.
struct StatisticsBase
{
    // display the statistics every so many seconds.
    static DWORD secondsToDisplay;

    // we must re-initialize after an hour of GC time, to avoid overflow.  It's more convenient to
    // re-initialize after an hour of wall-clock time, instead
    // NOTE(review): presumably this counts display intervals so the hourly roll-over can be
    // detected - confirm against RollOverIfNeeded's definition.
    int cntDisplay;

    // convert all timings into microseconds
    DWORD divisor;
    // NOTE(review): presumably returns the current time already scaled by 'divisor' - confirm.
    DWORD GetTime();
    // NOTE(review): presumably the time between 'start' and 'stop' - confirm how wrap-around is handled.
    static DWORD GetElapsed(DWORD start, DWORD stop);

    // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval.
    DWORD startTick;

    // derived classes must call this regularly (from a logical "end of a cycle")
    void RollOverIfNeeded();

    // Supplied by each derived statistics type.
    virtual void Initialize() = 0;
    virtual void DisplayAndUpdate() = 0;
};
73
74#endif // defined(TIME_SUSPEND) || defined(GC_STATS)
75
76#ifdef TIME_SUSPEND
77
78struct SuspendStatistics
79 : public StatisticsBase
80{
81 static WCHAR* logFileName;
82
83 // number of times we call SuspendEE, RestartEE
84 int cntSuspends, cntRestarts;
85
86 int cntSuspendsInBGC, cntNonGCSuspends, cntNonGCSuspendsInBGC;
87
88 // Times for current suspension & restart
89 DWORD startSuspend, startRestart;
90
91 // min, max and total time spent performing a Suspend, a Restart, or Paused from the start of
92 // a Suspend to the end of a Restart. We can compute 'avg' using 'cnt' and 'tot' values.
93 MinMaxTot suspend, restart, paused;
94
95 // We know there can be contention on acquiring the ThreadStoreLock.
96 MinMaxTot acquireTSL, releaseTSL;
97
98 // And if we OS suspend a thread that is blocking or perhaps throwing an exception and is therefore
99 // stuck in the kernel, it could take approximately a second. So track the time taken for OS
100 // suspends
101 MinMaxTot osSuspend;
102
103 // And if we place a hijack, we need to crawl a stack to do so.
104 MinMaxTot crawl;
105
106 // And waiting can be a significant part of the total suspension time.
107 MinMaxTot wait;
108
109 ///////////////////////////////////////////////////////////////////////////////////////////////
110 // There are some interesting events that are worth counting, because they show where the time is going:
111
112 // number of times we waited on g_pGCSuspendEvent while trying to suspend the EE
113 int cntWaits;
114
115 // and the number of times those Waits timed out rather than being signalled by a cooperating thread
116 int cntWaitTimeouts;
117
118 // number of times we did an OS (or hosted) suspend or resume on a thread
119 int cntOSSuspendResume;
120
121 // number of times we crawled a stack for a hijack
122 int cntHijackCrawl;
123
124 // and the number of times the hijack actually trapped a thread for us
125 int cntHijackTrap;
126
127 // the number of times we redirected a thread in fully interruptible code, by rewriting its EIP
128 // so it will throw to a blocking point
129 int cntRedirections;
130
131 ///////////////////////////////////////////////////////////////////////////////////////////////
132 // And there are some "failure" cases that should never or almost never occur.
133
134 // number of times we have a collision between e.g. Debugger suspension & GC suspension.
135 // In these cases, everyone yields to the GC but at some cost.
136 int cntCollideRetry;
137
138 // number of times the OS or Host was unable to ::SuspendThread a thread for us. This count should be
139 // approximately 0.
140 int cntFailedSuspends;
141
142 // number of times we were unable to redirect a thread by rewriting its register state in a
143 // suspended context. This count should be approximately 0.
144 int cntFailedRedirections;
145
146 ///////////////////////////////////////////////////////////////////////////////////////////////
147 // Internal mechanism:
148
149 virtual void Initialize();
150 virtual void DisplayAndUpdate();
151
152 // Public API
153
154 void StartSuspend();
155 void EndSuspend(BOOL bForGC);
156 DWORD CurrentSuspend();
157
158 void StartRestart();
159 void EndRestart();
160 DWORD CurrentRestart();
161};
162
// Global SuspendStatistics instances; declared here, defined elsewhere.
// NOTE(review): g_LastSuspendStatistics presumably holds the previously reported values that the
// current ones are compared against - confirm in the implementation file.
extern SuspendStatistics g_SuspendStatistics;
extern SuspendStatistics g_LastSuspendStatistics;
165
166#endif // TIME_SUSPEND
167
// Free helper functions; declared here, defined elsewhere.
// NOTE(review): presumably fills *pContext with the context of pThread and reports success
// through the BOOL result - confirm against the definition.
BOOL EEGetThreadContext(Thread *pThread, CONTEXT *pContext);
// NOTE(review): presumably makes sure the thread identified by hThread / pThread has really
// stopped running - confirm against the definition.
BOOL EnsureThreadIsSuspended(HANDLE hThread, Thread* pThread);
170
// Entry points and shared state for suspending / resuming the runtime (EE) and for taking the
// thread store lock.  Every data member and member function declared here is static.
// Thread and ThreadStore are friends and can reach the private members.
class ThreadSuspend
{
    friend class Thread;
    friend class ThreadStore;

public:
    // Reason passed to SuspendRuntime, SuspendEE, LockThreadStore and UnlockThreadStore.
    typedef enum
    {
        SUSPEND_OTHER = 0,
        SUSPEND_FOR_GC = 1,
        SUSPEND_FOR_APPDOMAIN_SHUTDOWN = 2,
        SUSPEND_FOR_REJIT = 3,
        SUSPEND_FOR_SHUTDOWN = 4,
        SUSPEND_FOR_DEBUGGER = 5,
        SUSPEND_FOR_GC_PREP = 6,
        SUSPEND_FOR_DEBUGGER_SWEEP = 7 // This must only be used in Thread::SysSweepThreadsForDebug
    } SUSPEND_REASON;

private:
    static SUSPEND_REASON m_suspendReason; // This contains the reason
                                           // that the runtime was suspended
    // NOTE(review): presumably the thread that is currently trying to suspend the runtime for a
    // GC - confirm against the implementation.
    static Thread* m_pThreadAttemptingSuspendForGC;

public:
    // Suspend / resume the runtime.  Declared here only; defined elsewhere.
    static HRESULT SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason);
    static void ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceded);

    // Initialize thread suspension support
    static void Initialize();

private:
    // Event that is waited on while trying to suspend the EE.
    static CLREvent * g_pGCSuspendEvent;

    // This is true iff we're currently in the process of suspending threads.  Once the
    // threads have been suspended, this is false.  This is set via an instance of
    // SuspendRuntimeInProgressHolder placed in SuspendRuntime, SysStartSuspendForDebug,
    // and SysSweepThreadsForDebug.  Code outside Thread reads this via
    // Thread::SysIsSuspendInProgress.
    // NOTE(review): the accessor declared in this header is ThreadSuspend::SysIsSuspendInProgress;
    // the Thread::SysIsSuspendInProgress references in this comment may be stale - confirm.
    //
    // *** THERE IS NO SYNCHRONIZATION AROUND SETTING OR READING THIS ***
    // This value is only useful for code that can be more efficient if it has a good guess
    // as to whether we're suspending the runtime.  This is NOT to be used by code that
    // *requires* this knowledge with 100% accuracy in order to behave correctly, unless
    // you add synchronization yourself.  An example of where Thread::SysIsSuspendInProgress
    // is used is by the profiler API, in ProfToEEInterfaceImpl::DoStackSnapshot.  The profiler
    // API needs to suspend the target thread whose stack it is about to walk.  But the profiler
    // API should avoid this if the runtime is being suspended.  Otherwise, the thread trying to
    // suspend the runtime (thread A) might get stuck when it tries to suspend the thread
    // executing ProfToEEInterfaceImpl::DoStackSnapshot (thread B), since thread B will be
    // busy trying to suspend the target of the stack walk (thread C).  Bad luck with timing
    // could cause A to try to suspend B over and over again while B is busy suspending C, and
    // then suspending D, etc., assuming the profiler does a lot of stack walks.  This, in turn,
    // could cause the deadlock detection assert in Thread::SuspendThread to fire.  So the
    // moral here is that, if B realizes the runtime is being suspended, it can just fail the stackwalk
    // immediately without trying to do the suspend.  But if B occasionally gets false positives or
    // false negatives from calling Thread::SysIsSuspendInProgress, the worst is we might
    // delay the EE suspension a little bit, or we might too eagerly fail from ProfToEEInterfaceImpl::DoStackSnapshot.
    // But there won't be any corruption or AV.  More details on the profiler API scenario in VsWhidbey bug 454936.
    static bool s_fSuspendRuntimeInProgress;

    // Acquire / release pair wrapped by SuspendRuntimeInProgressHolder below.
    // NOTE(review): presumably they set and clear s_fSuspendRuntimeInProgress - confirm.
    static void SetSuspendRuntimeInProgress();
    static void ResetSuspendRuntimeInProgress();

    // Holder type built from the two functions above.
    typedef StateHolder<ThreadSuspend::SetSuspendRuntimeInProgress, ThreadSuspend::ResetSuspendRuntimeInProgress> SuspendRuntimeInProgressHolder;

public:
    // Plain, unsynchronized read of s_fSuspendRuntimeInProgress; treat the result as a hint only.
    static bool SysIsSuspendInProgress() { return s_fSuspendRuntimeInProgress; }

public:
    //suspend all threads
    static void SuspendEE(SUSPEND_REASON reason);
    static void RestartEE(BOOL bFinishedGC, BOOL SuspendSucceded); //resume threads.

    // Take / release the thread store lock.  UnlockThreadStore defaults to
    // bThreadDestroyed = FALSE and reason = SUSPEND_OTHER.
    static void LockThreadStore(ThreadSuspend::SUSPEND_REASON reason);
    static void UnlockThreadStore(BOOL bThreadDestroyed = FALSE,
                                  ThreadSuspend::SUSPEND_REASON reason = ThreadSuspend::SUSPEND_OTHER);

    // Returns the global g_pSuspensionThread.
    static Thread * GetSuspensionThread()
    {
        LIMITED_METHOD_CONTRACT;
        return g_pSuspensionThread;
    }

private:
    // This is used to avoid thread starvation if non-GC threads are competing for
    // the thread store lock when there is a real GC-thread waiting to get in.
    // This is initialized lazily when the first non-GC thread backs out because of
    // a waiting GC thread.  The s_hAbortEvtCache is used to store the handle when
    // it is not being used.
    static CLREventBase *s_hAbortEvt;
    static CLREventBase *s_hAbortEvtCache;

    // NOTE(review): presumably a count used while synchronizing threads for the debugger -
    // confirm against the implementation.
    static LONG m_DebugWillSyncCount;
};
265
266class ThreadStoreLockHolderWithSuspendReason
267{
268public:
269 ThreadStoreLockHolderWithSuspendReason(ThreadSuspend::SUSPEND_REASON reason)
270 {
271 ThreadSuspend::LockThreadStore(reason);
272 }
273 ~ThreadStoreLockHolderWithSuspendReason()
274 {
275 ThreadSuspend::UnlockThreadStore();
276 }
277private:
278 ThreadSuspend::SUSPEND_REASON m_reason;
279};
280
281#endif // _THREAD_SUSPEND_H_
282