| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // threadsuspend.h |
| 5 | |
| 6 | #ifndef _THREAD_SUSPEND_H_ |
| 7 | #define _THREAD_SUSPEND_H_ |
| 8 | |
| 9 | #if defined(TIME_SUSPEND) || defined(GC_STATS) |
| 10 | |
// Unit selector passed to MinMaxTot::DisplayAndUpdate (defaults to usec there).
// Going by the enumerator names: micro-, milli- and whole seconds.
enum timeUnit
{
    usec,
    msec,
    sec
};
| 12 | |
| 13 | // running aggregations |
| 14 | struct MinMaxTot |
| 15 | { |
| 16 | DWORD minVal, maxVal, totVal; |
| 17 | |
| 18 | void Accumulate(DWORD time) |
| 19 | { |
| 20 | LIMITED_METHOD_CONTRACT; |
| 21 | if (time < minVal || minVal == 0) |
| 22 | minVal = time; |
| 23 | |
| 24 | if (time > maxVal) |
| 25 | maxVal = time; |
| 26 | |
| 27 | // We are supposed to anticipate overflow and clear our totals |
| 28 | // However we still see this assert and for now, let's ignore it... |
| 29 | // _ASSERTE(((DWORD) (totVal + time)) > ((DWORD) totVal)); |
| 30 | if (((DWORD) (totVal + time)) > ((DWORD) totVal)) |
| 31 | totVal += time; |
| 32 | } |
| 33 | |
| 34 | void Reset() |
| 35 | { |
| 36 | LIMITED_METHOD_CONTRACT; |
| 37 | minVal = maxVal = 0; |
| 38 | } |
| 39 | |
| 40 | void DisplayAndUpdate(FILE* logFile, __in_z const char *pName, MinMaxTot *pLastOne, int fullCount, int priorCount, timeUnit=usec); |
| 41 | }; |
| 42 | |
| 43 | // A note about timings. We use QueryPerformanceCounter to measure all timings in units. During |
| 44 | // Initialization, we compute a divisor to convert those timings into microseconds. This means |
| 45 | // that we can accumulate about 4,000 seconds (over one hour) of GC time into 32-bit quantities |
| 46 | // before we must reinitialize. |
| 47 | |
| 48 | // A note about performance: derived classes have taken a dependency on cntDisplay being the first |
| 49 | // field of this class, following the vtable*. When this is violated a compile time assert will fire. |
| 50 | struct StatisticsBase |
| 51 | { |
| 52 | // display the statistics every so many seconds. |
| 53 | static DWORD secondsToDisplay; |
| 54 | |
| 55 | // we must re-initialize after an hour of GC time, to avoid overflow. It's more convenient to |
| 56 | // re-initialize after an hour of wall-clock time, instead |
| 57 | int cntDisplay; |
| 58 | |
| 59 | // convert all timings into microseconds |
| 60 | DWORD divisor; |
| 61 | DWORD GetTime(); |
| 62 | static DWORD GetElapsed(DWORD start, DWORD stop); |
| 63 | |
| 64 | // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval. |
| 65 | DWORD startTick; |
| 66 | |
| 67 | // derived classes must call this regularly (from a logical "end of a cycle") |
| 68 | void RollOverIfNeeded(); |
| 69 | |
| 70 | virtual void Initialize() = 0; |
| 71 | virtual void DisplayAndUpdate() = 0; |
| 72 | }; |
| 73 | |
| 74 | #endif // defined(TIME_SUSPEND) || defined(GC_STATS) |
| 75 | |
| 76 | #ifdef TIME_SUSPEND |
| 77 | |
| 78 | struct SuspendStatistics |
| 79 | : public StatisticsBase |
| 80 | { |
| 81 | static WCHAR* logFileName; |
| 82 | |
| 83 | // number of times we call SuspendEE, RestartEE |
| 84 | int cntSuspends, cntRestarts; |
| 85 | |
| 86 | int cntSuspendsInBGC, cntNonGCSuspends, cntNonGCSuspendsInBGC; |
| 87 | |
| 88 | // Times for current suspension & restart |
| 89 | DWORD startSuspend, startRestart; |
| 90 | |
| 91 | // min, max and total time spent performing a Suspend, a Restart, or Paused from the start of |
| 92 | // a Suspend to the end of a Restart. We can compute 'avg' using 'cnt' and 'tot' values. |
| 93 | MinMaxTot suspend, restart, paused; |
| 94 | |
| 95 | // We know there can be contention on acquiring the ThreadStoreLock. |
| 96 | MinMaxTot acquireTSL, releaseTSL; |
| 97 | |
| 98 | // And if we OS suspend a thread that is blocking or perhaps throwing an exception and is therefore |
| 99 | // stuck in the kernel, it could take approximately a second. So track the time taken for OS |
| 100 | // suspends |
| 101 | MinMaxTot osSuspend; |
| 102 | |
| 103 | // And if we place a hijack, we need to crawl a stack to do so. |
| 104 | MinMaxTot crawl; |
| 105 | |
| 106 | // And waiting can be a significant part of the total suspension time. |
| 107 | MinMaxTot wait; |
| 108 | |
| 109 | /////////////////////////////////////////////////////////////////////////////////////////////// |
| 110 | // There are some interesting events that are worth counting, because they show where the time is going: |
| 111 | |
| 112 | // number of times we waited on g_pGCSuspendEvent while trying to suspend the EE |
| 113 | int cntWaits; |
| 114 | |
| 115 | // and the number of times those Waits timed out rather than being signalled by a cooperating thread |
| 116 | int cntWaitTimeouts; |
| 117 | |
| 118 | // number of times we did an OS (or hosted) suspend or resume on a thread |
| 119 | int cntOSSuspendResume; |
| 120 | |
| 121 | // number of times we crawled a stack for a hijack |
| 122 | int cntHijackCrawl; |
| 123 | |
| 124 | // and the number of times the hijack actually trapped a thread for us |
| 125 | int cntHijackTrap; |
| 126 | |
| 127 | // the number of times we redirected a thread in fully interruptible code, by rewriting its EIP |
| 128 | // so it will throw to a blocking point |
| 129 | int cntRedirections; |
| 130 | |
| 131 | /////////////////////////////////////////////////////////////////////////////////////////////// |
| 132 | // And there are some "failure" cases that should never or almost never occur. |
| 133 | |
| 134 | // number of times we have a collision between e.g. Debugger suspension & GC suspension. |
| 135 | // In these cases, everyone yields to the GC but at some cost. |
| 136 | int cntCollideRetry; |
| 137 | |
| 138 | // number of times the OS or Host was unable to ::SuspendThread a thread for us. This count should be |
| 139 | // approximately 0. |
| 140 | int cntFailedSuspends; |
| 141 | |
| 142 | // number of times we were unable to redirect a thread by rewriting its register state in a |
| 143 | // suspended context. This count should be approximately 0. |
| 144 | int cntFailedRedirections; |
| 145 | |
| 146 | /////////////////////////////////////////////////////////////////////////////////////////////// |
| 147 | // Internal mechanism: |
| 148 | |
| 149 | virtual void Initialize(); |
| 150 | virtual void DisplayAndUpdate(); |
| 151 | |
| 152 | // Public API |
| 153 | |
| 154 | void StartSuspend(); |
| 155 | void EndSuspend(BOOL bForGC); |
| 156 | DWORD CurrentSuspend(); |
| 157 | |
| 158 | void StartRestart(); |
| 159 | void EndRestart(); |
| 160 | DWORD CurrentRestart(); |
| 161 | }; |
| 162 | |
| 163 | extern SuspendStatistics g_SuspendStatistics; |
| 164 | extern SuspendStatistics g_LastSuspendStatistics; |
| 165 | |
| 166 | #endif // TIME_SUSPEND |
| 167 | |
| 168 | BOOL EEGetThreadContext(Thread *pThread, CONTEXT *pContext); |
| 169 | BOOL EnsureThreadIsSuspended(HANDLE hThread, Thread* pThread); |
| 170 | |
| 171 | class ThreadSuspend |
| 172 | { |
| 173 | friend class Thread; |
| 174 | friend class ThreadStore; |
| 175 | |
| 176 | public: |
| 177 | typedef enum |
| 178 | { |
| 179 | SUSPEND_OTHER = 0, |
| 180 | SUSPEND_FOR_GC = 1, |
| 181 | SUSPEND_FOR_APPDOMAIN_SHUTDOWN = 2, |
| 182 | SUSPEND_FOR_REJIT = 3, |
| 183 | SUSPEND_FOR_SHUTDOWN = 4, |
| 184 | SUSPEND_FOR_DEBUGGER = 5, |
| 185 | SUSPEND_FOR_GC_PREP = 6, |
| 186 | SUSPEND_FOR_DEBUGGER_SWEEP = 7 // This must only be used in Thread::SysSweepThreadsForDebug |
| 187 | } SUSPEND_REASON; |
| 188 | |
| 189 | private: |
| 190 | static SUSPEND_REASON m_suspendReason; // This contains the reason |
| 191 | // that the runtime was suspended |
| 192 | static Thread* m_pThreadAttemptingSuspendForGC; |
| 193 | |
| 194 | public: |
| 195 | static HRESULT SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason); |
| 196 | static void ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceded); |
| 197 | |
| 198 | // Initialize thread suspension support |
| 199 | static void Initialize(); |
| 200 | |
| 201 | private: |
| 202 | static CLREvent * g_pGCSuspendEvent; |
| 203 | |
| 204 | // This is true iff we're currently in the process of suspending threads. Once the |
| 205 | // threads have been suspended, this is false. This is set via an instance of |
| 206 | // SuspendRuntimeInProgressHolder placed in SuspendRuntime, SysStartSuspendForDebug, |
| 207 | // and SysSweepThreadsForDebug. Code outside Thread reads this via |
| 208 | // Thread::SysIsSuspendInProgress. |
| 209 | // |
| 210 | // *** THERE IS NO SYNCHRONIZATION AROUND SETTING OR READING THIS *** |
| 211 | // This value is only useful for code that can be more efficient if it has a good guess |
| 212 | // as to whether we're suspending the runtime. This is NOT to be used by code that |
| 213 | // *requires* this knowledge with 100% accuracy in order to behave correctly, unless |
| 214 | // you add synchronization yourself. An example of where Thread::SysIsSuspendInProgress |
| 215 | // is used is by the profiler API, in ProfToEEInterfaceImpl::DoStackSnapshot. The profiler |
| 216 | // API needs to suspend the target thread whose stack it is about to walk. But the profiler |
| 217 | // API should avoid this if the runtime is being suspended. Otherwise, the thread trying to |
| 218 | // suspend the runtime (thread A) might get stuck when it tries to suspend the thread |
| 219 | // executing ProfToEEInterfaceImpl::DoStackSnapshot (thread B), since thread B will be |
| 220 | // busy trying to suspend the target of the stack walk (thread C). Bad luck with timing |
| 221 | // could cause A to try to suspend B over and over again while B is busy suspending C, and |
| 222 | // then suspending D, etc., assuming the profiler does a lot of stack walks. This, in turn, |
| 223 | // could cause the deadlock detection assert in Thread::SuspendThread to fire. So the |
| 224 | // moral here is that, if B realizes the runtime is being suspended, it can just fail the stackwalk |
| 225 | // immediately without trying to do the suspend. But if B occasionally gets false positives or |
| 226 | // false negatives from calling Thread::SysIsSuspendInProgress, the worst is we might |
| 227 | // delay the EE suspension a little bit, or we might too eagerly fail from ProfToEEInterfaceImpl::DoStackSnapshot. |
| 228 | // But there won't be any corruption or AV. More details on the profiler API scenario in VsWhidbey bug 454936. |
| 229 | static bool s_fSuspendRuntimeInProgress; |
| 230 | |
| 231 | static void SetSuspendRuntimeInProgress(); |
| 232 | static void ResetSuspendRuntimeInProgress(); |
| 233 | |
| 234 | typedef StateHolder<ThreadSuspend::SetSuspendRuntimeInProgress, ThreadSuspend::ResetSuspendRuntimeInProgress> SuspendRuntimeInProgressHolder; |
| 235 | |
| 236 | public: |
| 237 | static bool SysIsSuspendInProgress() { return s_fSuspendRuntimeInProgress; } |
| 238 | |
| 239 | public: |
| 240 | //suspend all threads |
| 241 | static void SuspendEE(SUSPEND_REASON reason); |
| 242 | static void RestartEE(BOOL bFinishedGC, BOOL SuspendSucceded); //resume threads. |
| 243 | |
| 244 | static void LockThreadStore(ThreadSuspend::SUSPEND_REASON reason); |
| 245 | static void UnlockThreadStore(BOOL bThreadDestroyed = FALSE, |
| 246 | ThreadSuspend::SUSPEND_REASON reason = ThreadSuspend::SUSPEND_OTHER); |
| 247 | |
| 248 | static Thread * GetSuspensionThread() |
| 249 | { |
| 250 | LIMITED_METHOD_CONTRACT; |
| 251 | return g_pSuspensionThread; |
| 252 | } |
| 253 | |
| 254 | private: |
| 255 | // This is used to avoid thread starvation if non-GC threads are competing for |
| 256 | // the thread store lock when there is a real GC-thread waiting to get in. |
| 257 | // This is initialized lazily when the first non-GC thread backs out because of |
| 258 | // a waiting GC thread. The s_hAbortEvtCache is used to store the handle when |
| 259 | // it is not being used. |
| 260 | static CLREventBase *s_hAbortEvt; |
| 261 | static CLREventBase *s_hAbortEvtCache; |
| 262 | |
| 263 | static LONG m_DebugWillSyncCount; |
| 264 | }; |
| 265 | |
| 266 | class ThreadStoreLockHolderWithSuspendReason |
| 267 | { |
| 268 | public: |
| 269 | ThreadStoreLockHolderWithSuspendReason(ThreadSuspend::SUSPEND_REASON reason) |
| 270 | { |
| 271 | ThreadSuspend::LockThreadStore(reason); |
| 272 | } |
| 273 | ~ThreadStoreLockHolderWithSuspendReason() |
| 274 | { |
| 275 | ThreadSuspend::UnlockThreadStore(); |
| 276 | } |
| 277 | private: |
| 278 | ThreadSuspend::SUSPEND_REASON m_reason; |
| 279 | }; |
| 280 | |
| 281 | #endif // _THREAD_SUSPEND_H_ |
| 282 | |