| 1 | // Licensed to the .NET Foundation under one or more agreements. | 
|---|
| 2 | // The .NET Foundation licenses this file to you under the MIT license. | 
|---|
| 3 | // See the LICENSE file in the project root for more information. | 
|---|
| 4 | // threadsuspend.h | 
|---|
| 5 |  | 
|---|
| 6 | #ifndef _THREAD_SUSPEND_H_ | 
|---|
| 7 | #define _THREAD_SUSPEND_H_ | 
|---|
| 8 |  | 
|---|
| 9 | #if defined(TIME_SUSPEND) || defined(GC_STATS) | 
|---|
| 10 |  | 
|---|
// Time unit selector. MinMaxTot::DisplayAndUpdate takes one of these as its
// last parameter and defaults it to usec.
enum timeUnit
{
    usec,
    msec,
    sec
};
|---|
| 12 |  | 
|---|
| 13 | // running aggregations | 
|---|
| 14 | struct MinMaxTot | 
|---|
| 15 | { | 
|---|
| 16 | DWORD minVal, maxVal, totVal; | 
|---|
| 17 |  | 
|---|
| 18 | void Accumulate(DWORD time) | 
|---|
| 19 | { | 
|---|
| 20 | LIMITED_METHOD_CONTRACT; | 
|---|
| 21 | if (time < minVal || minVal == 0) | 
|---|
| 22 | minVal = time; | 
|---|
| 23 |  | 
|---|
| 24 | if (time > maxVal) | 
|---|
| 25 | maxVal = time; | 
|---|
| 26 |  | 
|---|
| 27 | // We are supposed to anticipate overflow and clear our totals | 
|---|
| 28 | // However we still see this assert and for now, let's ignore it... | 
|---|
| 29 | // _ASSERTE(((DWORD) (totVal + time)) > ((DWORD) totVal)); | 
|---|
| 30 | if (((DWORD) (totVal + time)) > ((DWORD) totVal)) | 
|---|
| 31 | totVal += time; | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
| 34 | void Reset() | 
|---|
| 35 | { | 
|---|
| 36 | LIMITED_METHOD_CONTRACT; | 
|---|
| 37 | minVal = maxVal = 0; | 
|---|
| 38 | } | 
|---|
| 39 |  | 
|---|
| 40 | void DisplayAndUpdate(FILE* logFile, __in_z const char *pName, MinMaxTot *pLastOne, int fullCount, int priorCount, timeUnit=usec); | 
|---|
| 41 | }; | 
|---|
| 42 |  | 
|---|
| 43 | // A note about timings.  We use QueryPerformanceCounter to measure all timings in units.  During | 
|---|
| 44 | // Initialization, we compute a divisor to convert those timings into microseconds.  This means | 
|---|
| 45 | // that we can accumulate about 4,000 seconds (over one hour) of GC time into 32-bit quantities | 
|---|
| 46 | // before we must reinitialize. | 
|---|
| 47 |  | 
|---|
| 48 | // A note about performance: derived classes have taken a dependency on cntDisplay being the first | 
|---|
| 49 | // field of this class, following the vtable*. When this is violated a compile time assert will fire. | 
|---|
| 50 | struct StatisticsBase | 
|---|
| 51 | { | 
|---|
| 52 | // display the statistics every so many seconds. | 
|---|
| 53 | static DWORD secondsToDisplay; | 
|---|
| 54 |  | 
|---|
| 55 | // we must re-initialize after an hour of GC time, to avoid overflow.  It's more convenient to | 
|---|
| 56 | // re-initialize after an hour of wall-clock time, instead | 
|---|
| 57 | int cntDisplay; | 
|---|
| 58 |  | 
|---|
| 59 | // convert all timings into microseconds | 
|---|
| 60 | DWORD divisor; | 
|---|
| 61 | DWORD GetTime(); | 
|---|
| 62 | static DWORD GetElapsed(DWORD start, DWORD stop); | 
|---|
| 63 |  | 
|---|
| 64 | // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval. | 
|---|
| 65 | DWORD startTick; | 
|---|
| 66 |  | 
|---|
| 67 | // derived classes must call this regularly (from a logical "end of a cycle") | 
|---|
| 68 | void RollOverIfNeeded(); | 
|---|
| 69 |  | 
|---|
| 70 | virtual void Initialize() = 0; | 
|---|
| 71 | virtual void DisplayAndUpdate() = 0; | 
|---|
| 72 | }; | 
|---|
| 73 |  | 
|---|
| 74 | #endif // defined(TIME_SUSPEND) || defined(GC_STATS) | 
|---|
| 75 |  | 
|---|
| 76 | #ifdef TIME_SUSPEND | 
|---|
| 77 |  | 
|---|
| 78 | struct SuspendStatistics | 
|---|
| 79 | : public StatisticsBase | 
|---|
| 80 | { | 
|---|
| 81 | static WCHAR* logFileName; | 
|---|
| 82 |  | 
|---|
| 83 | // number of times we call SuspendEE, RestartEE | 
|---|
| 84 | int cntSuspends, cntRestarts; | 
|---|
| 85 |  | 
|---|
| 86 | int cntSuspendsInBGC, cntNonGCSuspends, cntNonGCSuspendsInBGC; | 
|---|
| 87 |  | 
|---|
| 88 | // Times for current suspension & restart | 
|---|
| 89 | DWORD startSuspend, startRestart; | 
|---|
| 90 |  | 
|---|
| 91 | // min, max and total time spent performing a Suspend, a Restart, or Paused from the start of | 
|---|
| 92 | // a Suspend to the end of a Restart.  We can compute 'avg' using 'cnt' and 'tot' values. | 
|---|
| 93 | MinMaxTot suspend, restart, paused; | 
|---|
| 94 |  | 
|---|
| 95 | // We know there can be contention on acquiring the ThreadStoreLock. | 
|---|
| 96 | MinMaxTot acquireTSL, releaseTSL; | 
|---|
| 97 |  | 
|---|
| 98 | // And if we OS suspend a thread that is blocking or perhaps throwing an exception and is therefore | 
|---|
| 99 | // stuck in the kernel, it could take approximately a second.  So track the time taken for OS | 
|---|
| 100 | // suspends | 
|---|
| 101 | MinMaxTot osSuspend; | 
|---|
| 102 |  | 
|---|
| 103 | // And if we place a hijack, we need to crawl a stack to do so. | 
|---|
| 104 | MinMaxTot crawl; | 
|---|
| 105 |  | 
|---|
| 106 | // And waiting can be a significant part of the total suspension time. | 
|---|
| 107 | MinMaxTot wait; | 
|---|
| 108 |  | 
|---|
| 109 | /////////////////////////////////////////////////////////////////////////////////////////////// | 
|---|
| 110 | // There are some interesting events that are worth counting, because they show where the time is going: | 
|---|
| 111 |  | 
|---|
| 112 | // number of times we waited on g_pGCSuspendEvent while trying to suspend the EE | 
|---|
| 113 | int cntWaits; | 
|---|
| 114 |  | 
|---|
| 115 | // and the number of times those Waits timed out rather than being signalled by a cooperating thread | 
|---|
| 116 | int cntWaitTimeouts; | 
|---|
| 117 |  | 
|---|
| 118 | // number of times we did an OS (or hosted) suspend or resume on a thread | 
|---|
| 119 | int cntOSSuspendResume; | 
|---|
| 120 |  | 
|---|
| 121 | // number of times we crawled a stack for a hijack | 
|---|
| 122 | int cntHijackCrawl; | 
|---|
| 123 |  | 
|---|
| 124 | // and the number of times the hijack actually trapped a thread for us | 
|---|
| 125 | int cntHijackTrap; | 
|---|
| 126 |  | 
|---|
| 127 | // the number of times we redirected a thread in fully interruptible code, by rewriting its EIP | 
|---|
| 128 | // so it will throw to a blocking point | 
|---|
| 129 | int cntRedirections; | 
|---|
| 130 |  | 
|---|
| 131 | /////////////////////////////////////////////////////////////////////////////////////////////// | 
|---|
| 132 | // And there are some "failure" cases that should never or almost never occur. | 
|---|
| 133 |  | 
|---|
| 134 | // number of times we have a collision between e.g. Debugger suspension & GC suspension. | 
|---|
| 135 | // In these cases, everyone yields to the GC but at some cost. | 
|---|
| 136 | int cntCollideRetry; | 
|---|
| 137 |  | 
|---|
| 138 | // number of times the OS or Host was unable to ::SuspendThread a thread for us.  This count should be | 
|---|
| 139 | // approximately 0. | 
|---|
| 140 | int cntFailedSuspends; | 
|---|
| 141 |  | 
|---|
| 142 | // number of times we were unable to redirect a thread by rewriting its register state in a | 
|---|
| 143 | // suspended context.  This count should be approximately 0. | 
|---|
| 144 | int cntFailedRedirections; | 
|---|
| 145 |  | 
|---|
| 146 | /////////////////////////////////////////////////////////////////////////////////////////////// | 
|---|
| 147 | // Internal mechanism: | 
|---|
| 148 |  | 
|---|
| 149 | virtual void Initialize(); | 
|---|
| 150 | virtual void DisplayAndUpdate(); | 
|---|
| 151 |  | 
|---|
| 152 | // Public API | 
|---|
| 153 |  | 
|---|
| 154 | void StartSuspend(); | 
|---|
| 155 | void EndSuspend(BOOL bForGC); | 
|---|
| 156 | DWORD CurrentSuspend(); | 
|---|
| 157 |  | 
|---|
| 158 | void StartRestart(); | 
|---|
| 159 | void EndRestart(); | 
|---|
| 160 | DWORD CurrentRestart(); | 
|---|
| 161 | }; | 
|---|
| 162 |  | 
|---|
| 163 | extern SuspendStatistics g_SuspendStatistics; | 
|---|
| 164 | extern SuspendStatistics g_LastSuspendStatistics; | 
|---|
| 165 |  | 
|---|
| 166 | #endif // TIME_SUSPEND | 
|---|
| 167 |  | 
|---|
| 168 | BOOL EEGetThreadContext(Thread *pThread, CONTEXT *pContext); | 
|---|
| 169 | BOOL EnsureThreadIsSuspended(HANDLE hThread, Thread* pThread); | 
|---|
| 170 |  | 
|---|
| 171 | class ThreadSuspend | 
|---|
| 172 | { | 
|---|
| 173 | friend class Thread; | 
|---|
| 174 | friend class ThreadStore; | 
|---|
| 175 |  | 
|---|
| 176 | public: | 
|---|
| 177 | typedef enum | 
|---|
| 178 | { | 
|---|
| 179 | SUSPEND_OTHER                   = 0, | 
|---|
| 180 | SUSPEND_FOR_GC                  = 1, | 
|---|
| 181 | SUSPEND_FOR_APPDOMAIN_SHUTDOWN  = 2, | 
|---|
| 182 | SUSPEND_FOR_REJIT               = 3, | 
|---|
| 183 | SUSPEND_FOR_SHUTDOWN            = 4, | 
|---|
| 184 | SUSPEND_FOR_DEBUGGER            = 5, | 
|---|
| 185 | SUSPEND_FOR_GC_PREP             = 6, | 
|---|
| 186 | SUSPEND_FOR_DEBUGGER_SWEEP      = 7     // This must only be used in Thread::SysSweepThreadsForDebug | 
|---|
| 187 | } SUSPEND_REASON; | 
|---|
| 188 |  | 
|---|
| 189 | private: | 
|---|
| 190 | static SUSPEND_REASON    m_suspendReason;    // This contains the reason | 
|---|
| 191 | // that the runtime was suspended | 
|---|
| 192 | static Thread* m_pThreadAttemptingSuspendForGC; | 
|---|
| 193 |  | 
|---|
| 194 | public: | 
|---|
| 195 | static HRESULT SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason); | 
|---|
| 196 | static void    ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceded); | 
|---|
| 197 |  | 
|---|
| 198 | // Initialize thread suspension support | 
|---|
| 199 | static void    Initialize(); | 
|---|
| 200 |  | 
|---|
| 201 | private: | 
|---|
| 202 | static CLREvent * g_pGCSuspendEvent; | 
|---|
| 203 |  | 
|---|
| 204 | // This is true iff we're currently in the process of suspending threads.  Once the | 
|---|
| 205 | // threads have been suspended, this is false.  This is set via an instance of | 
|---|
| 206 | // SuspendRuntimeInProgressHolder placed in SuspendRuntime, SysStartSuspendForDebug, | 
|---|
| 207 | // and SysSweepThreadsForDebug.  Code outside Thread reads this via | 
|---|
| 208 | // Thread::SysIsSuspendInProgress. | 
|---|
| 209 | // | 
|---|
| 210 | // *** THERE IS NO SYNCHRONIZATION AROUND SETTING OR READING THIS *** | 
|---|
| 211 | // This value is only useful for code that can be more efficient if it has a good guess | 
|---|
| 212 | // as to whether we're suspending the runtime.  This is NOT to be used by code that | 
|---|
| 213 | // *requires* this knowledge with 100% accuracy in order to behave correctly, unless | 
|---|
| 214 | // you add synchronization yourself.  An example of where Thread::SysIsSuspendInProgress | 
|---|
| 215 | // is used is by the profiler API, in ProfToEEInterfaceImpl::DoStackSnapshot.  The profiler | 
|---|
| 216 | // API needs to suspend the target thread whose stack it is about to walk.  But the profiler | 
|---|
| 217 | // API should avoid this if the runtime is being suspended.  Otherwise, the thread trying to | 
|---|
| 218 | // suspend the runtime (thread A) might get stuck when it tries to suspend the thread | 
|---|
| 219 | // executing ProfToEEInterfaceImpl::DoStackSnapshot (thread B), since thread B will be | 
|---|
| 220 | // busy trying to suspend the target of the stack walk (thread C).  Bad luck with timing | 
|---|
| 221 | // could cause A to try to suspend B over and over again while B is busy suspending C, and | 
|---|
| 222 | // then suspending D, etc., assuming the profiler does a lot of stack walks.  This, in turn, | 
|---|
| 223 | // could cause the deadlock detection assert in Thread::SuspendThread to fire.  So the | 
|---|
| 224 | // moral here is that, if B realizes the runtime is being suspended, it can just fail the stackwalk | 
|---|
| 225 | // immediately without trying to do the suspend.  But if B occasionally gets false positives or | 
|---|
| 226 | // false negatives from calling Thread::SysIsSuspendInProgress, the worst is we might | 
|---|
| 227 | // delay the EE suspension a little bit, or we might too eagerly fail from ProfToEEInterfaceImpl::DoStackSnapshot. | 
|---|
| 228 | // But there won't be any corruption or AV.  More details on the profiler API scenario in VsWhidbey bug 454936. | 
|---|
| 229 | static bool     s_fSuspendRuntimeInProgress; | 
|---|
| 230 |  | 
|---|
| 231 | static void SetSuspendRuntimeInProgress(); | 
|---|
| 232 | static void ResetSuspendRuntimeInProgress(); | 
|---|
| 233 |  | 
|---|
| 234 | typedef StateHolder<ThreadSuspend::SetSuspendRuntimeInProgress, ThreadSuspend::ResetSuspendRuntimeInProgress> SuspendRuntimeInProgressHolder; | 
|---|
| 235 |  | 
|---|
| 236 | public: | 
|---|
| 237 | static bool SysIsSuspendInProgress() { return s_fSuspendRuntimeInProgress; } | 
|---|
| 238 |  | 
|---|
| 239 | public: | 
|---|
| 240 | //suspend all threads | 
|---|
| 241 | static void SuspendEE(SUSPEND_REASON reason); | 
|---|
| 242 | static void RestartEE(BOOL bFinishedGC, BOOL SuspendSucceded); //resume threads. | 
|---|
| 243 |  | 
|---|
| 244 | static void LockThreadStore(ThreadSuspend::SUSPEND_REASON reason); | 
|---|
| 245 | static void UnlockThreadStore(BOOL bThreadDestroyed = FALSE, | 
|---|
| 246 | ThreadSuspend::SUSPEND_REASON reason = ThreadSuspend::SUSPEND_OTHER); | 
|---|
| 247 |  | 
|---|
| 248 | static Thread * GetSuspensionThread() | 
|---|
| 249 | { | 
|---|
| 250 | LIMITED_METHOD_CONTRACT; | 
|---|
| 251 | return g_pSuspensionThread; | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | private: | 
|---|
| 255 | // This is used to avoid thread starvation if non-GC threads are competing for | 
|---|
| 256 | // the thread store lock when there is a real GC-thread waiting to get in. | 
|---|
| 257 | // This is initialized lazily when the first non-GC thread backs out because of | 
|---|
| 258 | // a waiting GC thread.  The s_hAbortEvtCache is used to store the handle when | 
|---|
| 259 | // it is not being used. | 
|---|
| 260 | static CLREventBase *s_hAbortEvt; | 
|---|
| 261 | static CLREventBase *s_hAbortEvtCache; | 
|---|
| 262 |  | 
|---|
| 263 | static LONG m_DebugWillSyncCount; | 
|---|
| 264 | }; | 
|---|
| 265 |  | 
|---|
| 266 | class ThreadStoreLockHolderWithSuspendReason | 
|---|
| 267 | { | 
|---|
| 268 | public: | 
|---|
| 269 | ThreadStoreLockHolderWithSuspendReason(ThreadSuspend::SUSPEND_REASON reason) | 
|---|
| 270 | { | 
|---|
| 271 | ThreadSuspend::LockThreadStore(reason); | 
|---|
| 272 | } | 
|---|
| 273 | ~ThreadStoreLockHolderWithSuspendReason() | 
|---|
| 274 | { | 
|---|
| 275 | ThreadSuspend::UnlockThreadStore(); | 
|---|
| 276 | } | 
|---|
| 277 | private: | 
|---|
| 278 | ThreadSuspend::SUSPEND_REASON m_reason; | 
|---|
| 279 | }; | 
|---|
| 280 |  | 
|---|
| 281 | #endif // _THREAD_SUSPEND_H_ | 
|---|
| 282 |  | 
|---|