1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | //***************************************************************************** |
5 | // File: Canary.cpp |
6 | // |
7 | |
8 | // |
9 | // Canary for debugger helper thread. This will sniff out if it's safe to take locks. |
10 | // |
11 | //***************************************************************************** |
12 | |
13 | #include "stdafx.h" |
14 | |
15 | |
16 | //----------------------------------------------------------------------------- |
17 | // Ctor for HelperCanary class |
18 | //----------------------------------------------------------------------------- |
19 | HelperCanary::HelperCanary() |
20 | { |
21 | m_hCanaryThread = NULL; |
22 | m_CanaryThreadId = 0; |
23 | m_RequestCounter = 0; |
24 | m_AnswerCounter = 0; |
25 | m_fStop = false; |
26 | |
27 | m_fCachedValid = false; |
28 | m_fCachedAnswer = false; |
29 | m_initialized = false; |
30 | } |
31 | |
32 | //----------------------------------------------------------------------------- |
33 | // Dtor for class |
34 | //----------------------------------------------------------------------------- |
35 | HelperCanary::~HelperCanary() |
36 | { |
37 | // Since we're deleting this memory, we need to kill the canary thread. |
38 | m_fStop = true; |
39 | SetEvent(m_hPingEvent); |
40 | |
41 | // m_hPingEvent dtor will close handle |
42 | WaitForSingleObject(m_hCanaryThread, INFINITE); |
43 | } |
44 | |
45 | //----------------------------------------------------------------------------- |
46 | // Clear the cached value for AreLocksAvailable(); |
47 | //----------------------------------------------------------------------------- |
48 | void HelperCanary::ClearCache() |
49 | { |
50 | _ASSERTE(ThisIsHelperThreadWorker()); |
51 | m_fCachedValid = false; |
52 | } |
53 | |
54 | //----------------------------------------------------------------------------- |
55 | // The helper thread can call this to determine if it can safely take a certain |
56 | // set of locks (mainly the heap lock(s)). The canary thread will go off and |
57 | // try and take these and report back to the helper w/o ever blocking the |
58 | // helper. |
59 | // |
60 | // Returns 'true' if it's safe for helper to take locks; else false. |
61 | // We err on the side of safety (returning false). |
62 | //----------------------------------------------------------------------------- |
63 | bool HelperCanary::AreLocksAvailable() |
64 | { |
65 | // If we're not on the helper thread, then we're guaranteed safe. |
66 | // We check this to support MaybeHelperThread code. |
67 | if (!ThisIsHelperThreadWorker()) |
68 | { |
69 | return true; |
70 | } |
71 | |
72 | if (m_fCachedValid) |
73 | { |
74 | return m_fCachedAnswer; |
75 | } |
76 | |
77 | // Cache the answer. |
78 | m_fCachedAnswer = AreLocksAvailableWorker(); |
79 | m_fCachedValid = true; |
80 | |
81 | #ifdef _DEBUG |
82 | // For managed-only debugging, we should always be safe. |
83 | if (!g_pRCThread->GetDCB()->m_rightSideIsWin32Debugger) |
84 | { |
85 | _ASSERTE(m_fCachedAnswer || !"Canary returned false in Managed-debugger" ); |
86 | } |
87 | |
88 | // For debug, nice to be able to enable an assert that tells us if this situation is actually happening. |
89 | if (!m_fCachedAnswer) |
90 | { |
91 | static BOOL shouldBreak = -1; |
92 | if (shouldBreak == -1) |
93 | { |
94 | shouldBreak = UnsafeGetConfigDWORD(CLRConfig::INTERNAL_DbgBreakIfLocksUnavailable); |
95 | } |
96 | if (shouldBreak) |
97 | { |
98 | _ASSERTE(!"Potential deadlock detected.\nLocks that the helper thread may need are currently held by other threads." ); |
99 | } |
100 | } |
101 | #endif // _DEBUG |
102 | |
103 | return m_fCachedAnswer; |
104 | } |
105 | |
106 | //----------------------------------------------------------------------------- |
107 | // Creates the canary thread and signaling events. |
108 | //----------------------------------------------------------------------------- |
109 | void HelperCanary::Init() |
110 | { |
111 | // You can only run the init code once. The debugger attempts to lazy-init |
112 | // the canary at several points but if the canary is already inited then |
113 | // we just eagerly return. See issue 841005 for more details. |
114 | if(m_initialized) |
115 | { |
116 | return; |
117 | } |
118 | else |
119 | { |
120 | m_initialized = true; |
121 | } |
122 | |
123 | m_hPingEvent = WszCreateEvent(NULL, (BOOL) kAutoResetEvent, FALSE, NULL); |
124 | if (m_hPingEvent == NULL) |
125 | { |
126 | STRESS_LOG1(LF_CORDB, LL_ALWAYS, "Canary failed to create ping event. gle=%d\n" , GetLastError()); |
127 | // in the past if we failed to start the thread we just assumed it was unsafe |
128 | // so I am preserving that behavior. However I am going to assert that this |
129 | // doesn't really happen |
130 | _ASSERTE(!"Canary failed to create ping event" ); |
131 | return; |
132 | } |
133 | |
134 | m_hWaitEvent = WszCreateEvent(NULL, (BOOL) kManualResetEvent, FALSE, NULL); |
135 | if (m_hWaitEvent == NULL) |
136 | { |
137 | STRESS_LOG1(LF_CORDB, LL_ALWAYS, "Canary failed to create wait event. gle=%d\n" , GetLastError()); |
138 | // in the past if we failed to start the thread we just assumed it was unsafe |
139 | // so I am preserving that behavior. However I am going to assert that this |
140 | // doesn't really happen |
141 | _ASSERTE(!"Canary failed to create wait event" ); |
142 | return; |
143 | } |
144 | |
145 | // Spin up the canary. This will call dllmain, but that's ok because it just |
146 | // degenerates to our timeout case. |
147 | const DWORD flags = CREATE_SUSPENDED; |
148 | m_hCanaryThread = CreateThread(NULL, 0, |
149 | HelperCanary::ThreadProc, this, |
150 | flags, &m_CanaryThreadId); |
151 | |
152 | // in the past if we failed to start the thread we just assumed it was unsafe |
153 | // so I am preserving that behavior. However I am going to assert that this |
154 | // doesn't really happen |
155 | if(m_hCanaryThread == NULL) |
156 | { |
157 | _ASSERTE(!"CreateThread() failed to create Canary thread" ); |
158 | return; |
159 | } |
160 | |
161 | // Capture the Canary thread's TID so that the RS can mark it as a can't-stop region. |
162 | // This is essential so that the RS doesn't view it as some external thread to be suspended when we hit |
163 | // debug events. |
164 | _ASSERTE(g_pRCThread != NULL); |
165 | g_pRCThread->GetDCB()->m_CanaryThreadId = m_CanaryThreadId; |
166 | |
167 | ResumeThread(m_hCanaryThread); |
168 | } |
169 | |
170 | |
171 | //----------------------------------------------------------------------------- |
172 | // Does real work for AreLocksAvailable(), minus caching. |
173 | //----------------------------------------------------------------------------- |
174 | bool HelperCanary::AreLocksAvailableWorker() |
175 | { |
176 | #if _DEBUG |
177 | // For debugging, allow a way to force the canary to fail, and thus test our |
178 | // failure paths. |
179 | static BOOL fShortcut= -1; |
180 | if (fShortcut == -1) |
181 | { |
182 | fShortcut = UnsafeGetConfigDWORD(CLRConfig::INTERNAL_DbgShortcutCanary); |
183 | } |
184 | if (fShortcut == 1) |
185 | { |
186 | return false; |
187 | } |
188 | if (fShortcut == 2) |
189 | { |
190 | return true; |
191 | } |
192 | #endif |
193 | |
194 | // We used to do lazy init but that is dangerous... CreateThread |
195 | // allocates some memory which can block on a lock, exactly the |
196 | // situation we are attempting to detect and not block on. |
197 | // Instead we spin up the canary in advance and if that failed then |
198 | // assume unsafe |
199 | if(m_CanaryThreadId == 0) |
200 | { |
201 | _ASSERTE(!"We shouldn't be lazy initing the canary anymore" ); |
202 | return false; |
203 | } |
204 | |
205 | // Canary will take the locks of interest and then set the Answer counter equal to our request counter. |
206 | m_RequestCounter = m_RequestCounter + 1; |
207 | ResetEvent(m_hWaitEvent); |
208 | SetEvent(m_hPingEvent); |
209 | |
210 | // Spin waiting for answer. If canary gets back to us, then the locks must be free and so it's safe for helper-thread. |
211 | // If we timeout, then we err on the side of safety and assume canary blocked on a lock and so it's not safe |
212 | // for the helper thread to take those locks. |
213 | // We explicitly have a simple spin-wait instead of using win32 events because we want something simple and |
214 | // provably correct. Since we already need the spin-wait for the counters, adding an extra win32 event |
215 | // to get rid of the sleep would be additional complexity and race windows without a clear benefit. |
216 | |
217 | // We need to track what iteration of "AreLocksAvailable" the helper is on. Say canary sniffs two locks, now Imagine if: |
218 | // 1) Helper calls AreLocksAvailable, |
219 | // 2) the canary does get blocked on lock #1, |
220 | // 3) process resumes, canary now gets + releases lock #1, |
221 | // 4) another random thread takes lock #1 |
222 | // 5) then helper calls AreLocksAvailable again later |
223 | // 6) then the canary finally finishes. Note it's never tested lock #1 on the 2nd iteration. |
224 | // We don't want the canary's response initiated from the 1st request to impact the Helper's 2nd request. |
225 | // Thus we keep a request / answer counter to make sure that the canary tests all locks on the same iteration. |
226 | DWORD retry = 0; |
227 | |
228 | const DWORD msSleepSteadyState = 150; // sleep time in ms |
229 | const DWORD maxRetry = 15; // number of times to try. |
230 | DWORD msSleep = 80; // how much to sleep on first iteration. |
231 | |
232 | while(m_RequestCounter != m_AnswerCounter) |
233 | { |
234 | retry ++; |
235 | if (retry > maxRetry) |
236 | { |
237 | STRESS_LOG0(LF_CORDB, LL_ALWAYS, "Canary timed out!\n" ); |
238 | return false; |
239 | } |
240 | |
241 | // We'll either timeout (in which case it's like a Sleep(), or |
242 | // get the event, which shortcuts the sleep. |
243 | WaitForSingleObject(m_hWaitEvent, msSleep); |
244 | |
245 | // In case a stale answer sets the wait event high, reset it now to avoid us doing |
246 | // a live spin-lock. |
247 | ResetEvent(m_hWaitEvent); |
248 | |
249 | |
250 | msSleep = msSleepSteadyState; |
251 | } |
252 | |
253 | // Canary made it on same Request iteration, so it must be safe! |
254 | return true; |
255 | } |
256 | |
257 | //----------------------------------------------------------------------------- |
258 | // Real OS thread proc for Canary thread. |
259 | // param - 'this' pointer for HelperCanary |
260 | // return value - meaningless, but threads need to return something. |
261 | //----------------------------------------------------------------------------- |
262 | DWORD HelperCanary::ThreadProc(LPVOID param) |
263 | { |
264 | _ASSERTE(!ThisIsHelperThreadWorker()); |
265 | |
266 | STRESS_LOG0(LF_CORDB, LL_ALWAYS, "Canary thread spun up\n" ); |
267 | HelperCanary * pThis = reinterpret_cast<HelperCanary*> (param); |
268 | pThis->ThreadProc(); |
269 | _ASSERTE(pThis->m_fStop); |
270 | STRESS_LOG0(LF_CORDB, LL_ALWAYS, "Canary thread exiting\n" ); |
271 | |
272 | return 0; |
273 | } |
274 | |
275 | //----------------------------------------------------------------------------- |
276 | // Real implementation of Canary Thread. |
277 | // Single canary thread is reused after creation. |
278 | //----------------------------------------------------------------------------- |
279 | void HelperCanary::ThreadProc() |
280 | { |
281 | _ASSERTE(m_CanaryThreadId == GetCurrentThreadId()); |
282 | |
283 | while(true) |
284 | { |
285 | WaitForSingleObject(m_hPingEvent, INFINITE); |
286 | |
287 | m_AnswerCounter = 0; |
288 | DWORD dwRequest = m_RequestCounter; |
289 | |
290 | if (m_fStop) |
291 | { |
292 | return; |
293 | } |
294 | STRESS_LOG2(LF_CORDB, LL_ALWAYS, "stage:%d,req:%d" , 0, dwRequest); |
295 | |
296 | // Now take the locks of interest. This could block indefinitely. If this blocks, we may even get multiple requests. |
297 | TakeLocks(); |
298 | |
299 | m_AnswerCounter = dwRequest; |
300 | |
301 | // Set wait event to let Requesting thread shortcut its spin lock. This is purely an |
302 | // optimization because requesting thread will still check Answer/Request counters. |
303 | // That protects us from recyling bugs. |
304 | SetEvent(m_hWaitEvent); |
305 | } |
306 | } |
307 | |
308 | //----------------------------------------------------------------------------- |
309 | // Try and take locks. |
310 | //----------------------------------------------------------------------------- |
311 | void HelperCanary::TakeLocks() |
312 | { |
313 | _ASSERTE(::GetThread() == NULL); // Canary Thread should always be outside the runtime. |
314 | _ASSERTE(m_CanaryThreadId == GetCurrentThreadId()); |
315 | |
316 | // Call new, which will take whatever standard heap locks there are. |
317 | // We don't care about what memory we get; we just want to take the heap lock(s). |
318 | DWORD * p = new (nothrow) DWORD(); |
319 | delete p; |
320 | |
321 | STRESS_LOG1(LF_CORDB, LL_ALWAYS, "canary stage:%d\n" , 1); |
322 | } |
323 | |
324 | |
325 | |