1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4//*****************************************************************************
5// File: Canary.cpp
6//
7
8//
9// Canary for debugger helper thread. This will sniff out if it's safe to take locks.
10//
11//*****************************************************************************
12
13#include "stdafx.h"
14
15
16//-----------------------------------------------------------------------------
17// Ctor for HelperCanary class
18//-----------------------------------------------------------------------------
19HelperCanary::HelperCanary()
20{
21 m_hCanaryThread = NULL;
22 m_CanaryThreadId = 0;
23 m_RequestCounter = 0;
24 m_AnswerCounter = 0;
25 m_fStop = false;
26
27 m_fCachedValid = false;
28 m_fCachedAnswer = false;
29 m_initialized = false;
30}
31
32//-----------------------------------------------------------------------------
33// Dtor for class
34//-----------------------------------------------------------------------------
35HelperCanary::~HelperCanary()
36{
37 // Since we're deleting this memory, we need to kill the canary thread.
38 m_fStop = true;
39 SetEvent(m_hPingEvent);
40
41 // m_hPingEvent dtor will close handle
42 WaitForSingleObject(m_hCanaryThread, INFINITE);
43}
44
45//-----------------------------------------------------------------------------
46// Clear the cached value for AreLocksAvailable();
47//-----------------------------------------------------------------------------
48void HelperCanary::ClearCache()
49{
50 _ASSERTE(ThisIsHelperThreadWorker());
51 m_fCachedValid = false;
52}
53
54//-----------------------------------------------------------------------------
55// The helper thread can call this to determine if it can safely take a certain
56// set of locks (mainly the heap lock(s)). The canary thread will go off and
57// try and take these and report back to the helper w/o ever blocking the
58// helper.
59//
60// Returns 'true' if it's safe for helper to take locks; else false.
61// We err on the side of safety (returning false).
62//-----------------------------------------------------------------------------
63bool HelperCanary::AreLocksAvailable()
64{
65 // If we're not on the helper thread, then we're guaranteed safe.
66 // We check this to support MaybeHelperThread code.
67 if (!ThisIsHelperThreadWorker())
68 {
69 return true;
70 }
71
72 if (m_fCachedValid)
73 {
74 return m_fCachedAnswer;
75 }
76
77 // Cache the answer.
78 m_fCachedAnswer = AreLocksAvailableWorker();
79 m_fCachedValid = true;
80
81#ifdef _DEBUG
82 // For managed-only debugging, we should always be safe.
83 if (!g_pRCThread->GetDCB()->m_rightSideIsWin32Debugger)
84 {
85 _ASSERTE(m_fCachedAnswer || !"Canary returned false in Managed-debugger");
86 }
87
88 // For debug, nice to be able to enable an assert that tells us if this situation is actually happening.
89 if (!m_fCachedAnswer)
90 {
91 static BOOL shouldBreak = -1;
92 if (shouldBreak == -1)
93 {
94 shouldBreak = UnsafeGetConfigDWORD(CLRConfig::INTERNAL_DbgBreakIfLocksUnavailable);
95 }
96 if (shouldBreak)
97 {
98 _ASSERTE(!"Potential deadlock detected.\nLocks that the helper thread may need are currently held by other threads.");
99 }
100 }
101#endif // _DEBUG
102
103 return m_fCachedAnswer;
104}
105
106//-----------------------------------------------------------------------------
107// Creates the canary thread and signaling events.
108//-----------------------------------------------------------------------------
109void HelperCanary::Init()
110{
111 // You can only run the init code once. The debugger attempts to lazy-init
112 // the canary at several points but if the canary is already inited then
113 // we just eagerly return. See issue 841005 for more details.
114 if(m_initialized)
115 {
116 return;
117 }
118 else
119 {
120 m_initialized = true;
121 }
122
123 m_hPingEvent = WszCreateEvent(NULL, (BOOL) kAutoResetEvent, FALSE, NULL);
124 if (m_hPingEvent == NULL)
125 {
126 STRESS_LOG1(LF_CORDB, LL_ALWAYS, "Canary failed to create ping event. gle=%d\n", GetLastError());
127 // in the past if we failed to start the thread we just assumed it was unsafe
128 // so I am preserving that behavior. However I am going to assert that this
129 // doesn't really happen
130 _ASSERTE(!"Canary failed to create ping event");
131 return;
132 }
133
134 m_hWaitEvent = WszCreateEvent(NULL, (BOOL) kManualResetEvent, FALSE, NULL);
135 if (m_hWaitEvent == NULL)
136 {
137 STRESS_LOG1(LF_CORDB, LL_ALWAYS, "Canary failed to create wait event. gle=%d\n", GetLastError());
138 // in the past if we failed to start the thread we just assumed it was unsafe
139 // so I am preserving that behavior. However I am going to assert that this
140 // doesn't really happen
141 _ASSERTE(!"Canary failed to create wait event");
142 return;
143 }
144
145 // Spin up the canary. This will call dllmain, but that's ok because it just
146 // degenerates to our timeout case.
147 const DWORD flags = CREATE_SUSPENDED;
148 m_hCanaryThread = CreateThread(NULL, 0,
149 HelperCanary::ThreadProc, this,
150 flags, &m_CanaryThreadId);
151
152 // in the past if we failed to start the thread we just assumed it was unsafe
153 // so I am preserving that behavior. However I am going to assert that this
154 // doesn't really happen
155 if(m_hCanaryThread == NULL)
156 {
157 _ASSERTE(!"CreateThread() failed to create Canary thread");
158 return;
159 }
160
161 // Capture the Canary thread's TID so that the RS can mark it as a can't-stop region.
162 // This is essential so that the RS doesn't view it as some external thread to be suspended when we hit
163 // debug events.
164 _ASSERTE(g_pRCThread != NULL);
165 g_pRCThread->GetDCB()->m_CanaryThreadId = m_CanaryThreadId;
166
167 ResumeThread(m_hCanaryThread);
168}
169
170
171//-----------------------------------------------------------------------------
172// Does real work for AreLocksAvailable(), minus caching.
173//-----------------------------------------------------------------------------
174bool HelperCanary::AreLocksAvailableWorker()
175{
176#if _DEBUG
177 // For debugging, allow a way to force the canary to fail, and thus test our
178 // failure paths.
179 static BOOL fShortcut= -1;
180 if (fShortcut == -1)
181 {
182 fShortcut = UnsafeGetConfigDWORD(CLRConfig::INTERNAL_DbgShortcutCanary);
183 }
184 if (fShortcut == 1)
185 {
186 return false;
187 }
188 if (fShortcut == 2)
189 {
190 return true;
191 }
192#endif
193
194 // We used to do lazy init but that is dangerous... CreateThread
195 // allocates some memory which can block on a lock, exactly the
196 // situation we are attempting to detect and not block on.
197 // Instead we spin up the canary in advance and if that failed then
198 // assume unsafe
199 if(m_CanaryThreadId == 0)
200 {
201 _ASSERTE(!"We shouldn't be lazy initing the canary anymore");
202 return false;
203 }
204
205 // Canary will take the locks of interest and then set the Answer counter equal to our request counter.
206 m_RequestCounter = m_RequestCounter + 1;
207 ResetEvent(m_hWaitEvent);
208 SetEvent(m_hPingEvent);
209
210 // Spin waiting for answer. If canary gets back to us, then the locks must be free and so it's safe for helper-thread.
211 // If we timeout, then we err on the side of safety and assume canary blocked on a lock and so it's not safe
212 // for the helper thread to take those locks.
213 // We explicitly have a simple spin-wait instead of using win32 events because we want something simple and
214 // provably correct. Since we already need the spin-wait for the counters, adding an extra win32 event
215 // to get rid of the sleep would be additional complexity and race windows without a clear benefit.
216
217 // We need to track what iteration of "AreLocksAvailable" the helper is on. Say canary sniffs two locks, now Imagine if:
218 // 1) Helper calls AreLocksAvailable,
219 // 2) the canary does get blocked on lock #1,
220 // 3) process resumes, canary now gets + releases lock #1,
221 // 4) another random thread takes lock #1
222 // 5) then helper calls AreLocksAvailable again later
223 // 6) then the canary finally finishes. Note it's never tested lock #1 on the 2nd iteration.
224 // We don't want the canary's response initiated from the 1st request to impact the Helper's 2nd request.
225 // Thus we keep a request / answer counter to make sure that the canary tests all locks on the same iteration.
226 DWORD retry = 0;
227
228 const DWORD msSleepSteadyState = 150; // sleep time in ms
229 const DWORD maxRetry = 15; // number of times to try.
230 DWORD msSleep = 80; // how much to sleep on first iteration.
231
232 while(m_RequestCounter != m_AnswerCounter)
233 {
234 retry ++;
235 if (retry > maxRetry)
236 {
237 STRESS_LOG0(LF_CORDB, LL_ALWAYS, "Canary timed out!\n");
238 return false;
239 }
240
241 // We'll either timeout (in which case it's like a Sleep(), or
242 // get the event, which shortcuts the sleep.
243 WaitForSingleObject(m_hWaitEvent, msSleep);
244
245 // In case a stale answer sets the wait event high, reset it now to avoid us doing
246 // a live spin-lock.
247 ResetEvent(m_hWaitEvent);
248
249
250 msSleep = msSleepSteadyState;
251 }
252
253 // Canary made it on same Request iteration, so it must be safe!
254 return true;
255}
256
257//-----------------------------------------------------------------------------
258// Real OS thread proc for Canary thread.
259// param - 'this' pointer for HelperCanary
260// return value - meaningless, but threads need to return something.
261//-----------------------------------------------------------------------------
262DWORD HelperCanary::ThreadProc(LPVOID param)
263{
264 _ASSERTE(!ThisIsHelperThreadWorker());
265
266 STRESS_LOG0(LF_CORDB, LL_ALWAYS, "Canary thread spun up\n");
267 HelperCanary * pThis = reinterpret_cast<HelperCanary*> (param);
268 pThis->ThreadProc();
269 _ASSERTE(pThis->m_fStop);
270 STRESS_LOG0(LF_CORDB, LL_ALWAYS, "Canary thread exiting\n");
271
272 return 0;
273}
274
275//-----------------------------------------------------------------------------
276// Real implementation of Canary Thread.
277// Single canary thread is reused after creation.
278//-----------------------------------------------------------------------------
279void HelperCanary::ThreadProc()
280{
281 _ASSERTE(m_CanaryThreadId == GetCurrentThreadId());
282
283 while(true)
284 {
285 WaitForSingleObject(m_hPingEvent, INFINITE);
286
287 m_AnswerCounter = 0;
288 DWORD dwRequest = m_RequestCounter;
289
290 if (m_fStop)
291 {
292 return;
293 }
294 STRESS_LOG2(LF_CORDB, LL_ALWAYS, "stage:%d,req:%d", 0, dwRequest);
295
296 // Now take the locks of interest. This could block indefinitely. If this blocks, we may even get multiple requests.
297 TakeLocks();
298
299 m_AnswerCounter = dwRequest;
300
301 // Set wait event to let Requesting thread shortcut its spin lock. This is purely an
302 // optimization because requesting thread will still check Answer/Request counters.
303 // That protects us from recyling bugs.
304 SetEvent(m_hWaitEvent);
305 }
306}
307
308//-----------------------------------------------------------------------------
309// Try and take locks.
310//-----------------------------------------------------------------------------
311void HelperCanary::TakeLocks()
312{
313 _ASSERTE(::GetThread() == NULL); // Canary Thread should always be outside the runtime.
314 _ASSERTE(m_CanaryThreadId == GetCurrentThreadId());
315
316 // Call new, which will take whatever standard heap locks there are.
317 // We don't care about what memory we get; we just want to take the heap lock(s).
318 DWORD * p = new (nothrow) DWORD();
319 delete p;
320
321 STRESS_LOG1(LF_CORDB, LL_ALWAYS, "canary stage:%d\n", 1);
322}
323
324
325