1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | |
6 | /*++ |
7 | |
8 | Module Name: |
9 | |
10 | Win32ThreadPool.h |
11 | |
12 | Abstract: |
13 | |
14 | This module is the header file for thread pools using Win32 APIs. |
15 | |
16 | Revision History: |
17 | |
18 | |
19 | --*/ |
20 | |
21 | #ifndef _WIN32THREADPOOL_H |
22 | #define _WIN32THREADPOOL_H |
23 | |
24 | #include "delegateinfo.h" |
25 | #include "util.hpp" |
26 | #include "nativeoverlapped.h" |
27 | #include "hillclimbing.h" |
28 | |
// Size of the per-wait-thread handle arrays (ThreadCB::waitHandle / ThreadCB::waitPointer).
#define MAX_WAITHANDLES 64

#define MAX_CACHED_EVENTS 40 // upper limit on number of wait events cached

// Bit flags for the lifecycle of a registered wait.
// NOTE(review): presumably stored in WaitInfo::state -- confirm against the implementation file.
#define WAIT_REGISTERED 0x01
#define WAIT_ACTIVE 0x02
#define WAIT_DELETE 0x04

// Bit flags for the lifecycle of a timer.
// NOTE(review): presumably stored in TimerInfo::state -- confirm against the implementation file.
#define TIMER_REGISTERED 0x01
#define TIMER_ACTIVE 0x02
#define TIMER_DELETE 0x04

// Option bits tested against WaitInfo::flag (see ReleaseAsyncCallback).
#define WAIT_SINGLE_EXECUTION 0x00000001
#define WAIT_FREE_CONTEXT 0x00000002
#define WAIT_INTERNAL_COMPLETION 0x00000004

#define QUEUE_ONLY 0x00000000 // do not attempt to call on the thread
#define CALL_OR_QUEUE 0x00000001 // call on the same thread if not too busy, else queue

const int MaxLimitThreadsPerCPU=250; // upper limit on number of cp threads per CPU
const int MaxFreeCPThreadsPerCPU=2; // upper limit on number of free cp threads per CPU

const int CpuUtilizationHigh=95; // remove threads when above this
const int CpuUtilizationLow =80; // inject more threads if below this
53 | |
54 | #ifndef FEATURE_PAL |
55 | extern HANDLE (WINAPI *g_pufnCreateIoCompletionPort)(HANDLE FileHandle, |
56 | HANDLE ExistingCompletionPort, |
57 | ULONG_PTR CompletionKey, |
58 | DWORD NumberOfConcurrentThreads); |
59 | |
60 | extern int (WINAPI *g_pufnNtQueryInformationThread) (HANDLE ThreadHandle, |
61 | THREADINFOCLASS ThreadInformationClass, |
62 | PVOID ThreadInformation, |
63 | ULONG ThreadInformationLength, |
64 | PULONG ReturnLength); |
65 | |
66 | extern int (WINAPI * g_pufnNtQuerySystemInformation) (SYSTEM_INFORMATION_CLASS SystemInformationClass, |
67 | PVOID SystemInformation, |
68 | ULONG SystemInformationLength, |
69 | PULONG ReturnLength OPTIONAL); |
70 | #endif // !FEATURE_PAL |
71 | |
72 | #define FILETIME_TO_INT64(t) (*(__int64*)&(t)) |
// Convert from milliseconds to 100-nanosecond units (1 ms == 10,000 units).
// The argument is parenthesized so that a compound argument such as
// MILLI_TO_100NANO(a + b) scales the whole expression rather than only its
// last operand.
#define MILLI_TO_100NANO(x) ((x) * 10000)
74 | |
75 | /** |
76 | * This type is supposed to be private to ThreadpoolMgr. |
77 | * It's at global scope because Strike needs to be able to access its |
78 | * definition. |
79 | */ |
// Node of the singly linked native work-request queue; nodes are chained
// through 'next' by AppendWorkRequest / RemoveWorkRequest and filled in by
// MakeWorkRequest.
struct WorkRequest {
    // following node in the queue, NULL for the last one
    WorkRequest* next;
    // routine stored for this request
    LPTHREAD_START_ROUTINE Function;
    // opaque argument stored alongside Function
    PVOID Context;

};
86 | |
// Bundles the values describing one I/O completion.
// NOTE(review): the fields match the parameters of StoreOverlappedInfoInThread
// (error code, byte count, key, OVERLAPPED pointer); presumably this is what
// that function records -- confirm against the implementation file.
typedef struct _IOCompletionContext
{
    DWORD ErrorCode;
    DWORD numBytesTransferred;
    LPOVERLAPPED lpOverlapped;
    size_t key;
} IOCompletionContext, *PIOCompletionContext;
94 | |
95 | typedef DPTR(WorkRequest) PTR_WorkRequest; |
96 | class ThreadpoolMgr |
97 | { |
98 | friend class ClrDataAccess; |
99 | friend struct DelegateInfo; |
100 | friend class ThreadPoolNative; |
101 | friend class TimerNative; |
102 | friend class UnManagedPerAppDomainTPCount; |
103 | friend class ManagedPerAppDomainTPCount; |
104 | friend class PerAppDomainTPCountList; |
105 | friend class HillClimbing; |
106 | friend struct _DacGlobals; |
107 | |
108 | public: |
// Keeps four 16-bit thread counts packed into one 64-bit value ('counts') so
// that all of them can be read and replaced together through a single
// interlocked compare-exchange (see CompareExchangeCounts).
struct ThreadCounter
{
    // largest value that fits in one of the signed 16-bit count fields below
    static const int MaxPossibleCount = 0x7fff;

    // padding to ensure we get our own cache line
    BYTE padding1[MAX_CACHE_LINE_SIZE];

    union Counts
    {
        struct
        {
            //
            // Note: these are signed rather than unsigned to allow us to detect under/overflow.
            //
            int MaxWorking : 16; //Determined by HillClimbing; adjusted elsewhere for timeouts, etc.
            int NumActive : 16; //Active means working or waiting on WorkerSemaphore. These are "warm/hot" threads.
            int NumWorking : 16; //Trying to get work from various queues. Not waiting on either semaphore.
            int NumRetired : 16; //Not trying to get work; waiting on RetiredWorkerSemaphore. These are "cold" threads.

            // Note: the only reason we need "retired" threads at all is that it allows some threads to eventually time out
            // even if other threads are getting work. If we ever make WorkerSemaphore a true LIFO semaphore, we will no longer
            // need the concept of "retirement" - instead, the very "coldest" threads will naturally be the first to time out.
        };

        // the same four fields viewed as one 64-bit integer; this is what the interlocked operations act on
        LONGLONG AsLongLong;

        // two snapshots are equal when their packed 64-bit representations match
        bool operator==(Counts other) {LIMITED_METHOD_CONTRACT; return AsLongLong == other.AsLongLong;}
    } counts;

    // padding to ensure we get our own cache line
    BYTE padding2[MAX_CACHE_LINE_SIZE];

    //
    // Returns a snapshot of the counts that is safe to act on directly.
    // On 64-bit builds this is a plain volatile load; on 32-bit builds an
    // interlocked compare-exchange of 0 with 0 is used to obtain the value,
    // and the result is validated. In DAC builds the 32-bit path returns zero.
    //
    Counts GetCleanCounts()
    {
        LIMITED_METHOD_CONTRACT;
#ifdef _WIN64
        // VolatileLoad x64 bit read is atomic
        return DangerousGetDirtyCounts();
#else // !_WIN64
        // VolatileLoad may result in torn read
        Counts result;
#ifndef DACCESS_COMPILE
        result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, 0, 0);
        ValidateCounts(result);
#else
        result.AsLongLong = 0; //prevents prefast warning for DAC builds
#endif
        return result;
#endif // !_WIN64
    }

    //
    // This does a non-atomic read of the counts. The returned value is suitable only
    // for use inside of a read-compare-exchange loop, where the compare-exchange must succeed
    // before any action is taken. Use GetCleanCounts for other needs, but keep in mind
    // it's much slower.
    //
    Counts DangerousGetDirtyCounts()
    {
        LIMITED_METHOD_CONTRACT;
        Counts result;
#ifndef DACCESS_COMPILE
        result.AsLongLong = VolatileLoad(&counts.AsLongLong);
#else
        result.AsLongLong = 0; //prevents prefast warning for DAC builds
#endif
        return result;
    }


    //
    // Replaces the stored counts with newCounts if they still equal oldCounts.
    // Returns the value observed before the attempt, so the exchange succeeded
    // exactly when the return value equals oldCounts. In DAC builds nothing is
    // exchanged and zero is returned.
    //
    Counts CompareExchangeCounts(Counts newCounts, Counts oldCounts)
    {
        LIMITED_METHOD_CONTRACT;
        Counts result;
#ifndef DACCESS_COMPILE
        result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, newCounts.AsLongLong, oldCounts.AsLongLong);
        if (result == oldCounts)
        {
            // can only do validation on success; if we failed, it may have been due to a previous
            // dirty read, which may contain invalid values.
            ValidateCounts(result);
            ValidateCounts(newCounts);
        }
#else
        result.AsLongLong = 0; //prevents prefast warning for DAC builds
#endif
        return result;
    }

private:
    // Debug-only sanity checks on a snapshot: the limit is positive, no count
    // is negative, and working threads never exceed active threads.
    static void ValidateCounts(Counts counts)
    {
        LIMITED_METHOD_CONTRACT;
        _ASSERTE(counts.MaxWorking > 0);
        _ASSERTE(counts.NumActive >= 0);
        _ASSERTE(counts.NumWorking >= 0);
        _ASSERTE(counts.NumRetired >= 0);
        _ASSERTE(counts.NumWorking <= counts.NumActive);
    }
};
209 | |
210 | public: |
211 | |
212 | static void ReportThreadStatus(bool isWorking); |
213 | |
214 | // enumeration of different kinds of memory blocks that are recycled |
enum MemType
{
    // one value per recycled block kind; the value is used as an index into the per-processor recycle lists
    MEMTYPE_AsyncCallback = 0,
    MEMTYPE_DelegateInfo = 1,
    MEMTYPE_WorkRequest = 2,
    // number of kinds above; used as the inner array dimension in RecycledListsWrapper
    MEMTYPE_COUNT = 3,
};
222 | |
// Pairs an AppDomain id with a timer id.
// NOTE(review): how this pair is consumed is not visible in this header -- confirm against the callers.
typedef struct {
    ADID AppDomainId;
    INT32 TimerId;
} TimerInfoContext;
227 | |
228 | static BOOL Initialize(); |
229 | |
230 | static BOOL SetMaxThreadsHelper(DWORD MaxWorkerThreads, |
231 | DWORD MaxIOCompletionThreads); |
232 | |
233 | static BOOL SetMaxThreads(DWORD MaxWorkerThreads, |
234 | DWORD MaxIOCompletionThreads); |
235 | |
236 | static BOOL GetMaxThreads(DWORD* MaxWorkerThreads, |
237 | DWORD* MaxIOCompletionThreads); |
238 | |
239 | static BOOL SetMinThreads(DWORD MinWorkerThreads, |
240 | DWORD MinIOCompletionThreads); |
241 | |
242 | static BOOL GetMinThreads(DWORD* MinWorkerThreads, |
243 | DWORD* MinIOCompletionThreads); |
244 | |
245 | static BOOL GetAvailableThreads(DWORD* AvailableWorkerThreads, |
246 | DWORD* AvailableIOCompletionThreads); |
247 | |
248 | static BOOL QueueUserWorkItem(LPTHREAD_START_ROUTINE Function, |
249 | PVOID Context, |
250 | ULONG Flags, |
251 | BOOL UnmanagedTPRequest=TRUE); |
252 | |
253 | static BOOL PostQueuedCompletionStatus(LPOVERLAPPED lpOverlapped, |
254 | LPOVERLAPPED_COMPLETION_ROUTINE Function); |
255 | |
// Returns nonzero when GlobalCompletionPort is non-NULL.
inline static BOOL IsCompletionPortInitialized()
{
    LIMITED_METHOD_CONTRACT;
    return GlobalCompletionPort != NULL;
}
261 | |
262 | static BOOL DrainCompletionPortQueue(); |
263 | |
264 | static BOOL RegisterWaitForSingleObject(PHANDLE phNewWaitObject, |
265 | HANDLE hWaitObject, |
266 | WAITORTIMERCALLBACK Callback, |
267 | PVOID Context, |
268 | ULONG timeout, |
269 | DWORD dwFlag); |
270 | |
271 | static BOOL UnregisterWaitEx(HANDLE hWaitObject,HANDLE CompletionEvent); |
272 | static void WaitHandleCleanup(HANDLE hWaitObject); |
273 | |
274 | static BOOL WINAPI BindIoCompletionCallback(HANDLE FileHandle, |
275 | LPOVERLAPPED_COMPLETION_ROUTINE Function, |
276 | ULONG Flags, |
277 | DWORD& errorCode); |
278 | |
279 | static void WINAPI WaitIOCompletionCallback(DWORD dwErrorCode, |
280 | DWORD numBytesTransferred, |
281 | LPOVERLAPPED lpOverlapped); |
282 | |
283 | static VOID WINAPI CallbackForInitiateDrainageOfCompletionPortQueue( |
284 | DWORD dwErrorCode, |
285 | DWORD dwNumberOfBytesTransfered, |
286 | LPOVERLAPPED lpOverlapped |
287 | ); |
288 | |
289 | static VOID WINAPI CallbackForContinueDrainageOfCompletionPortQueue( |
290 | DWORD dwErrorCode, |
291 | DWORD dwNumberOfBytesTransfered, |
292 | LPOVERLAPPED lpOverlapped |
293 | ); |
294 | |
295 | static BOOL SetAppDomainRequestsActive(BOOL UnmanagedTP = FALSE); |
296 | static void ClearAppDomainRequestsActive(BOOL UnmanagedTP = FALSE, BOOL AdUnloading = FALSE, LONG index = -1); |
297 | |
298 | static inline void UpdateLastDequeueTime() |
299 | { |
300 | LIMITED_METHOD_CONTRACT; |
301 | VolatileStore(&LastDequeueTime, (unsigned int)GetTickCount()); |
302 | } |
303 | |
304 | static BOOL CreateTimerQueueTimer(PHANDLE phNewTimer, |
305 | WAITORTIMERCALLBACK Callback, |
306 | PVOID Parameter, |
307 | DWORD DueTime, |
308 | DWORD Period, |
309 | ULONG Flags); |
310 | |
311 | static BOOL ChangeTimerQueueTimer(HANDLE Timer, |
312 | ULONG DueTime, |
313 | ULONG Period); |
314 | static BOOL DeleteTimerQueueTimer(HANDLE Timer, |
315 | HANDLE CompletionEvent); |
316 | |
317 | static void RecycleMemory(LPVOID mem, enum MemType memType); |
318 | |
319 | static void FlushQueueOfTimerInfos(); |
320 | |
// Returns nonzero when TimerInfosToBeRecycled is non-NULL, i.e. there is something for FlushQueueOfTimerInfos to process.
static BOOL HaveTimerInfosToFlush() { return TimerInfosToBeRecycled != NULL; }
322 | |
323 | #ifndef FEATURE_PAL |
324 | static LPOVERLAPPED CompletionPortDispatchWorkWithinAppDomain(Thread* pThread, DWORD* pErrorCode, DWORD* pNumBytes, size_t* pKey, DWORD adid); |
325 | static void StoreOverlappedInfoInThread(Thread* pThread, DWORD dwErrorCode, DWORD dwNumBytes, size_t key, LPOVERLAPPED lpOverlapped); |
326 | #endif // !FEATURE_PAL |
327 | |
328 | // Enable filtering of correlation ETW events for cases handled at a higher abstraction level |
329 | |
330 | #ifndef DACCESS_COMPILE |
331 | static FORCEINLINE BOOL AreEtwQueueEventsSpeciallyHandled(LPTHREAD_START_ROUTINE Function) |
332 | { |
333 | // Timer events are handled at a higher abstraction level: in the managed Timer class |
334 | return (Function == ThreadpoolMgr::AsyncTimerCallbackCompletion); |
335 | } |
336 | |
337 | static FORCEINLINE BOOL AreEtwIOQueueEventsSpeciallyHandled(LPOVERLAPPED_COMPLETION_ROUTINE Function) |
338 | { |
339 | // We ignore drainage events b/c they are uninteresting |
340 | // We handle registered waits at a higher abstraction level |
341 | return (Function == ThreadpoolMgr::CallbackForInitiateDrainageOfCompletionPortQueue |
342 | || Function == ThreadpoolMgr::CallbackForContinueDrainageOfCompletionPortQueue |
343 | || Function == ThreadpoolMgr::WaitIOCompletionCallback); |
344 | } |
345 | #endif |
346 | |
347 | private: |
348 | |
349 | #ifndef DACCESS_COMPILE |
350 | |
// Hands a WorkRequest back to the recycling machinery (RecycleMemory with
// MEMTYPE_WorkRequest) instead of deleting it directly.
inline static void FreeWorkRequest(WorkRequest* workRequest)
{
    RecycleMemory( workRequest, MEMTYPE_WorkRequest ); //delete workRequest;
}
355 | |
356 | inline static WorkRequest* MakeWorkRequest(LPTHREAD_START_ROUTINE function, PVOID context) |
357 | { |
358 | CONTRACTL |
359 | { |
360 | THROWS; |
361 | GC_NOTRIGGER; |
362 | MODE_ANY; |
363 | } |
364 | CONTRACTL_END;; |
365 | |
366 | WorkRequest* wr = (WorkRequest*) GetRecycledMemory(MEMTYPE_WorkRequest); |
367 | _ASSERTE(wr); |
368 | if (NULL == wr) |
369 | return NULL; |
370 | wr->Function = function; |
371 | wr->Context = context; |
372 | wr->next = NULL; |
373 | return wr; |
374 | } |
375 | |
376 | #endif // #ifndef DACCESS_COMPILE |
377 | |
// Record of one completion status.
// NOTE(review): presumably mirrors the outputs of a completion-port dequeue
// (byte count, key, OVERLAPPED pointer) plus an error code -- confirm against
// the implementation file.
typedef struct {
    DWORD numBytes;
    ULONG_PTR *key;
    LPOVERLAPPED pOverlapped;
    DWORD errorCode;
} QueuedStatus;
384 | |
typedef DPTR(struct _LIST_ENTRY) PTR_LIST_ENTRY;
// Link node for doubly linked lists. It is embedded as the 'link' member of
// WaitInfo, WaitThreadInfo, WaitEvent and TimerInfo, and used directly as the
// per-slot list head in ThreadCB::waitPointer.
typedef struct _LIST_ENTRY {
    // forward link (Windows LIST_ENTRY naming convention)
    struct _LIST_ENTRY *Flink;
    // backward link
    struct _LIST_ENTRY *Blink;
} LIST_ENTRY, *PLIST_ENTRY;
390 | |
391 | struct WaitInfo; |
392 | |
// Per-wait-thread control block: the wait handles the thread owns and, for
// each handle slot, the list of WaitInfo records attached to it. The two
// arrays are kept parallel (see ShiftWaitArray, which moves both together).
typedef struct {
    HANDLE threadHandle;
    DWORD threadId;
    CLREvent startEvent;
    LONG NumWaitHandles; // number of wait objects registered to the thread <=64
    LONG NumActiveWaits; // number of objects, thread is actually waiting on (this may be less than
                         // NumWaitHandles since the thread may not have activated some waits)
    HANDLE waitHandle[MAX_WAITHANDLES]; // array of wait handles (copied from waitInfo since
                                        // we need them to be contiguous)
    LIST_ENTRY waitPointer[MAX_WAITHANDLES]; // array of doubly linked list of corresponding waitinfo
} ThreadCB;
404 | |
405 | |
// Timeout bookkeeping for a single wait (embedded in WaitInfo as 'timer').
typedef struct {
    ULONG startTime; // time at which wait was started
                     // endTime = startTime+timeout
    ULONG remainingTime; // endTime - currentTime
} WaitTimerInfo;
411 | |
// State of one registered wait. Reference counted through 'refCount'
// (see ReleaseAsyncCallback, which calls DeleteWait when it drops to 0).
struct WaitInfo {
    LIST_ENTRY link; // Win9x does not allow duplicate waithandles, so we need to
                     // group all waits on a single waithandle using this linked list
    HANDLE waitHandle;
    WAITORTIMERCALLBACK Callback;
    PVOID Context;
    ULONG timeout;
    WaitTimerInfo timer;
    DWORD flag; // WAIT_SINGLE_EXECUTION / WAIT_FREE_CONTEXT bits are tested against this field
    DWORD state;
    ThreadCB* threadCB;
    LONG refCount; // when this reaches 0, the waitInfo can be safely deleted
    CLREvent PartialCompletionEvent; // used to synchronize deactivation of a wait
    CLREvent InternalCompletionEvent; // only one of InternalCompletion or ExternalCompletion is used
                                      // but I cant make a union since CLREvent has a non-default constructor
    HANDLE ExternalCompletionEvent; // they are signalled when all callbacks have completed (refCount=0)
    ADID handleOwningAD; // passed to ReleaseInfo together with ExternalEventSafeHandle (see ReleaseWaitInfo)
    OBJECTHANDLE ExternalEventSafeHandle;

} ;
432 | |
433 | // structure used to maintain global information about wait threads. Protected by WaitThreadsCriticalSection |
// One entry in the list of wait threads: a list link plus a pointer to that thread's control block.
typedef struct WaitThreadTag {
    LIST_ENTRY link;
    ThreadCB* threadCB;
} WaitThreadInfo;
438 | |
439 | |
// Ties a wait to the outcome being reported for it. Blocks of this type are
// obtained via MakeAsyncCallback and recycled in ReleaseAsyncCallback.
struct AsyncCallback{
    WaitInfo* wait;
    // NOTE(review): presumably TRUE when the callback is for a timeout rather than a signalled handle -- confirm
    BOOL waitTimedOut;
} ;
444 | |
445 | #ifndef DACCESS_COMPILE |
446 | |
// Acquire half of AsyncCallbackHolder; intentionally does nothing.
static VOID
AcquireAsyncCallback(AsyncCallback *pAsyncCB)
{
    LIMITED_METHOD_CONTRACT;
}
452 | |
453 | static VOID |
454 | ReleaseAsyncCallback(AsyncCallback *pAsyncCB) |
455 | { |
456 | CONTRACTL |
457 | { |
458 | THROWS; |
459 | GC_TRIGGERS; |
460 | MODE_ANY; |
461 | } |
462 | CONTRACTL_END; |
463 | |
464 | WaitInfo *waitInfo = pAsyncCB->wait; |
465 | ThreadpoolMgr::RecycleMemory((LPVOID*)pAsyncCB, ThreadpoolMgr::MEMTYPE_AsyncCallback); |
466 | |
467 | // if this was a single execution, we now need to stop rooting registeredWaitHandle |
468 | // in a GC handle. This will cause the finalizer to pick it up and call the cleanup |
469 | // routine. |
470 | if ( (waitInfo->flag & WAIT_SINGLE_EXECUTION) && (waitInfo->flag & WAIT_FREE_CONTEXT)) |
471 | { |
472 | |
473 | DelegateInfo* pDelegate = (DelegateInfo*) waitInfo->Context; |
474 | |
475 | _ASSERTE(pDelegate->m_registeredWaitHandle); |
476 | |
477 | { |
478 | GCX_COOP(); |
479 | StoreObjectInHandle(pDelegate->m_registeredWaitHandle, NULL); |
480 | } |
481 | } |
482 | |
483 | if (InterlockedDecrement(&waitInfo->refCount) == 0) |
484 | ThreadpoolMgr::DeleteWait(waitInfo); |
485 | |
486 | } |
487 | |
488 | typedef Holder<AsyncCallback *, ThreadpoolMgr::AcquireAsyncCallback, ThreadpoolMgr::ReleaseAsyncCallback> AsyncCallbackHolder; |
// Returns a block from GetRecycledMemory(MEMTYPE_AsyncCallback) typed as an
// AsyncCallback. The fields are not initialized here.
inline static AsyncCallback* MakeAsyncCallback()
{
    WRAPPER_NO_CONTRACT;
    return (AsyncCallback*) GetRecycledMemory(MEMTYPE_AsyncCallback);
}
494 | |
// Shared cleanup for WaitInfo / TimerInfo records that carry an external
// completion event wrapped in a SafeHandle.
//
// If hndSafeHandle is non-NULL: enters the owning AppDomain, reads the
// SafeHandle object out of the GC handle, destroys the GC handle, and, if the
// object exists and its native handle is not INVALID_HANDLE_VALUE, sets that
// event. Any exception raised along the way is swallowed. Afterwards both
// reference parameters are reset (hndSafeHandle = NULL, owningAD = 0).
//
// hndNativeHandle is not used by the body.
// The whole body is compiled out under _PREFAST_.
static VOID ReleaseInfo(OBJECTHANDLE& hndSafeHandle,
                        ADID& owningAD,
                        HANDLE hndNativeHandle)
{
    CONTRACTL
    {
        NOTHROW;
        MODE_ANY;
        GC_TRIGGERS;
    }
    CONTRACTL_END

// Use of EX_TRY, GCPROTECT etc in the same function is causing prefast to complain about local variables with
// same name masking each other (#246). The error could not be suppressed with "#pragma PREFAST_SUPPRESS"
#ifndef _PREFAST_

    if (hndSafeHandle != NULL)
    {

        SAFEHANDLEREF refSH = NULL;

        GCX_COOP();
        GCPROTECT_BEGIN(refSH);

        {
            EX_TRY
            {
                ENTER_DOMAIN_ID(owningAD);
                {
                    // Read the GC handle
                    refSH = (SAFEHANDLEREF) ObjectToOBJECTREF(ObjectFromHandle(hndSafeHandle));

                    // Destroy the GC handle
                    DestroyHandle(hndSafeHandle);

                    if (refSH != NULL)
                    {
                        SafeHandleHolder h(&refSH);

                        HANDLE hEvent = refSH->GetHandle();
                        if (hEvent != INVALID_HANDLE_VALUE)
                        {
                            UnsafeSetEvent(hEvent);
                        }
                    }
                }
                END_DOMAIN_TRANSITION;
            }
            EX_CATCH
            {
            }
            EX_END_CATCH(SwallowAllExceptions);
        }

        GCPROTECT_END();

        // reset the caller's fields so the handle is not released twice
        hndSafeHandle = NULL;
        owningAD = (ADID) 0;
    }
#endif
}
556 | |
557 | #endif // #ifndef DACCESS_COMPILE |
558 | |
// List node holding one event handle.
// NOTE(review): presumably an entry of the cached wait-event list bounded by MAX_CACHED_EVENTS -- confirm.
typedef struct {
    LIST_ENTRY link;
    HANDLE Handle;
} WaitEvent ;
563 | |
564 | // Timer |
// State of one timer-queue timer.
typedef struct {
    LIST_ENTRY link; // doubly linked list of timers
    ULONG FiringTime; // TickCount of when to fire next
    WAITORTIMERCALLBACK Function; // Function to call when timer fires
    PVOID Context; // Context to pass to function when timer fires
    ULONG Period;
    DWORD flag; // How do we deal with the context
    DWORD state;
    LONG refCount;
    HANDLE ExternalCompletionEvent; // only one of this is used, but cant do a union since CLREvent has a non-default constructor
    CLREvent InternalCompletionEvent; // flags indicates which one is being used
    OBJECTHANDLE ExternalEventSafeHandle; // released through ReleaseInfo (see ReleaseTimerInfo)
    ADID handleOwningAD;
} TimerInfo;
579 | |
// Acquire half of WaitInfoHolder; intentionally does nothing.
static VOID AcquireWaitInfo(WaitInfo *pInfo)
{
}
// Release half of WaitInfoHolder: forwards the wait's external-event fields
// to ReleaseInfo. Does nothing in DAC builds.
static VOID ReleaseWaitInfo(WaitInfo *pInfo)
{
    WRAPPER_NO_CONTRACT;
#ifndef DACCESS_COMPILE
    ReleaseInfo(pInfo->ExternalEventSafeHandle,
                pInfo->handleOwningAD,
                pInfo->ExternalCompletionEvent);
#endif
}
// Acquire half of TimerInfoHolder; intentionally does nothing.
static VOID AcquireTimerInfo(TimerInfo *pInfo)
{
}
// Release half of TimerInfoHolder: forwards the timer's external-event fields
// to ReleaseInfo. Does nothing in DAC builds.
static VOID ReleaseTimerInfo(TimerInfo *pInfo)
{
    WRAPPER_NO_CONTRACT;
#ifndef DACCESS_COMPILE
    ReleaseInfo(pInfo->ExternalEventSafeHandle,
                pInfo->handleOwningAD,
                pInfo->ExternalCompletionEvent);
#endif
}
604 | |
605 | typedef Holder<WaitInfo *, ThreadpoolMgr::AcquireWaitInfo, ThreadpoolMgr::ReleaseWaitInfo> WaitInfoHolder; |
606 | typedef Holder<TimerInfo *, ThreadpoolMgr::AcquireTimerInfo, ThreadpoolMgr::ReleaseTimerInfo> TimerInfoHolder; |
607 | |
// Argument bundle accepted by UpdateTimer: which timer to change and its new timing values.
typedef struct {
    TimerInfo* Timer; // timer to be updated
    ULONG DueTime ; // new due time
    ULONG Period ; // new period
} TimerUpdateInfo;
613 | |
614 | // Definitions and data structures to support recycling of high-frequency |
615 | // memory blocks. We use a spin-lock to access the list |
616 | |
617 | class RecycledListInfo |
618 | { |
619 | static const unsigned int MaxCachedEntries = 40; |
620 | |
621 | struct Entry |
622 | { |
623 | Entry* next; |
624 | }; |
625 | |
626 | Volatile<LONG> lock; // this is the spin lock |
627 | DWORD count; // count of number of elements in the list |
628 | Entry* root; // ptr to first element of recycled list |
629 | #ifndef _WIN64 |
630 | DWORD filler; // Pad the structure to a multiple of the 16. |
631 | #endif |
632 | |
633 | //--// |
634 | |
635 | public: |
636 | RecycledListInfo() |
637 | { |
638 | LIMITED_METHOD_CONTRACT; |
639 | |
640 | lock = 0; |
641 | root = NULL; |
642 | count = 0; |
643 | } |
644 | |
645 | FORCEINLINE bool CanInsert() |
646 | { |
647 | LIMITED_METHOD_CONTRACT; |
648 | |
649 | return count < MaxCachedEntries; |
650 | } |
651 | |
652 | FORCEINLINE LPVOID Remove() |
653 | { |
654 | LIMITED_METHOD_CONTRACT; |
655 | |
656 | if(root == NULL) return NULL; // No need for acquiring the lock, there's nothing to remove. |
657 | |
658 | AcquireLock(); |
659 | |
660 | Entry* ret = (Entry*)root; |
661 | |
662 | if(ret) |
663 | { |
664 | root = ret->next; |
665 | count -= 1; |
666 | } |
667 | |
668 | ReleaseLock(); |
669 | |
670 | return ret; |
671 | } |
672 | |
673 | FORCEINLINE void Insert( LPVOID mem ) |
674 | { |
675 | LIMITED_METHOD_CONTRACT; |
676 | |
677 | AcquireLock(); |
678 | |
679 | Entry* entry = (Entry*)mem; |
680 | |
681 | entry->next = root; |
682 | |
683 | root = entry; |
684 | count += 1; |
685 | |
686 | ReleaseLock(); |
687 | } |
688 | |
689 | private: |
690 | FORCEINLINE void AcquireLock() |
691 | { |
692 | LIMITED_METHOD_CONTRACT; |
693 | |
694 | unsigned int rounds = 0; |
695 | |
696 | DWORD dwSwitchCount = 0; |
697 | |
698 | while(lock != 0 || FastInterlockExchange( &lock, 1 ) != 0) |
699 | { |
700 | YieldProcessor(); // indicate to the processor that we are spinning |
701 | |
702 | rounds++; |
703 | |
704 | if((rounds % 32) == 0) |
705 | { |
706 | __SwitchToThread( 0, ++dwSwitchCount ); |
707 | } |
708 | } |
709 | } |
710 | |
711 | FORCEINLINE void ReleaseLock() |
712 | { |
713 | LIMITED_METHOD_CONTRACT; |
714 | |
715 | lock = 0; |
716 | } |
717 | }; |
718 | |
//
// It's critical that we ensure these pointers are allocated by the linker away from
// variables that are modified a lot at runtime.
//
// The use of the CacheGuard is a temporary solution;
// the thread pool has to be refactored away from static variables and
// toward a single global structure, where we can control the locality of variables.
//
727 | class RecycledListsWrapper |
728 | { |
729 | DWORD CacheGuardPre[MAX_CACHE_LINE_SIZE/sizeof(DWORD)]; |
730 | |
731 | RecycledListInfo (*pRecycledListPerProcessor)[MEMTYPE_COUNT]; // RecycledListInfo [numProc][MEMTYPE_COUNT] |
732 | |
733 | DWORD CacheGuardPost[MAX_CACHE_LINE_SIZE/sizeof(DWORD)]; |
734 | |
735 | public: |
736 | void Initialize( unsigned int numProcs ); |
737 | |
738 | FORCEINLINE bool IsInitialized() |
739 | { |
740 | LIMITED_METHOD_CONTRACT; |
741 | |
742 | return pRecycledListPerProcessor != NULL; |
743 | } |
744 | |
745 | FORCEINLINE RecycledListInfo& GetRecycleMemoryInfo( enum MemType memType ) |
746 | { |
747 | LIMITED_METHOD_CONTRACT; |
748 | |
749 | if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups()) |
750 | return pRecycledListPerProcessor[CPUGroupInfo::CalculateCurrentProcessorNumber()][memType]; |
751 | else |
752 | // Turns out GetCurrentProcessorNumber can return a value greater than the number of processors reported by |
753 | // GetSystemInfo, if we're running in WOW64 on a machine with >32 processors. |
754 | return pRecycledListPerProcessor[GetCurrentProcessorNumber()%NumberOfProcessors][memType]; |
755 | } |
756 | }; |
757 | |
758 | #define GATE_THREAD_STATUS_NOT_RUNNING 0 // There is no gate thread |
759 | #define GATE_THREAD_STATUS_REQUESTED 1 // There is a gate thread, and someone has asked it to stick around recently |
760 | #define GATE_THREAD_STATUS_WAITING_FOR_REQUEST 2 // There is a gate thread, but nobody has asked it to stay. It may die soon |
761 | |
762 | // Private methods |
763 | |
764 | static DWORD WINAPI intermediateThreadProc(PVOID arg); |
765 | |
// A thread routine paired with its argument.
// NOTE(review): presumably the bundle handed to intermediateThreadProc -- confirm against the implementation file.
typedef struct {
    LPTHREAD_START_ROUTINE lpThreadFunction;
    PVOID lpArg;
} intermediateThreadParam;
770 | |
771 | static Thread* CreateUnimpersonatedThread(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpArgs, BOOL *pIsCLRThread); |
772 | |
773 | static BOOL CreateWorkerThread(); |
774 | |
775 | static void EnqueueWorkRequest(WorkRequest* wr); |
776 | |
777 | static WorkRequest* DequeueWorkRequest(); |
778 | |
779 | static void ExecuteWorkRequest(bool* foundWork, bool* wasNotRecalled); |
780 | |
781 | static DWORD WINAPI ExecuteHostRequest(PVOID pArg); |
782 | |
783 | #ifndef DACCESS_COMPILE |
784 | |
785 | inline static void AppendWorkRequest(WorkRequest* entry) |
786 | { |
787 | CONTRACTL |
788 | { |
789 | NOTHROW; |
790 | MODE_ANY; |
791 | GC_NOTRIGGER; |
792 | } |
793 | CONTRACTL_END; |
794 | |
795 | if (WorkRequestTail) |
796 | { |
797 | _ASSERTE(WorkRequestHead != NULL); |
798 | WorkRequestTail->next = entry; |
799 | } |
800 | else |
801 | { |
802 | _ASSERTE(WorkRequestHead == NULL); |
803 | WorkRequestHead = entry; |
804 | } |
805 | |
806 | WorkRequestTail = entry; |
807 | _ASSERTE(WorkRequestTail->next == NULL); |
808 | } |
809 | |
810 | inline static WorkRequest* RemoveWorkRequest() |
811 | { |
812 | CONTRACTL |
813 | { |
814 | NOTHROW; |
815 | MODE_ANY; |
816 | GC_NOTRIGGER; |
817 | } |
818 | CONTRACTL_END; |
819 | |
820 | WorkRequest* entry = NULL; |
821 | if (WorkRequestHead) |
822 | { |
823 | entry = WorkRequestHead; |
824 | WorkRequestHead = entry->next; |
825 | if (WorkRequestHead == NULL) |
826 | WorkRequestTail = NULL; |
827 | } |
828 | return entry; |
829 | } |
830 | |
831 | static void EnsureInitialized(); |
832 | static void InitPlatformVariables(); |
833 | |
// Returns nonzero when the Initialization indicator holds -1, the value this
// header treats as "initialized".
inline static BOOL IsInitialized()
{
    LIMITED_METHOD_CONTRACT;
    return Initialization == -1;
}
839 | |
840 | static void MaybeAddWorkingWorker(); |
841 | |
// Bookkeeping for a finished work item: bumps the thread-pool completion
// count via Thread and refreshes LastDequeueTime.
static void NotifyWorkItemCompleted()
{
    WRAPPER_NO_CONTRACT;
    Thread::IncrementThreadPoolCompletionCount();
    UpdateLastDequeueTime();
}
848 | |
849 | static bool ShouldAdjustMaxWorkersActive() |
850 | { |
851 | WRAPPER_NO_CONTRACT; |
852 | |
853 | DWORD priorTime = PriorCompletedWorkRequestsTime; |
854 | MemoryBarrier(); // read fresh value for NextCompletedWorkRequestsTime below |
855 | DWORD requiredInterval = NextCompletedWorkRequestsTime - priorTime; |
856 | DWORD elapsedInterval = GetTickCount() - priorTime; |
857 | if (elapsedInterval >= requiredInterval) |
858 | { |
859 | ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts(); |
860 | if (counts.NumActive <= counts.MaxWorking) |
861 | return !IsHillClimbingDisabled; |
862 | } |
863 | |
864 | return false; |
865 | } |
866 | |
867 | static void AdjustMaxWorkersActive(); |
868 | static bool ShouldWorkerKeepRunning(); |
869 | |
870 | static BOOL SuspendProcessing(); |
871 | |
872 | static DWORD SafeWait(CLREvent * ev, DWORD sleepTime, BOOL alertable); |
873 | |
874 | static DWORD WINAPI WorkerThreadStart(LPVOID lpArgs); |
875 | |
876 | static BOOL AddWaitRequest(HANDLE waitHandle, WaitInfo* waitInfo); |
877 | |
878 | |
879 | static ThreadCB* FindWaitThread(); // returns a wait thread that can accomodate another wait request |
880 | |
881 | static BOOL CreateWaitThread(); |
882 | |
883 | static void WINAPI InsertNewWaitForSelf(WaitInfo* pArg); |
884 | |
885 | static int FindWaitIndex(const ThreadCB* threadCB, const HANDLE waitHandle); |
886 | |
887 | static DWORD MinimumRemainingWait(LIST_ENTRY* waitInfo, unsigned int numWaits); |
888 | |
889 | static void ProcessWaitCompletion( WaitInfo* waitInfo, |
890 | unsigned index, // array index |
891 | BOOL waitTimedOut); |
892 | |
893 | static DWORD WINAPI WaitThreadStart(LPVOID lpArgs); |
894 | |
895 | static DWORD WINAPI AsyncCallbackCompletion(PVOID pArgs); |
896 | |
897 | static void QueueTimerInfoForRelease(TimerInfo *pTimerInfo); |
898 | |
899 | static void DeactivateWait(WaitInfo* waitInfo); |
900 | static void DeactivateNthWait(WaitInfo* waitInfo, DWORD index); |
901 | |
902 | static void DeleteWait(WaitInfo* waitInfo); |
903 | |
904 | |
905 | inline static void ShiftWaitArray( ThreadCB* threadCB, |
906 | ULONG SrcIndex, |
907 | ULONG DestIndex, |
908 | ULONG count) |
909 | { |
910 | LIMITED_METHOD_CONTRACT; |
911 | memmove(&threadCB->waitHandle[DestIndex], |
912 | &threadCB->waitHandle[SrcIndex], |
913 | count * sizeof(HANDLE)); |
914 | memmove(&threadCB->waitPointer[DestIndex], |
915 | &threadCB->waitPointer[SrcIndex], |
916 | count * sizeof(LIST_ENTRY)); |
917 | } |
918 | |
919 | static void WINAPI DeregisterWait(WaitInfo* pArgs); |
920 | |
921 | #ifndef FEATURE_PAL |
922 | // holds the aggregate of system cpu usage of all processors |
// State carried between calls to GetCPUBusyTime_NT (passed as pOldInfo).
typedef struct _PROCESS_CPU_INFORMATION
{
    LARGE_INTEGER idleTime;
    LARGE_INTEGER kernelTime;
    LARGE_INTEGER userTime;
    DWORD_PTR affinityMask;
    int numberOfProcessors;
    // NOTE(review): presumably the buffer filled by NtQuerySystemInformation, with its size in usageBufferSize -- confirm
    SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION* usageBuffer;
    int usageBufferSize;
} PROCESS_CPU_INFORMATION;
933 | |
934 | static int GetCPUBusyTime_NT(PROCESS_CPU_INFORMATION* pOldInfo); |
935 | static BOOL CreateCompletionPortThread(LPVOID lpArgs); |
936 | static DWORD WINAPI CompletionPortThreadStart(LPVOID lpArgs); |
937 | public: |
// True when the native work-request queue is non-empty (WorkRequestHead is
// non-NULL). The head pointer is read without any locking here.
inline static bool HaveNativeWork()
{
    LIMITED_METHOD_CONTRACT;
    return WorkRequestHead != NULL;
}
943 | |
944 | static void GrowCompletionPortThreadpoolIfNeeded(); |
945 | static BOOL ShouldGrowCompletionPortThreadpool(ThreadCounter::Counts counts); |
946 | #else |
947 | static int GetCPUBusyTime_NT(PAL_IOCP_CPU_INFORMATION* pOldInfo); |
948 | |
949 | #endif // !FEATURE_PAL |
950 | |
951 | private: |
952 | static BOOL IsIoPending(); |
953 | |
954 | static BOOL CreateGateThread(); |
955 | static void EnsureGateThreadRunning(); |
956 | static bool ShouldGateThreadKeepRunning(); |
957 | static DWORD WINAPI GateThreadStart(LPVOID lpArgs); |
958 | static BOOL SufficientDelaySinceLastSample(unsigned int LastThreadCreationTime, |
959 | unsigned NumThreads, // total number of threads of that type (worker or CP) |
960 | double throttleRate=0.0 // the delay is increased by this percentage for each extra thread |
961 | ); |
962 | static BOOL SufficientDelaySinceLastDequeue(); |
963 | |
964 | static LPVOID GetRecycledMemory(enum MemType memType); |
965 | |
// Timer support. TimerThreadStart, AsyncTimerCallbackCompletion and AsyncDeleteTimer
// have the thread-procedure shape (DWORD WINAPI taking one pointer argument).
// InsertNewTimer, UpdateTimer and DeregisterTimer are WINAPI routines taking one typed
// pointer argument.
// NOTE(review): the latter three are presumably queued to the timer thread as APCs,
// like DeregisterWait is for wait threads -- confirm.
static DWORD WINAPI TimerThreadStart(LPVOID args);
static void TimerThreadFire(); // helper method used by TimerThreadStart
static void WINAPI InsertNewTimer(TimerInfo* pArg);
static DWORD FireTimers();
static DWORD WINAPI AsyncTimerCallbackCompletion(PVOID pArgs);
static void DeactivateTimer(TimerInfo* timerInfo);
static DWORD WINAPI AsyncDeleteTimer(PVOID pArgs);
static void DeleteTimer(TimerInfo* timerInfo);
static void WINAPI UpdateTimer(TimerUpdateInfo* pArgs);

static void WINAPI DeregisterTimer(TimerInfo* pArgs);
977 | |
978 | inline static DWORD QueueDeregisterWait(HANDLE waitThread, WaitInfo* waitInfo) |
979 | { |
980 | CONTRACTL |
981 | { |
982 | NOTHROW; |
983 | MODE_ANY; |
984 | GC_NOTRIGGER; |
985 | } |
986 | CONTRACTL_END; |
987 | |
988 | DWORD result = QueueUserAPC(reinterpret_cast<PAPCFUNC>(DeregisterWait), waitThread, reinterpret_cast<ULONG_PTR>(waitInfo)); |
989 | SetWaitThreadAPCPending(); |
990 | return result; |
991 | } |
992 | |
993 | |
994 | inline static void SetWaitThreadAPCPending() {IsApcPendingOnWaitThread = TRUE;} |
995 | inline static void ResetWaitThreadAPCPending() {IsApcPendingOnWaitThread = FALSE;} |
996 | inline static BOOL IsWaitThreadAPCPending() {return IsApcPendingOnWaitThread;} |
997 | |
998 | #ifdef _DEBUG |
999 | inline static DWORD GetTickCount() |
1000 | { |
1001 | LIMITED_METHOD_CONTRACT; |
1002 | return ::GetTickCount() + TickCountAdjustment; |
1003 | } |
1004 | #endif |
1005 | |
1006 | #endif // #ifndef DACCESS_COMPILE |
1007 | // Private variables |
1008 | |
static LONG Initialization; // indicator of whether the threadpool is initialized.

SVAL_DECL(LONG,MinLimitTotalWorkerThreads); // same as MinLimitTotalCPThreads
SVAL_DECL(LONG,MaxLimitTotalWorkerThreads); // same as MaxLimitTotalCPThreads

DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static unsigned int LastDequeueTime; // used to determine if work items are getting thread starved

static HillClimbing HillClimbingInstance;

// NOTE(review): the three fields below presumably bracket a completed-work-request
// sampling interval (count at the prior sample, its time, and the next sample time) -- confirm.
DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG PriorCompletedWorkRequests;
static DWORD PriorCompletedWorkRequestsTime;
static DWORD NextCompletedWorkRequestsTime;

static LARGE_INTEGER CurrentSampleStartTime;

// NOTE(review): units and sources of the settings below are not visible in this header.
static unsigned int WorkerThreadSpinLimit;
static bool IsHillClimbingDisabled;
static int ThreadAdjustmentInterval;
1027 | |
SPTR_DECL(WorkRequest,WorkRequestHead); // Head of work request queue
SPTR_DECL(WorkRequest,WorkRequestTail); // Tail of work request queue

static unsigned int LastCPThreadCreation; // last time a completion port thread was created
// NOTE(review): the formula in the comment below does not obviously describe a
// processor count; it may belong to a different variable -- confirm.
static unsigned int NumberOfProcessors; // = NumberOfWorkerThreads - no. of blocked threads

static BOOL IsApcPendingOnWaitThread; // Indicates if an APC is pending on the wait thread

// This needs to be non-hosted, because worker threads can run prior to EE startup.
static DangerousNonHostedSpinLock ThreadAdjustmentLock;
1038 | |
1039 | public: |
// Declared under a public access specifier, unlike the surrounding private state.
// NOTE(review): what this lock protects is not visible in this header -- confirm.
static CrstStatic WorkerCriticalSection;
1041 | |
1042 | private: |
// NOTE(review): the values below are presumably milliseconds (20 s and 5 s) -- confirm at the use sites.
static const DWORD WorkerTimeout = 20 * 1000;
static const DWORD WorkerTimeoutAppX = 5 * 1000; // shorter timeout to allow threads to exit prior to app suspension

// Aligned to MAX_CACHE_LINE_SIZE.
DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) SVAL_DECL(ThreadCounter,WorkerCounter);
1047 | |
//
// NOTE(review): the two comments below talk about UnfairSemaphore and CLRSemaphore, but both
// semaphores are declared as CLRLifoSemaphore*. The rationale may predate that type --
// confirm it still applies.
//
// WorkerSemaphore is an UnfairSemaphore because:
// 1) Threads enter and exit this semaphore very frequently, and thus benefit greatly from the spinning done by UnfairSemaphore
// 2) There is no functional reason why any particular thread should be preferred when waking workers. This only impacts performance,
//    and un-fairness helps performance in this case.
//
static CLRLifoSemaphore* WorkerSemaphore;

//
// RetiredWorkerSemaphore is a regular CLRSemaphore, not an UnfairSemaphore, because if a thread waits on this semaphore it is almost certainly
// NOT going to be released soon, so the spinning done in UnfairSemaphore only burns valuable CPU time. However, if UnfairSemaphore is ever
// implemented in terms of a Win32 IO Completion Port, we should reconsider this. The IOCP's LIFO unblocking behavior could help keep working set
// down, by constantly re-using the same small set of retired workers rather than round-robining between all of them as CLRSemaphore will do.
// If we go that route, we should add a "no-spin" option to UnfairSemaphore.Wait to avoid wasting CPU.
//
static CLRLifoSemaphore* RetiredWorkerSemaphore;
1064 | |
static CLREvent * RetiredCPWakeupEvent;

static CrstStatic WaitThreadsCriticalSection;
static LIST_ENTRY WaitThreadsHead; // queue of wait threads, each thread can handle up to 64 waits (see MAX_WAITHANDLES)

static TimerInfo *TimerInfosToBeRecycled; // list of delegate infos associated with deleted timers
static CrstStatic TimerQueueCriticalSection; // critical section to synchronize timer queue access
SVAL_DECL(LIST_ENTRY,TimerQueue); // queue of timers
static HANDLE TimerThread; // Currently we only have one timer thread
static Thread* pTimerThread;
DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static DWORD LastTickCount; // the count just before timer thread goes to sleep

static BOOL InitCompletionPortThreadpool; // flag indicating whether completion port threadpool has been initialized
static HANDLE GlobalCompletionPort; // used for binding io completions on file handles
1079 | |
1080 | public: |
// Completion port counterpart of WorkerCounter (same ThreadCounter type).
// Declared under a public access specifier.
SVAL_DECL(ThreadCounter,CPThreadCounter);
1082 | |
1083 | private: |
// NOTE(review): the per-CPU constant declared near the top of this header is named
// MaxLimitThreadsPerCPU, not MaxLimitCPThreadsPerCPU as the comment below says -- confirm which is meant.
SVAL_DECL(LONG,MaxLimitTotalCPThreads); // = MaxLimitCPThreadsPerCPU * number of CPUS
SVAL_DECL(LONG,MinLimitTotalCPThreads);
SVAL_DECL(LONG,MaxFreeCPThreads); // = MaxFreeCPThreadsPerCPU * Number of CPUS

DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG GateThreadStatus; // See GateThreadStatus enumeration

static Volatile<LONG> NumCPInfrastructureThreads; // number of threads currently busy handling draining cycle

// NOTE(review): presumably a percentage, compared against CpuUtilizationHigh/CpuUtilizationLow -- confirm.
SVAL_DECL(LONG,cpuUtilization);
static LONG cpuUtilizationAverage;

DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static RecycledListsWrapper RecycledLists;

#ifdef _DEBUG
static DWORD TickCountAdjustment; // add this value to value returned by GetTickCount
#endif

// NOTE(review): the purpose of offset_counter / offset_multiplier is not visible in this header.
DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static int offset_counter;
static const int offset_multiplier = 128;
1103 | }; |
1104 | |
1105 | |
1106 | |
1107 | |
1108 | #endif // _WIN32THREADPOOL_H |
1109 | |