// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.


/*++

Module Name:

    Win32ThreadPool.h

Abstract:

    This module is the header file for thread pools using Win32 APIs.

Revision History:


--*/

#ifndef _WIN32THREADPOOL_H
#define _WIN32THREADPOOL_H

#include "delegateinfo.h"
#include "util.hpp"
#include "nativeoverlapped.h"
#include "hillclimbing.h"

#define MAX_WAITHANDLES 64

#define MAX_CACHED_EVENTS 40        // upper limit on number of wait events cached

#define WAIT_REGISTERED     0x01
#define WAIT_ACTIVE         0x02
#define WAIT_DELETE         0x04

#define TIMER_REGISTERED    0x01
#define TIMER_ACTIVE        0x02
#define TIMER_DELETE        0x04

#define WAIT_SINGLE_EXECUTION      0x00000001
#define WAIT_FREE_CONTEXT          0x00000002
#define WAIT_INTERNAL_COMPLETION   0x00000004

#define QUEUE_ONLY                 0x00000000  // do not attempt to call on the thread
#define CALL_OR_QUEUE              0x00000001  // call on the same thread if not too busy, else queue

const int MaxLimitThreadsPerCPU=250;    // upper limit on number of CP threads per CPU
const int MaxFreeCPThreadsPerCPU=2;     // upper limit on number of free CP threads per CPU

const int CpuUtilizationHigh=95;        // remove threads when above this
const int CpuUtilizationLow =80;        // inject more threads if below this

#ifndef FEATURE_PAL
extern HANDLE (WINAPI *g_pufnCreateIoCompletionPort)(HANDLE FileHandle,
                                                     HANDLE ExistingCompletionPort,
                                                     ULONG_PTR CompletionKey,
                                                     DWORD NumberOfConcurrentThreads);

extern int (WINAPI *g_pufnNtQueryInformationThread) (HANDLE ThreadHandle,
                                                     THREADINFOCLASS ThreadInformationClass,
                                                     PVOID ThreadInformation,
                                                     ULONG ThreadInformationLength,
                                                     PULONG ReturnLength);

extern int (WINAPI * g_pufnNtQuerySystemInformation) (SYSTEM_INFORMATION_CLASS SystemInformationClass,
                                                      PVOID SystemInformation,
                                                      ULONG SystemInformationLength,
                                                      PULONG ReturnLength OPTIONAL);
#endif // !FEATURE_PAL

#define FILETIME_TO_INT64(t) (*(__int64*)&(t))
#define MILLI_TO_100NANO(x)  ((x) * 10000)      // convert from milliseconds to 100-nanosecond units
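
// For reference, a quick worked example of the conversion above (illustrative only):
// MILLI_TO_100NANO(100) == 1,000,000, i.e. 100 ms expressed in 100-nanosecond ticks,
// which is the granularity used by FILETIME and the NT timer APIs.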

/**
 * This type is supposed to be private to ThreadpoolMgr.
 * It's at global scope because Strike needs to be able to access its
 * definition.
 */
struct WorkRequest {
    WorkRequest*            next;
    LPTHREAD_START_ROUTINE  Function;
    PVOID                   Context;

};

typedef struct _IOCompletionContext
{
    DWORD ErrorCode;
    DWORD numBytesTransferred;
    LPOVERLAPPED lpOverlapped;
    size_t key;
} IOCompletionContext, *PIOCompletionContext;

typedef DPTR(WorkRequest) PTR_WorkRequest;
class ThreadpoolMgr
{
    friend class ClrDataAccess;
    friend struct DelegateInfo;
    friend class ThreadPoolNative;
    friend class TimerNative;
    friend class UnManagedPerAppDomainTPCount;
    friend class ManagedPerAppDomainTPCount;
    friend class PerAppDomainTPCountList;
    friend class HillClimbing;
    friend struct _DacGlobals;

public:
    struct ThreadCounter
    {
        static const int MaxPossibleCount = 0x7fff;

        // padding to ensure we get our own cache line
        BYTE padding1[MAX_CACHE_LINE_SIZE];

        union Counts
        {
            struct
            {
                //
                // Note: these are signed rather than unsigned to allow us to detect under/overflow.
                //
                int MaxWorking  : 16;   // determined by HillClimbing; adjusted elsewhere for timeouts, etc.
                int NumActive   : 16;   // "active" means working or waiting on WorkerSemaphore. These are "warm/hot" threads.
                int NumWorking  : 16;   // trying to get work from various queues. Not waiting on either semaphore.
                int NumRetired  : 16;   // not trying to get work; waiting on RetiredWorkerSemaphore. These are "cold" threads.

                // Note: the only reason we need "retired" threads at all is that it allows some threads to eventually time out
                // even if other threads are getting work. If we ever make WorkerSemaphore a true LIFO semaphore, we will no longer
                // need the concept of "retirement" - instead, the very "coldest" threads will naturally be the first to time out.
            };

            LONGLONG AsLongLong;

            bool operator==(Counts other) {LIMITED_METHOD_CONTRACT; return AsLongLong == other.AsLongLong;}
        } counts;

        // padding to ensure we get our own cache line
        BYTE padding2[MAX_CACHE_LINE_SIZE];

        Counts GetCleanCounts()
        {
            LIMITED_METHOD_CONTRACT;
#ifdef _WIN64
            // on 64-bit platforms, a VolatileLoad of the 64-bit counts is atomic
            return DangerousGetDirtyCounts();
#else // !_WIN64
            // on 32-bit platforms, a VolatileLoad may result in a torn read; use an interlocked operation instead
            Counts result;
#ifndef DACCESS_COMPILE
            result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, 0, 0);
            ValidateCounts(result);
#else
            result.AsLongLong = 0; //prevents prefast warning for DAC builds
#endif
            return result;
#endif // !_WIN64
        }

        //
        // This does a non-atomic read of the counts. The returned value is suitable only
        // for use inside of a read-compare-exchange loop, where the compare-exchange must succeed
        // before any action is taken. Use GetCleanCounts for other needs, but keep in mind
        // it's much slower.
        //
        Counts DangerousGetDirtyCounts()
        {
            LIMITED_METHOD_CONTRACT;
            Counts result;
#ifndef DACCESS_COMPILE
            result.AsLongLong = VolatileLoad(&counts.AsLongLong);
#else
            result.AsLongLong = 0; //prevents prefast warning for DAC builds
#endif
            return result;
        }


        Counts CompareExchangeCounts(Counts newCounts, Counts oldCounts)
        {
            LIMITED_METHOD_CONTRACT;
            Counts result;
#ifndef DACCESS_COMPILE
            result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, newCounts.AsLongLong, oldCounts.AsLongLong);
            if (result == oldCounts)
            {
                // we can only do validation on success; if the exchange failed, it may have been due to a previous
                // dirty read, which may contain invalid values.
                ValidateCounts(result);
                ValidateCounts(newCounts);
            }
#else
            result.AsLongLong = 0; //prevents prefast warning for DAC builds
#endif
            return result;
        }
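
        //
        // Typical usage pattern for the two methods above (an illustrative sketch only; `counter`
        // is a hypothetical ThreadCounter instance and the adjustment shown is arbitrary):
        // take a dirty snapshot, derive the new value, and retry until the compare-exchange succeeds.
        //
        //     ThreadCounter::Counts oldCounts, newCounts;
        //     while (true)
        //     {
        //         oldCounts = counter.DangerousGetDirtyCounts();
        //         newCounts = oldCounts;
        //         newCounts.NumWorking += 1;          // example adjustment
        //         if (counter.CompareExchangeCounts(newCounts, oldCounts) == oldCounts)
        //             break;                          // exchange succeeded; the counts were consistent
        //     }
        //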

    private:
        static void ValidateCounts(Counts counts)
        {
            LIMITED_METHOD_CONTRACT;
            _ASSERTE(counts.MaxWorking > 0);
            _ASSERTE(counts.NumActive >= 0);
            _ASSERTE(counts.NumWorking >= 0);
            _ASSERTE(counts.NumRetired >= 0);
            _ASSERTE(counts.NumWorking <= counts.NumActive);
        }
    };

public:

    static void ReportThreadStatus(bool isWorking);

    // enumeration of different kinds of memory blocks that are recycled
    enum MemType
    {
        MEMTYPE_AsyncCallback   = 0,
        MEMTYPE_DelegateInfo    = 1,
        MEMTYPE_WorkRequest     = 2,
        MEMTYPE_COUNT           = 3,
    };

    typedef struct {
        ADID    AppDomainId;
        INT32   TimerId;
    } TimerInfoContext;

    static BOOL Initialize();

    static BOOL SetMaxThreadsHelper(DWORD MaxWorkerThreads,
                                    DWORD MaxIOCompletionThreads);

    static BOOL SetMaxThreads(DWORD MaxWorkerThreads,
                              DWORD MaxIOCompletionThreads);

    static BOOL GetMaxThreads(DWORD* MaxWorkerThreads,
                              DWORD* MaxIOCompletionThreads);

    static BOOL SetMinThreads(DWORD MinWorkerThreads,
                              DWORD MinIOCompletionThreads);

    static BOOL GetMinThreads(DWORD* MinWorkerThreads,
                              DWORD* MinIOCompletionThreads);

    static BOOL GetAvailableThreads(DWORD* AvailableWorkerThreads,
                                    DWORD* AvailableIOCompletionThreads);

    static BOOL QueueUserWorkItem(LPTHREAD_START_ROUTINE Function,
                                  PVOID Context,
                                  ULONG Flags,
                                  BOOL UnmanagedTPRequest=TRUE);

    static BOOL PostQueuedCompletionStatus(LPOVERLAPPED lpOverlapped,
                                           LPOVERLAPPED_COMPLETION_ROUTINE Function);

    inline static BOOL IsCompletionPortInitialized()
    {
        LIMITED_METHOD_CONTRACT;
        return GlobalCompletionPort != NULL;
    }

    static BOOL DrainCompletionPortQueue();

    static BOOL RegisterWaitForSingleObject(PHANDLE phNewWaitObject,
                                            HANDLE hWaitObject,
                                            WAITORTIMERCALLBACK Callback,
                                            PVOID Context,
                                            ULONG timeout,
                                            DWORD dwFlag);

    static BOOL UnregisterWaitEx(HANDLE hWaitObject, HANDLE CompletionEvent);
    static void WaitHandleCleanup(HANDLE hWaitObject);
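
    //
    // Illustrative usage of the registered-wait API above (a sketch only; the handle, callback,
    // context, and completion-event values are placeholders): register a callback against a native
    // handle, then unregister with a completion event so the caller can wait for in-flight callbacks.
    //
    //     HANDLE hRegisteredWait = NULL;
    //     ThreadpoolMgr::RegisterWaitForSingleObject(&hRegisteredWait,
    //                                                hEventToWaitOn,         // native handle to wait on
    //                                                MyWaitOrTimerCallback,  // WAITORTIMERCALLBACK
    //                                                pContext,
    //                                                INFINITE,               // timeout in milliseconds
    //                                                WAIT_SINGLE_EXECUTION); // fire the callback at most once
    //     ...
    //     ThreadpoolMgr::UnregisterWaitEx(hRegisteredWait, hCompletionEvent); // signaled when callbacks are done
    //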

    static BOOL WINAPI BindIoCompletionCallback(HANDLE FileHandle,
                                                LPOVERLAPPED_COMPLETION_ROUTINE Function,
                                                ULONG Flags,
                                                DWORD& errorCode);

    static void WINAPI WaitIOCompletionCallback(DWORD dwErrorCode,
                                                DWORD numBytesTransferred,
                                                LPOVERLAPPED lpOverlapped);

    static VOID WINAPI CallbackForInitiateDrainageOfCompletionPortQueue(
        DWORD dwErrorCode,
        DWORD dwNumberOfBytesTransfered,
        LPOVERLAPPED lpOverlapped
        );

    static VOID WINAPI CallbackForContinueDrainageOfCompletionPortQueue(
        DWORD dwErrorCode,
        DWORD dwNumberOfBytesTransfered,
        LPOVERLAPPED lpOverlapped
        );

    static BOOL SetAppDomainRequestsActive(BOOL UnmanagedTP = FALSE);
    static void ClearAppDomainRequestsActive(BOOL UnmanagedTP = FALSE, BOOL AdUnloading = FALSE, LONG index = -1);

    static inline void UpdateLastDequeueTime()
    {
        LIMITED_METHOD_CONTRACT;
        VolatileStore(&LastDequeueTime, (unsigned int)GetTickCount());
    }

    static BOOL CreateTimerQueueTimer(PHANDLE phNewTimer,
                                      WAITORTIMERCALLBACK Callback,
                                      PVOID Parameter,
                                      DWORD DueTime,
                                      DWORD Period,
                                      ULONG Flags);

    static BOOL ChangeTimerQueueTimer(HANDLE Timer,
                                      ULONG DueTime,
                                      ULONG Period);
    static BOOL DeleteTimerQueueTimer(HANDLE Timer,
                                      HANDLE CompletionEvent);
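
    //
    // Illustrative usage of the timer-queue API above (a sketch only; the callback, context, and
    // completion-event values are placeholders): create a periodic timer, adjust its due time and
    // period, then delete it, using the completion event to wait for in-flight timer callbacks.
    //
    //     HANDLE hTimer = NULL;
    //     ThreadpoolMgr::CreateTimerQueueTimer(&hTimer,
    //                                          MyTimerCallback,     // WAITORTIMERCALLBACK
    //                                          pTimerContext,
    //                                          1000,                // due time in milliseconds
    //                                          500,                 // period in milliseconds
    //                                          0);                  // flags
    //     ThreadpoolMgr::ChangeTimerQueueTimer(hTimer, 2000, 250);
    //     ThreadpoolMgr::DeleteTimerQueueTimer(hTimer, hCompletionEvent);
    //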

    static void RecycleMemory(LPVOID mem, enum MemType memType);

    static void FlushQueueOfTimerInfos();

    static BOOL HaveTimerInfosToFlush() { return TimerInfosToBeRecycled != NULL; }

#ifndef FEATURE_PAL
    static LPOVERLAPPED CompletionPortDispatchWorkWithinAppDomain(Thread* pThread, DWORD* pErrorCode, DWORD* pNumBytes, size_t* pKey, DWORD adid);
    static void StoreOverlappedInfoInThread(Thread* pThread, DWORD dwErrorCode, DWORD dwNumBytes, size_t key, LPOVERLAPPED lpOverlapped);
#endif // !FEATURE_PAL

    // Enable filtering of correlation ETW events for cases handled at a higher abstraction level

#ifndef DACCESS_COMPILE
    static FORCEINLINE BOOL AreEtwQueueEventsSpeciallyHandled(LPTHREAD_START_ROUTINE Function)
    {
        // Timer events are handled at a higher abstraction level: in the managed Timer class
        return (Function == ThreadpoolMgr::AsyncTimerCallbackCompletion);
    }

    static FORCEINLINE BOOL AreEtwIOQueueEventsSpeciallyHandled(LPOVERLAPPED_COMPLETION_ROUTINE Function)
    {
        // We ignore drainage events because they are uninteresting
        // We handle registered waits at a higher abstraction level
        return (Function == ThreadpoolMgr::CallbackForInitiateDrainageOfCompletionPortQueue
                || Function == ThreadpoolMgr::CallbackForContinueDrainageOfCompletionPortQueue
                || Function == ThreadpoolMgr::WaitIOCompletionCallback);
    }
#endif

private:

#ifndef DACCESS_COMPILE

    inline static void FreeWorkRequest(WorkRequest* workRequest)
    {
        RecycleMemory( workRequest, MEMTYPE_WorkRequest ); //delete workRequest;
    }

    inline static WorkRequest* MakeWorkRequest(LPTHREAD_START_ROUTINE function, PVOID context)
    {
        CONTRACTL
        {
            THROWS;
            GC_NOTRIGGER;
            MODE_ANY;
        }
        CONTRACTL_END;

        WorkRequest* wr = (WorkRequest*) GetRecycledMemory(MEMTYPE_WorkRequest);
        _ASSERTE(wr);
        if (NULL == wr)
            return NULL;
        wr->Function = function;
        wr->Context = context;
        wr->next = NULL;
        return wr;
    }

#endif // #ifndef DACCESS_COMPILE

    typedef struct {
        DWORD numBytes;
        ULONG_PTR *key;
        LPOVERLAPPED pOverlapped;
        DWORD errorCode;
    } QueuedStatus;

    typedef DPTR(struct _LIST_ENTRY) PTR_LIST_ENTRY;
    typedef struct _LIST_ENTRY {
        struct _LIST_ENTRY *Flink;
        struct _LIST_ENTRY *Blink;
    } LIST_ENTRY, *PLIST_ENTRY;

    struct WaitInfo;

    typedef struct {
        HANDLE      threadHandle;
        DWORD       threadId;
        CLREvent    startEvent;
        LONG        NumWaitHandles;                 // number of wait objects registered to the thread (<= 64)
        LONG        NumActiveWaits;                 // number of objects the thread is actually waiting on (this may be less than
                                                    // NumWaitHandles since the thread may not have activated some waits)
        HANDLE      waitHandle[MAX_WAITHANDLES];    // array of wait handles (copied from waitInfo since
                                                    // we need them to be contiguous)
        LIST_ENTRY  waitPointer[MAX_WAITHANDLES];   // array of doubly linked lists of corresponding waitinfo
    } ThreadCB;


    typedef struct {
        ULONG   startTime;          // time at which the wait was started
                                    // endTime = startTime + timeout
        ULONG   remainingTime;      // endTime - currentTime
    } WaitTimerInfo;

    struct WaitInfo {
        LIST_ENTRY          link;                       // Win9x does not allow duplicate waithandles, so we need to
                                                        // group all waits on a single waithandle using this linked list
        HANDLE              waitHandle;
        WAITORTIMERCALLBACK Callback;
        PVOID               Context;
        ULONG               timeout;
        WaitTimerInfo       timer;
        DWORD               flag;
        DWORD               state;
        ThreadCB*           threadCB;
        LONG                refCount;                   // when this reaches 0, the waitInfo can be safely deleted
        CLREvent            PartialCompletionEvent;     // used to synchronize deactivation of a wait
        CLREvent            InternalCompletionEvent;    // only one of InternalCompletion or ExternalCompletion is used,
                                                        // but we can't make a union since CLREvent has a non-default constructor
        HANDLE              ExternalCompletionEvent;    // they are signalled when all callbacks have completed (refCount = 0)
        ADID                handleOwningAD;
        OBJECTHANDLE        ExternalEventSafeHandle;

    };

    // structure used to maintain global information about wait threads. Protected by WaitThreadsCriticalSection
    typedef struct WaitThreadTag {
        LIST_ENTRY      link;
        ThreadCB*       threadCB;
    } WaitThreadInfo;


    struct AsyncCallback {
        WaitInfo*   wait;
        BOOL        waitTimedOut;
    };

#ifndef DACCESS_COMPILE

    static VOID
    AcquireAsyncCallback(AsyncCallback *pAsyncCB)
    {
        LIMITED_METHOD_CONTRACT;
    }

    static VOID
    ReleaseAsyncCallback(AsyncCallback *pAsyncCB)
    {
        CONTRACTL
        {
            THROWS;
            GC_TRIGGERS;
            MODE_ANY;
        }
        CONTRACTL_END;

        WaitInfo *waitInfo = pAsyncCB->wait;
        ThreadpoolMgr::RecycleMemory((LPVOID*)pAsyncCB, ThreadpoolMgr::MEMTYPE_AsyncCallback);

        // if this was a single execution, we now need to stop rooting registeredWaitHandle
        // in a GC handle. This will cause the finalizer to pick it up and call the cleanup
        // routine.
        if ( (waitInfo->flag & WAIT_SINGLE_EXECUTION) && (waitInfo->flag & WAIT_FREE_CONTEXT))
        {

            DelegateInfo* pDelegate = (DelegateInfo*) waitInfo->Context;

            _ASSERTE(pDelegate->m_registeredWaitHandle);

            {
                GCX_COOP();
                StoreObjectInHandle(pDelegate->m_registeredWaitHandle, NULL);
            }
        }

        if (InterlockedDecrement(&waitInfo->refCount) == 0)
            ThreadpoolMgr::DeleteWait(waitInfo);

    }

    typedef Holder<AsyncCallback *, ThreadpoolMgr::AcquireAsyncCallback, ThreadpoolMgr::ReleaseAsyncCallback> AsyncCallbackHolder;
    inline static AsyncCallback* MakeAsyncCallback()
    {
        WRAPPER_NO_CONTRACT;
        return (AsyncCallback*) GetRecycledMemory(MEMTYPE_AsyncCallback);
    }

    static VOID ReleaseInfo(OBJECTHANDLE& hndSafeHandle,
                            ADID& owningAD,
                            HANDLE hndNativeHandle)
    {
        CONTRACTL
        {
            NOTHROW;
            MODE_ANY;
            GC_TRIGGERS;
        }
        CONTRACTL_END

// Use of EX_TRY, GCPROTECT etc in the same function is causing prefast to complain about local variables with
// same name masking each other (#246). The error could not be suppressed with "#pragma PREFAST_SUPPRESS"
#ifndef _PREFAST_

        if (hndSafeHandle != NULL)
        {

            SAFEHANDLEREF refSH = NULL;

            GCX_COOP();
            GCPROTECT_BEGIN(refSH);

            {
                EX_TRY
                {
                    ENTER_DOMAIN_ID(owningAD);
                    {
                        // Read the GC handle
                        refSH = (SAFEHANDLEREF) ObjectToOBJECTREF(ObjectFromHandle(hndSafeHandle));

                        // Destroy the GC handle
                        DestroyHandle(hndSafeHandle);

                        if (refSH != NULL)
                        {
                            SafeHandleHolder h(&refSH);

                            HANDLE hEvent = refSH->GetHandle();
                            if (hEvent != INVALID_HANDLE_VALUE)
                            {
                                UnsafeSetEvent(hEvent);
                            }
                        }
                    }
                    END_DOMAIN_TRANSITION;
                }
                EX_CATCH
                {
                }
                EX_END_CATCH(SwallowAllExceptions);
            }

            GCPROTECT_END();

            hndSafeHandle = NULL;
            owningAD = (ADID) 0;
        }
#endif
    }

#endif // #ifndef DACCESS_COMPILE

    typedef struct {
        LIST_ENTRY  link;
        HANDLE      Handle;
    } WaitEvent;

    // Timer
    typedef struct {
        LIST_ENTRY          link;                       // doubly linked list of timers
        ULONG               FiringTime;                 // TickCount of when to fire next
        WAITORTIMERCALLBACK Function;                   // function to call when the timer fires
        PVOID               Context;                    // context to pass to the function when the timer fires
        ULONG               Period;
        DWORD               flag;                       // how we deal with the context
        DWORD               state;
        LONG                refCount;
        HANDLE              ExternalCompletionEvent;    // only one of these is used, but we can't make a union since CLREvent has a non-default constructor;
        CLREvent            InternalCompletionEvent;    // the flag field indicates which one is in use
        OBJECTHANDLE        ExternalEventSafeHandle;
        ADID                handleOwningAD;
    } TimerInfo;

    static VOID AcquireWaitInfo(WaitInfo *pInfo)
    {
    }
    static VOID ReleaseWaitInfo(WaitInfo *pInfo)
    {
        WRAPPER_NO_CONTRACT;
#ifndef DACCESS_COMPILE
        ReleaseInfo(pInfo->ExternalEventSafeHandle,
                    pInfo->handleOwningAD,
                    pInfo->ExternalCompletionEvent);
#endif
    }
    static VOID AcquireTimerInfo(TimerInfo *pInfo)
    {
    }
    static VOID ReleaseTimerInfo(TimerInfo *pInfo)
    {
        WRAPPER_NO_CONTRACT;
#ifndef DACCESS_COMPILE
        ReleaseInfo(pInfo->ExternalEventSafeHandle,
                    pInfo->handleOwningAD,
                    pInfo->ExternalCompletionEvent);
#endif
    }

    typedef Holder<WaitInfo *, ThreadpoolMgr::AcquireWaitInfo, ThreadpoolMgr::ReleaseWaitInfo> WaitInfoHolder;
    typedef Holder<TimerInfo *, ThreadpoolMgr::AcquireTimerInfo, ThreadpoolMgr::ReleaseTimerInfo> TimerInfoHolder;

    typedef struct {
        TimerInfo*  Timer;          // timer to be updated
        ULONG       DueTime;        // new due time
        ULONG       Period;         // new period
    } TimerUpdateInfo;

    // Definitions and data structures to support recycling of high-frequency
    // memory blocks. We use a spin lock to access the list.

    class RecycledListInfo
    {
        static const unsigned int MaxCachedEntries = 40;

        struct Entry
        {
            Entry* next;
        };

        Volatile<LONG> lock;    // this is the spin lock
        DWORD          count;   // number of elements in the list
        Entry*         root;    // ptr to the first element of the recycled list
#ifndef _WIN64
        DWORD          filler;  // pad the structure to a multiple of 16 bytes
#endif

        //--//

    public:
        RecycledListInfo()
        {
            LIMITED_METHOD_CONTRACT;

            lock  = 0;
            root  = NULL;
            count = 0;
        }

        FORCEINLINE bool CanInsert()
        {
            LIMITED_METHOD_CONTRACT;

            return count < MaxCachedEntries;
        }

        FORCEINLINE LPVOID Remove()
        {
            LIMITED_METHOD_CONTRACT;

            if(root == NULL) return NULL; // no need to acquire the lock, there's nothing to remove

            AcquireLock();

            Entry* ret = (Entry*)root;

            if(ret)
            {
                root   = ret->next;
                count -= 1;
            }

            ReleaseLock();

            return ret;
        }

        FORCEINLINE void Insert( LPVOID mem )
        {
            LIMITED_METHOD_CONTRACT;

            AcquireLock();

            Entry* entry = (Entry*)mem;

            entry->next = root;

            root   = entry;
            count += 1;

            ReleaseLock();
        }

    private:
        FORCEINLINE void AcquireLock()
        {
            LIMITED_METHOD_CONTRACT;

            unsigned int rounds = 0;

            DWORD dwSwitchCount = 0;

            while(lock != 0 || FastInterlockExchange( &lock, 1 ) != 0)
            {
                YieldProcessor();   // indicate to the processor that we are spinning

                rounds++;

                if((rounds % 32) == 0)
                {
                    __SwitchToThread( 0, ++dwSwitchCount );
                }
            }
        }

        FORCEINLINE void ReleaseLock()
        {
            LIMITED_METHOD_CONTRACT;

            lock = 0;
        }
    };
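
    //
    // Sketch of how these recycled lists are presumably consumed elsewhere in the thread pool (the
    // real call sites live in the corresponding .cpp file): blocks are obtained via GetRecycledMemory
    // and returned via RecycleMemory so they can be cached on the per-processor lists above instead
    // of hitting the heap on every allocation.
    //
    //     WorkRequest* wr = (WorkRequest*) GetRecycledMemory(MEMTYPE_WorkRequest);  // pop from a list or allocate
    //     // ... fill in and queue the work request ...
    //     RecycleMemory(wr, MEMTYPE_WorkRequest);                                   // push back for reuse
    //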

    //
    // It's critical that we ensure these pointers are allocated by the linker away from
    // variables that are modified a lot at runtime.
    //
    // The use of the CacheGuard is a temporary solution;
    // the thread pool has to be refactored away from static variables and
    // toward a single global structure, where we can control the locality of variables.
    //
    class RecycledListsWrapper
    {
        DWORD               CacheGuardPre[MAX_CACHE_LINE_SIZE/sizeof(DWORD)];

        RecycledListInfo    (*pRecycledListPerProcessor)[MEMTYPE_COUNT];    // RecycledListInfo [numProc][MEMTYPE_COUNT]

        DWORD               CacheGuardPost[MAX_CACHE_LINE_SIZE/sizeof(DWORD)];

    public:
        void Initialize( unsigned int numProcs );

        FORCEINLINE bool IsInitialized()
        {
            LIMITED_METHOD_CONTRACT;

            return pRecycledListPerProcessor != NULL;
        }

        FORCEINLINE RecycledListInfo& GetRecycleMemoryInfo( enum MemType memType )
        {
            LIMITED_METHOD_CONTRACT;

            if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
                return pRecycledListPerProcessor[CPUGroupInfo::CalculateCurrentProcessorNumber()][memType];
            else
                // It turns out GetCurrentProcessorNumber can return a value greater than the number of processors reported by
                // GetSystemInfo, if we're running in WOW64 on a machine with >32 processors.
                return pRecycledListPerProcessor[GetCurrentProcessorNumber()%NumberOfProcessors][memType];
        }
    };

#define GATE_THREAD_STATUS_NOT_RUNNING         0 // there is no gate thread
#define GATE_THREAD_STATUS_REQUESTED           1 // there is a gate thread, and someone has asked it to stick around recently
#define GATE_THREAD_STATUS_WAITING_FOR_REQUEST 2 // there is a gate thread, but nobody has asked it to stay. It may die soon
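
// A hypothetical sketch of how these states are meant to be driven (the real transitions live in
// EnsureGateThreadRunning/GateThreadStart in the .cpp file and may differ in detail): a requester
// interlocked-exchanges the status to REQUESTED and starts a gate thread only if none was running,
// while the gate thread downgrades the status between scans and exits if nobody renews the request.
//
//     // requester side (illustrative only):
//     LONG previous = FastInterlockExchange(&GateThreadStatus, GATE_THREAD_STATUS_REQUESTED);
//     if (previous == GATE_THREAD_STATUS_NOT_RUNNING)
//         CreateGateThread();     // no gate thread was running; start one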

    // Private methods

    static DWORD WINAPI intermediateThreadProc(PVOID arg);

    typedef struct {
        LPTHREAD_START_ROUTINE  lpThreadFunction;
        PVOID                   lpArg;
    } intermediateThreadParam;

    static Thread* CreateUnimpersonatedThread(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpArgs, BOOL *pIsCLRThread);

    static BOOL CreateWorkerThread();

    static void EnqueueWorkRequest(WorkRequest* wr);

    static WorkRequest* DequeueWorkRequest();

    static void ExecuteWorkRequest(bool* foundWork, bool* wasNotRecalled);

    static DWORD WINAPI ExecuteHostRequest(PVOID pArg);

#ifndef DACCESS_COMPILE

    inline static void AppendWorkRequest(WorkRequest* entry)
    {
        CONTRACTL
        {
            NOTHROW;
            MODE_ANY;
            GC_NOTRIGGER;
        }
        CONTRACTL_END;

        if (WorkRequestTail)
        {
            _ASSERTE(WorkRequestHead != NULL);
            WorkRequestTail->next = entry;
        }
        else
        {
            _ASSERTE(WorkRequestHead == NULL);
            WorkRequestHead = entry;
        }

        WorkRequestTail = entry;
        _ASSERTE(WorkRequestTail->next == NULL);
    }

    inline static WorkRequest* RemoveWorkRequest()
    {
        CONTRACTL
        {
            NOTHROW;
            MODE_ANY;
            GC_NOTRIGGER;
        }
        CONTRACTL_END;

        WorkRequest* entry = NULL;
        if (WorkRequestHead)
        {
            entry = WorkRequestHead;
            WorkRequestHead = entry->next;
            if (WorkRequestHead == NULL)
                WorkRequestTail = NULL;
        }
        return entry;
    }

    static void EnsureInitialized();
    static void InitPlatformVariables();

    inline static BOOL IsInitialized()
    {
        LIMITED_METHOD_CONTRACT;
        return Initialization == -1;
    }

    static void MaybeAddWorkingWorker();

    static void NotifyWorkItemCompleted()
    {
        WRAPPER_NO_CONTRACT;
        Thread::IncrementThreadPoolCompletionCount();
        UpdateLastDequeueTime();
    }

    static bool ShouldAdjustMaxWorkersActive()
    {
        WRAPPER_NO_CONTRACT;

        DWORD priorTime = PriorCompletedWorkRequestsTime;
        MemoryBarrier(); // read fresh value for NextCompletedWorkRequestsTime below
        DWORD requiredInterval = NextCompletedWorkRequestsTime - priorTime;
        DWORD elapsedInterval = GetTickCount() - priorTime;
        if (elapsedInterval >= requiredInterval)
        {
            ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts();
            if (counts.NumActive <= counts.MaxWorking)
                return !IsHillClimbingDisabled;
        }

        return false;
    }

    static void AdjustMaxWorkersActive();
    static bool ShouldWorkerKeepRunning();

    static BOOL SuspendProcessing();

    static DWORD SafeWait(CLREvent * ev, DWORD sleepTime, BOOL alertable);

    static DWORD WINAPI WorkerThreadStart(LPVOID lpArgs);

    static BOOL AddWaitRequest(HANDLE waitHandle, WaitInfo* waitInfo);


    static ThreadCB* FindWaitThread();      // returns a wait thread that can accommodate another wait request

    static BOOL CreateWaitThread();

    static void WINAPI InsertNewWaitForSelf(WaitInfo* pArg);

    static int FindWaitIndex(const ThreadCB* threadCB, const HANDLE waitHandle);

    static DWORD MinimumRemainingWait(LIST_ENTRY* waitInfo, unsigned int numWaits);

    static void ProcessWaitCompletion( WaitInfo* waitInfo,
                                       unsigned index,      // array index
                                       BOOL waitTimedOut);

    static DWORD WINAPI WaitThreadStart(LPVOID lpArgs);

    static DWORD WINAPI AsyncCallbackCompletion(PVOID pArgs);

    static void QueueTimerInfoForRelease(TimerInfo *pTimerInfo);

    static void DeactivateWait(WaitInfo* waitInfo);
    static void DeactivateNthWait(WaitInfo* waitInfo, DWORD index);

    static void DeleteWait(WaitInfo* waitInfo);


    inline static void ShiftWaitArray( ThreadCB* threadCB,
                                       ULONG SrcIndex,
                                       ULONG DestIndex,
                                       ULONG count)
    {
        LIMITED_METHOD_CONTRACT;
        memmove(&threadCB->waitHandle[DestIndex],
                &threadCB->waitHandle[SrcIndex],
                count * sizeof(HANDLE));
        memmove(&threadCB->waitPointer[DestIndex],
                &threadCB->waitPointer[SrcIndex],
                count * sizeof(LIST_ENTRY));
    }

    static void WINAPI DeregisterWait(WaitInfo* pArgs);

#ifndef FEATURE_PAL
    // holds the aggregate of system cpu usage of all processors
    typedef struct _PROCESS_CPU_INFORMATION
    {
        LARGE_INTEGER idleTime;
        LARGE_INTEGER kernelTime;
        LARGE_INTEGER userTime;
        DWORD_PTR affinityMask;
        int numberOfProcessors;
        SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION* usageBuffer;
        int usageBufferSize;
    } PROCESS_CPU_INFORMATION;

    static int GetCPUBusyTime_NT(PROCESS_CPU_INFORMATION* pOldInfo);
    static BOOL CreateCompletionPortThread(LPVOID lpArgs);
    static DWORD WINAPI CompletionPortThreadStart(LPVOID lpArgs);
public:
    inline static bool HaveNativeWork()
    {
        LIMITED_METHOD_CONTRACT;
        return WorkRequestHead != NULL;
    }

    static void GrowCompletionPortThreadpoolIfNeeded();
    static BOOL ShouldGrowCompletionPortThreadpool(ThreadCounter::Counts counts);
#else
    static int GetCPUBusyTime_NT(PAL_IOCP_CPU_INFORMATION* pOldInfo);

#endif // !FEATURE_PAL

private:
    static BOOL IsIoPending();

    static BOOL CreateGateThread();
    static void EnsureGateThreadRunning();
    static bool ShouldGateThreadKeepRunning();
    static DWORD WINAPI GateThreadStart(LPVOID lpArgs);
    static BOOL SufficientDelaySinceLastSample(unsigned int LastThreadCreationTime,
                                               unsigned NumThreads,     // total number of threads of that type (worker or CP)
                                               double throttleRate=0.0  // the delay is increased by this percentage for each extra thread
                                               );
    static BOOL SufficientDelaySinceLastDequeue();

    static LPVOID GetRecycledMemory(enum MemType memType);

    static DWORD WINAPI TimerThreadStart(LPVOID args);
    static void TimerThreadFire();      // helper method used by TimerThreadStart
    static void WINAPI InsertNewTimer(TimerInfo* pArg);
    static DWORD FireTimers();
    static DWORD WINAPI AsyncTimerCallbackCompletion(PVOID pArgs);
    static void DeactivateTimer(TimerInfo* timerInfo);
    static DWORD WINAPI AsyncDeleteTimer(PVOID pArgs);
    static void DeleteTimer(TimerInfo* timerInfo);
    static void WINAPI UpdateTimer(TimerUpdateInfo* pArgs);

    static void WINAPI DeregisterTimer(TimerInfo* pArgs);

    inline static DWORD QueueDeregisterWait(HANDLE waitThread, WaitInfo* waitInfo)
    {
        CONTRACTL
        {
            NOTHROW;
            MODE_ANY;
            GC_NOTRIGGER;
        }
        CONTRACTL_END;

        DWORD result = QueueUserAPC(reinterpret_cast<PAPCFUNC>(DeregisterWait), waitThread, reinterpret_cast<ULONG_PTR>(waitInfo));
        SetWaitThreadAPCPending();
        return result;
    }


    inline static void SetWaitThreadAPCPending() {IsApcPendingOnWaitThread = TRUE;}
    inline static void ResetWaitThreadAPCPending() {IsApcPendingOnWaitThread = FALSE;}
    inline static BOOL IsWaitThreadAPCPending() {return IsApcPendingOnWaitThread;}

#ifdef _DEBUG
    inline static DWORD GetTickCount()
    {
        LIMITED_METHOD_CONTRACT;
        return ::GetTickCount() + TickCountAdjustment;
    }
#endif

#endif // #ifndef DACCESS_COMPILE
    // Private variables

    static LONG Initialization;                         // indicator of whether the threadpool is initialized

    SVAL_DECL(LONG,MinLimitTotalWorkerThreads);         // same as MinLimitTotalCPThreads
    SVAL_DECL(LONG,MaxLimitTotalWorkerThreads);         // same as MaxLimitTotalCPThreads

    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static unsigned int LastDequeueTime;    // used to determine if work items are getting thread starved

    static HillClimbing HillClimbingInstance;

    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG PriorCompletedWorkRequests;
    static DWORD PriorCompletedWorkRequestsTime;
    static DWORD NextCompletedWorkRequestsTime;

    static LARGE_INTEGER CurrentSampleStartTime;

    static unsigned int WorkerThreadSpinLimit;
    static bool IsHillClimbingDisabled;
    static int ThreadAdjustmentInterval;

    SPTR_DECL(WorkRequest,WorkRequestHead);             // head of the work request queue
    SPTR_DECL(WorkRequest,WorkRequestTail);             // tail of the work request queue

    static unsigned int LastCPThreadCreation;           // last time a completion port thread was created
    static unsigned int NumberOfProcessors;             // = NumberOfWorkerThreads - no. of blocked threads

    static BOOL IsApcPendingOnWaitThread;               // indicates if an APC is pending on the wait thread

    // This needs to be non-hosted, because worker threads can run prior to EE startup.
    static DangerousNonHostedSpinLock ThreadAdjustmentLock;

public:
    static CrstStatic WorkerCriticalSection;

private:
    static const DWORD WorkerTimeout = 20 * 1000;
    static const DWORD WorkerTimeoutAppX = 5 * 1000;    // shorter timeout to allow threads to exit prior to app suspension

    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) SVAL_DECL(ThreadCounter,WorkerCounter);

    //
    // WorkerSemaphore is an unfair (LIFO) semaphore because:
    // 1) Threads enter and exit this semaphore very frequently, and thus benefit greatly from the spinning done while waiting
    // 2) There is no functional reason why any particular thread should be preferred when waking workers. This only impacts performance,
    //    and un-fairness helps performance in this case.
    //
    static CLRLifoSemaphore* WorkerSemaphore;

    //
    // RetiredWorkerSemaphore is for "retired" workers: if a thread waits on this semaphore, it is almost certainly
    // NOT going to be released soon, so spinning before the wait only burns valuable CPU time. However, if this semaphore is ever
    // implemented in terms of a Win32 IO completion port, we should reconsider this. The IOCP's LIFO unblocking behavior could help keep working set
    // down, by constantly re-using the same small set of retired workers rather than round-robining between all of them.
    // If we go that route, we should add a "no-spin" option to the wait to avoid wasting CPU.
    //
    static CLRLifoSemaphore* RetiredWorkerSemaphore;

    static CLREvent * RetiredCPWakeupEvent;

    static CrstStatic WaitThreadsCriticalSection;
    static LIST_ENTRY WaitThreadsHead;                  // queue of wait threads; each thread can handle up to 64 waits

    static TimerInfo *TimerInfosToBeRecycled;           // list of delegate infos associated with deleted timers
    static CrstStatic TimerQueueCriticalSection;        // critical section to synchronize timer queue access
    SVAL_DECL(LIST_ENTRY,TimerQueue);                   // queue of timers
    static HANDLE TimerThread;                          // currently we only have one timer thread
    static Thread* pTimerThread;
    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static DWORD LastTickCount;     // the tick count just before the timer thread goes to sleep

    static BOOL InitCompletionPortThreadpool;           // flag indicating whether the completion port threadpool has been initialized
    static HANDLE GlobalCompletionPort;                 // used for binding IO completions on file handles

public:
    SVAL_DECL(ThreadCounter,CPThreadCounter);

private:
    SVAL_DECL(LONG,MaxLimitTotalCPThreads);             // = MaxLimitThreadsPerCPU * number of CPUs
    SVAL_DECL(LONG,MinLimitTotalCPThreads);
    SVAL_DECL(LONG,MaxFreeCPThreads);                   // = MaxFreeCPThreadsPerCPU * number of CPUs

    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG GateThreadStatus;   // see the GATE_THREAD_STATUS_* values above

    static Volatile<LONG> NumCPInfrastructureThreads;   // number of threads currently busy handling the draining cycle

    SVAL_DECL(LONG,cpuUtilization);
    static LONG cpuUtilizationAverage;

    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static RecycledListsWrapper RecycledLists;

#ifdef _DEBUG
    static DWORD TickCountAdjustment;                   // add this value to the value returned by GetTickCount
#endif

    DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static int offset_counter;
    static const int offset_multiplier = 128;
};




#endif // _WIN32THREADPOOL_H