// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
4 | |
5 | #include "common.h" |
6 | |
7 | // Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are |
8 | // tuned for Skylake processors |
9 | unsigned int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this is expected to be ~9 for pre-Skylake |
10 | unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration = 7; |
11 | |
12 | static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false; |
13 | static CrstStatic s_initializeYieldProcessorNormalizedCrst; |
14 | |
15 | void InitializeYieldProcessorNormalizedCrst() |
16 | { |
17 | WRAPPER_NO_CONTRACT; |
18 | s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock); |
19 | } |
20 | |
21 | static void InitializeYieldProcessorNormalized() |
22 | { |
23 | WRAPPER_NO_CONTRACT; |
24 | |
25 | CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst); |
26 | |
27 | if (s_isYieldProcessorNormalizedInitialized) |
28 | { |
29 | return; |
30 | } |
31 | |
32 | // Intel pre-Skylake processor: measured typically 14-17 cycles per yield |
33 | // Intel post-Skylake processor: measured typically 125-150 cycles per yield |
34 | const int MeasureDurationMs = 10; |
35 | const int NsPerSecond = 1000 * 1000 * 1000; |
36 | |
37 | LARGE_INTEGER li; |
38 | if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs) |
39 | { |
40 | // High precision clock not available or clock resolution is too low, resort to defaults |
41 | s_isYieldProcessorNormalizedInitialized = true; |
42 | return; |
43 | } |
44 | ULONGLONG ticksPerSecond = li.QuadPart; |
45 | |
46 | // Measure the nanosecond delay per yield |
47 | ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs); |
48 | unsigned int yieldCount = 0; |
49 | QueryPerformanceCounter(&li); |
50 | ULONGLONG startTicks = li.QuadPart; |
51 | ULONGLONG elapsedTicks; |
52 | do |
53 | { |
54 | // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask |
55 | // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the |
56 | // low microsecond range. |
57 | for (int i = 0; i < 1000; ++i) |
58 | { |
59 | YieldProcessor(); |
60 | } |
61 | yieldCount += 1000; |
62 | |
63 | QueryPerformanceCounter(&li); |
64 | ULONGLONG nowTicks = li.QuadPart; |
65 | elapsedTicks = nowTicks - startTicks; |
66 | } while (elapsedTicks < measureDurationTicks); |
67 | double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond); |
68 | if (nsPerYield < 1) |
69 | { |
70 | nsPerYield = 1; |
71 | } |
72 | |
73 | // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this |
74 | // value is naturally limited to MinNsPerNormalizedYield. |
75 | int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5); |
76 | if (yieldsPerNormalizedYield < 1) |
77 | { |
78 | yieldsPerNormalizedYield = 1; |
79 | } |
80 | _ASSERTE(yieldsPerNormalizedYield <= MinNsPerNormalizedYield); |
81 | |
82 | // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to |
83 | // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a |
84 | // better job of allowing other work to run. |
85 | int optimalMaxNormalizedYieldsPerSpinIteration = |
86 | (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5); |
87 | if (optimalMaxNormalizedYieldsPerSpinIteration < 1) |
88 | { |
89 | optimalMaxNormalizedYieldsPerSpinIteration = 1; |
90 | } |
91 | |
92 | g_yieldsPerNormalizedYield = yieldsPerNormalizedYield; |
93 | g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration; |
94 | s_isYieldProcessorNormalizedInitialized = true; |
95 | |
96 | GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield); |
97 | } |
98 | |
99 | void EnsureYieldProcessorNormalizedInitialized() |
100 | { |
101 | WRAPPER_NO_CONTRACT; |
102 | |
103 | if (!s_isYieldProcessorNormalizedInitialized) |
104 | { |
105 | InitializeYieldProcessorNormalized(); |
106 | } |
107 | } |
108 |