1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5#include "common.h"
6
7// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are
8// tuned for Skylake processors
9unsigned int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this is expected to be ~9 for pre-Skylake
10unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration = 7;
11
12static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false;
13static CrstStatic s_initializeYieldProcessorNormalizedCrst;
14
15void InitializeYieldProcessorNormalizedCrst()
16{
17 WRAPPER_NO_CONTRACT;
18 s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
19}
20
21static void InitializeYieldProcessorNormalized()
22{
23 WRAPPER_NO_CONTRACT;
24
25 CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst);
26
27 if (s_isYieldProcessorNormalizedInitialized)
28 {
29 return;
30 }
31
32 // Intel pre-Skylake processor: measured typically 14-17 cycles per yield
33 // Intel post-Skylake processor: measured typically 125-150 cycles per yield
34 const int MeasureDurationMs = 10;
35 const int NsPerSecond = 1000 * 1000 * 1000;
36
37 LARGE_INTEGER li;
38 if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs)
39 {
40 // High precision clock not available or clock resolution is too low, resort to defaults
41 s_isYieldProcessorNormalizedInitialized = true;
42 return;
43 }
44 ULONGLONG ticksPerSecond = li.QuadPart;
45
46 // Measure the nanosecond delay per yield
47 ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs);
48 unsigned int yieldCount = 0;
49 QueryPerformanceCounter(&li);
50 ULONGLONG startTicks = li.QuadPart;
51 ULONGLONG elapsedTicks;
52 do
53 {
54 // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask
55 // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the
56 // low microsecond range.
57 for (int i = 0; i < 1000; ++i)
58 {
59 YieldProcessor();
60 }
61 yieldCount += 1000;
62
63 QueryPerformanceCounter(&li);
64 ULONGLONG nowTicks = li.QuadPart;
65 elapsedTicks = nowTicks - startTicks;
66 } while (elapsedTicks < measureDurationTicks);
67 double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond);
68 if (nsPerYield < 1)
69 {
70 nsPerYield = 1;
71 }
72
73 // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this
74 // value is naturally limited to MinNsPerNormalizedYield.
75 int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5);
76 if (yieldsPerNormalizedYield < 1)
77 {
78 yieldsPerNormalizedYield = 1;
79 }
80 _ASSERTE(yieldsPerNormalizedYield <= MinNsPerNormalizedYield);
81
82 // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to
83 // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a
84 // better job of allowing other work to run.
85 int optimalMaxNormalizedYieldsPerSpinIteration =
86 (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5);
87 if (optimalMaxNormalizedYieldsPerSpinIteration < 1)
88 {
89 optimalMaxNormalizedYieldsPerSpinIteration = 1;
90 }
91
92 g_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
93 g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
94 s_isYieldProcessorNormalizedInitialized = true;
95
96 GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield);
97}
98
99void EnsureYieldProcessorNormalizedInitialized()
100{
101 WRAPPER_NO_CONTRACT;
102
103 if (!s_isYieldProcessorNormalizedInitialized)
104 {
105 InitializeYieldProcessorNormalized();
106 }
107}
108