| 1 | // Licensed to the .NET Foundation under one or more agreements. |
|---|---|
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | #include "common.h" |
| 6 | |
| 7 | // Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are |
| 8 | // tuned for Skylake processors |
| 9 | unsigned int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this is expected to be ~9 for pre-Skylake |
| 10 | unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration = 7; |
| 11 | |
| 12 | static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false; |
| 13 | static CrstStatic s_initializeYieldProcessorNormalizedCrst; |
| 14 | |
| 15 | void InitializeYieldProcessorNormalizedCrst() |
| 16 | { |
| 17 | WRAPPER_NO_CONTRACT; |
| 18 | s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock); |
| 19 | } |
| 20 | |
| 21 | static void InitializeYieldProcessorNormalized() |
| 22 | { |
| 23 | WRAPPER_NO_CONTRACT; |
| 24 | |
| 25 | CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst); |
| 26 | |
| 27 | if (s_isYieldProcessorNormalizedInitialized) |
| 28 | { |
| 29 | return; |
| 30 | } |
| 31 | |
| 32 | // Intel pre-Skylake processor: measured typically 14-17 cycles per yield |
| 33 | // Intel post-Skylake processor: measured typically 125-150 cycles per yield |
| 34 | const int MeasureDurationMs = 10; |
| 35 | const int NsPerSecond = 1000 * 1000 * 1000; |
| 36 | |
| 37 | LARGE_INTEGER li; |
| 38 | if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs) |
| 39 | { |
| 40 | // High precision clock not available or clock resolution is too low, resort to defaults |
| 41 | s_isYieldProcessorNormalizedInitialized = true; |
| 42 | return; |
| 43 | } |
| 44 | ULONGLONG ticksPerSecond = li.QuadPart; |
| 45 | |
| 46 | // Measure the nanosecond delay per yield |
| 47 | ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs); |
| 48 | unsigned int yieldCount = 0; |
| 49 | QueryPerformanceCounter(&li); |
| 50 | ULONGLONG startTicks = li.QuadPart; |
| 51 | ULONGLONG elapsedTicks; |
| 52 | do |
| 53 | { |
| 54 | // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask |
| 55 | // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the |
| 56 | // low microsecond range. |
| 57 | for (int i = 0; i < 1000; ++i) |
| 58 | { |
| 59 | YieldProcessor(); |
| 60 | } |
| 61 | yieldCount += 1000; |
| 62 | |
| 63 | QueryPerformanceCounter(&li); |
| 64 | ULONGLONG nowTicks = li.QuadPart; |
| 65 | elapsedTicks = nowTicks - startTicks; |
| 66 | } while (elapsedTicks < measureDurationTicks); |
| 67 | double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond); |
| 68 | if (nsPerYield < 1) |
| 69 | { |
| 70 | nsPerYield = 1; |
| 71 | } |
| 72 | |
| 73 | // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this |
| 74 | // value is naturally limited to MinNsPerNormalizedYield. |
| 75 | int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5); |
| 76 | if (yieldsPerNormalizedYield < 1) |
| 77 | { |
| 78 | yieldsPerNormalizedYield = 1; |
| 79 | } |
| 80 | _ASSERTE(yieldsPerNormalizedYield <= MinNsPerNormalizedYield); |
| 81 | |
| 82 | // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to |
| 83 | // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a |
| 84 | // better job of allowing other work to run. |
| 85 | int optimalMaxNormalizedYieldsPerSpinIteration = |
| 86 | (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5); |
| 87 | if (optimalMaxNormalizedYieldsPerSpinIteration < 1) |
| 88 | { |
| 89 | optimalMaxNormalizedYieldsPerSpinIteration = 1; |
| 90 | } |
| 91 | |
| 92 | g_yieldsPerNormalizedYield = yieldsPerNormalizedYield; |
| 93 | g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration; |
| 94 | s_isYieldProcessorNormalizedInitialized = true; |
| 95 | |
| 96 | GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield); |
| 97 | } |
| 98 | |
| 99 | void EnsureYieldProcessorNormalizedInitialized() |
| 100 | { |
| 101 | WRAPPER_NO_CONTRACT; |
| 102 | |
| 103 | if (!s_isYieldProcessorNormalizedInitialized) |
| 104 | { |
| 105 | InitializeYieldProcessorNormalized(); |
| 106 | } |
| 107 | } |
| 108 |