// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/base/internal/spinlock.h"

#include <algorithm>
#include <atomic>
#include <limits>

#include "absl/base/attributes.h"
#include "absl/base/internal/atomic_hook.h"
#include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/spinlock_wait.h"
#include "absl/base/internal/sysinfo.h" /* For NumCPUs() */
#include "absl/base/call_once.h"

// Description of lock-word:
//  31..00: [............................3][2][1][0]
//
//     [0]: kSpinLockHeld
//     [1]: kSpinLockCooperative
//     [2]: kSpinLockDisabledScheduling
// [31..3]: ONLY kSpinLockSleeper OR
//          Wait time in cycles >> PROFILE_TIMESTAMP_SHIFT
//
// Detailed descriptions:
//
// Bit [0]: The lock is considered held iff kSpinLockHeld is set.
//
// Bit [1]: Eligible waiters (e.g. Fibers) may co-operatively reschedule when
//          contended iff kSpinLockCooperative is set.
//
// Bit [2]: This bit is exclusive from bit [1].  It is used only by a
//          non-cooperative lock.  When set, indicates that scheduling was
//          successfully disabled when the lock was acquired.  May be unset,
//          even if non-cooperative, if a ThreadIdentity did not yet exist at
//          time of acquisition.
//
// Bit [3]: If this is the only upper bit ([31..3]) set, then this lock was
//          acquired without contention; however, at least one waiter exists.
//
// Otherwise, bits [31..3] represent the time spent by the current lock
// holder to acquire the lock.  There may be outstanding waiter(s).
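//
// Example (illustrative): a lock word of 0x00000001 is a held, uncontended,
// non-cooperative lock; 0x00000009 additionally has kSpinLockSleeper set,
// meaning the holder acquired the lock without waiting but at least one
// waiter exists and must be woken on unlock.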

namespace absl {
namespace base_internal {

ABSL_CONST_INIT static base_internal::AtomicHook<void (*)(
    const void *lock, int64_t wait_cycles)>
    submit_profile_data;

void RegisterSpinLockProfiler(void (*fn)(const void *contendedlock,
                                         int64_t wait_cycles)) {
  submit_profile_data.Store(fn);
}
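
// Usage sketch (illustrative; MyProfiler is a hypothetical client type):
//
//   RegisterSpinLockProfiler(+[](const void *lock, int64_t wait_cycles) {
//     MyProfiler::RecordSpinLockContention(lock, wait_cycles);
//   });
//
// The registered function is invoked from SlowUnlock() with the number of
// cycles the releasing thread spent waiting to acquire the lock.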

// Uncommon constructors.
SpinLock::SpinLock(base_internal::SchedulingMode mode)
    : lockword_(IsCooperative(mode) ? kSpinLockCooperative : 0) {
  ABSL_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static);
}

SpinLock::SpinLock(base_internal::LinkerInitialized,
                   base_internal::SchedulingMode mode) {
  ABSL_TSAN_MUTEX_CREATE(this, 0);
  if (IsCooperative(mode)) {
    InitLinkerInitializedAndCooperative();
  }
  // Otherwise, lockword_ is already initialized.
}
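
// Typical client usage (illustrative): SpinLock is normally held through the
// RAII SpinLockHolder wrapper declared in spinlock.h rather than by calling
// Lock()/Unlock() directly, e.g.
//
//   absl::base_internal::SpinLock lock;
//   {
//     absl::base_internal::SpinLockHolder holder(&lock);
//     // ... critical section ...
//   }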

// Static (linker initialized) spinlocks always start life as functional
// non-cooperative locks.  When their static constructor does run, it will call
// this initializer to augment the lockword with the cooperative bit.  By
// actually taking the lock when we do this we avoid the need for an atomic
// operation in the regular unlock path.
//
// SlowLock() must be careful to re-test for this bit so that any outstanding
// waiters may be upgraded to cooperative status.
void SpinLock::InitLinkerInitializedAndCooperative() {
  Lock();
  lockword_.fetch_or(kSpinLockCooperative, std::memory_order_relaxed);
  Unlock();
}

// Monitor the lock to see if its value changes within some time period
// (adaptive_spin_count loop iterations).  The last value read from the lock
// is returned from the method.
uint32_t SpinLock::SpinLoop() {
  // We are already in the slow path of SpinLock; initialize the
  // adaptive_spin_count here.
  ABSL_CONST_INIT static absl::once_flag init_adaptive_spin_count;
  ABSL_CONST_INIT static int adaptive_spin_count = 0;
  base_internal::LowLevelCallOnce(&init_adaptive_spin_count, []() {
    adaptive_spin_count = base_internal::NumCPUs() > 1 ? 1000 : 1;
  });

  int c = adaptive_spin_count;
  uint32_t lock_value;
  do {
    lock_value = lockword_.load(std::memory_order_relaxed);
  } while ((lock_value & kSpinLockHeld) != 0 && --c > 0);
  return lock_value;
}

void SpinLock::SlowLock() {
  uint32_t lock_value = SpinLoop();
  lock_value = TryLockInternal(lock_value, 0);
  if ((lock_value & kSpinLockHeld) == 0) {
    return;
  }
  // The lock was not obtained initially, so this thread needs to wait for
  // it.  Record the current timestamp in the local variable wait_start_time
  // so the total wait time can be stored in the lockword once this thread
  // obtains the lock.
  int64_t wait_start_time = CycleClock::Now();
  uint32_t wait_cycles = 0;
  int lock_wait_call_count = 0;
  while ((lock_value & kSpinLockHeld) != 0) {
    // If the lock is currently held, but not marked as having a sleeper, mark
    // it as having a sleeper.
    if ((lock_value & kWaitTimeMask) == 0) {
      // Here, just "mark" that the thread is going to sleep.  Don't store the
      // lock wait time in the lock as that will cause the current lock
      // owner to think it experienced contention.
      if (lockword_.compare_exchange_strong(
              lock_value, lock_value | kSpinLockSleeper,
              std::memory_order_relaxed, std::memory_order_relaxed)) {
        // Successfully transitioned to kSpinLockSleeper.  Pass
        // kSpinLockSleeper to the SpinLockWait routine to properly indicate
        // the last lock_value observed.
        lock_value |= kSpinLockSleeper;
      } else if ((lock_value & kSpinLockHeld) == 0) {
        // Lock is free again, so try and acquire it before sleeping.  The
        // new lock state will be the number of cycles this thread waited if
        // this thread obtains the lock.
        lock_value = TryLockInternal(lock_value, wait_cycles);
        continue;  // Skip the delay at the end of the loop.
      }
    }

    base_internal::SchedulingMode scheduling_mode;
    if ((lock_value & kSpinLockCooperative) != 0) {
      scheduling_mode = base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL;
    } else {
      scheduling_mode = base_internal::SCHEDULE_KERNEL_ONLY;
    }
    // SpinLockDelay() calls into the fiber scheduler; we need to see
    // synchronization there to avoid false positives.
    ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0);
    // Wait for an OS specific delay.
    base_internal::SpinLockDelay(&lockword_, lock_value,
                                 ++lock_wait_call_count, scheduling_mode);
    ABSL_TSAN_MUTEX_POST_DIVERT(this, 0);
    // Spin again after returning from the wait routine to give this thread
    // some chance of obtaining the lock.
    lock_value = SpinLoop();
    wait_cycles = EncodeWaitCycles(wait_start_time, CycleClock::Now());
    lock_value = TryLockInternal(lock_value, wait_cycles);
  }
}

void SpinLock::SlowUnlock(uint32_t lock_value) {
  base_internal::SpinLockWake(&lockword_,
                              false);  // wake waiter if necessary

  // If our acquisition was contended, collect contentionz profile info.  We
  // reserve a unitary wait time to represent that a waiter exists without our
  // own acquisition having been contended.
  if ((lock_value & kWaitTimeMask) != kSpinLockSleeper) {
    const uint64_t wait_cycles = DecodeWaitCycles(lock_value);
    ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0);
    submit_profile_data(this, wait_cycles);
    ABSL_TSAN_MUTEX_POST_DIVERT(this, 0);
  }
}

// We use the upper 29 bits of the lock word to store the time spent waiting to
// acquire this lock.  This is reported by contentionz profiling.  Since the
// lower bits of the cycle counter wrap very quickly on high-frequency
// processors we divide to reduce the granularity to 2^PROFILE_TIMESTAMP_SHIFT
// sized units.  On a 4GHz machine this will lose track of wait times greater
// than (2^29/4 GHz)*128 =~ 17.2 seconds.  Such waits should be extremely rare.
enum { PROFILE_TIMESTAMP_SHIFT = 7 };
enum { LOCKWORD_RESERVED_SHIFT = 3 };  // We currently reserve the lower 3 bits.
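
// For reference, the bound quoted above follows directly from these two
// constants: the upper 32 - LOCKWORD_RESERVED_SHIFT = 29 bits hold the wait in
// units of 2^PROFILE_TIMESTAMP_SHIFT = 128 cycles, so the largest recordable
// wait is about 2^29 * 128 =~ 6.87e10 cycles, or roughly 17.2 seconds at 4GHz.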

uint32_t SpinLock::EncodeWaitCycles(int64_t wait_start_time,
                                    int64_t wait_end_time) {
  static const int64_t kMaxWaitTime =
      std::numeric_limits<uint32_t>::max() >> LOCKWORD_RESERVED_SHIFT;
  int64_t scaled_wait_time =
      (wait_end_time - wait_start_time) >> PROFILE_TIMESTAMP_SHIFT;

  // Return a representation of the time spent waiting that can be stored in
  // the lock word's upper bits.
  uint32_t clamped = static_cast<uint32_t>(
      std::min(scaled_wait_time, kMaxWaitTime) << LOCKWORD_RESERVED_SHIFT);

  if (clamped == 0) {
    return kSpinLockSleeper;  // Just wake waiters, but don't record contention.
  }
  // Bump up value if necessary to avoid returning kSpinLockSleeper.
  const uint32_t kMinWaitTime =
      kSpinLockSleeper + (1 << LOCKWORD_RESERVED_SHIFT);
  if (clamped == kSpinLockSleeper) {
    return kMinWaitTime;
  }
  return clamped;
}

uint64_t SpinLock::DecodeWaitCycles(uint32_t lock_value) {
  // Cast to uint32_t first to ensure bits [63:32] are cleared.
  const uint64_t scaled_wait_time =
      static_cast<uint32_t>(lock_value & kWaitTimeMask);
  return scaled_wait_time
         << (PROFILE_TIMESTAMP_SHIFT - LOCKWORD_RESERVED_SHIFT);
}
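
// Worked example (illustrative): a wait of 1,000,000 cycles encodes as
// (1,000,000 >> PROFILE_TIMESTAMP_SHIFT) << LOCKWORD_RESERVED_SHIFT = 62,496;
// DecodeWaitCycles(62,496) then returns 62,496 << 4 = 999,936 cycles, i.e. the
// original wait rounded down to a multiple of 128 cycles.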

}  // namespace base_internal
}  // namespace absl