1/*
2 * QemuLockCnt implementation
3 *
4 * Copyright Red Hat, Inc. 2017
5 *
6 * Author:
7 * Paolo Bonzini <pbonzini@redhat.com>
8 */
9#include "qemu/osdep.h"
10#include "qemu/thread.h"
11#include "qemu/atomic.h"
12#include "trace.h"
13
14#ifdef CONFIG_LINUX
15#include "qemu/futex.h"
16
17/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
18 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
19 * this is not the most relaxing citation I could make...). It is similar
20 * to mutex2 in the paper.
21 */
22
23#define QEMU_LOCKCNT_STATE_MASK 3
24#define QEMU_LOCKCNT_STATE_FREE 0 /* free, uncontended */
25#define QEMU_LOCKCNT_STATE_LOCKED 1 /* locked, uncontended */
26#define QEMU_LOCKCNT_STATE_WAITING 2 /* locked, contended */
27
28#define QEMU_LOCKCNT_COUNT_STEP 4
29#define QEMU_LOCKCNT_COUNT_SHIFT 2
30
31void qemu_lockcnt_init(QemuLockCnt *lockcnt)
32{
33 lockcnt->count = 0;
34}
35
36void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
37{
38}
39
40/* *val is the current value of lockcnt->count.
41 *
42 * If the lock is free, try a cmpxchg from *val to new_if_free; return
43 * true and set *val to the old value found by the cmpxchg in
44 * lockcnt->count.
45 *
46 * If the lock is taken, wait for it to be released and return false
47 * *without trying again to take the lock*. Again, set *val to the
48 * new value of lockcnt->count.
49 *
50 * If *waited is true on return, new_if_free's bottom two bits must not
51 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
52 * does not know if there are other waiters. Furthermore, after *waited
53 * is set the caller has effectively acquired the lock. If it returns
54 * with the lock not taken, it must wake another futex waiter.
55 */
56static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
57 int new_if_free, bool *waited)
58{
59 /* Fast path for when the lock is free. */
60 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
61 int expected = *val;
62
63 trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
64 *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free);
65 if (*val == expected) {
66 trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
67 *val = new_if_free;
68 return true;
69 }
70 }
71
72 /* The slow path moves from locked to waiting if necessary, then
73 * does a futex wait. Both steps can be repeated ad nauseam,
74 * only getting out of the loop if we can have another shot at the
75 * fast path. Once we can, get out to compute the new destination
76 * value for the fast path.
77 */
78 while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
79 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
80 int expected = *val;
81 int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
82
83 trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
84 *val = atomic_cmpxchg(&lockcnt->count, expected, new);
85 if (*val == expected) {
86 *val = new;
87 }
88 continue;
89 }
90
91 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
92 *waited = true;
93 trace_lockcnt_futex_wait(lockcnt, *val);
94 qemu_futex_wait(&lockcnt->count, *val);
95 *val = atomic_read(&lockcnt->count);
96 trace_lockcnt_futex_wait_resume(lockcnt, *val);
97 continue;
98 }
99
100 abort();
101 }
102 return false;
103}
104
105static void lockcnt_wake(QemuLockCnt *lockcnt)
106{
107 trace_lockcnt_futex_wake(lockcnt);
108 qemu_futex_wake(&lockcnt->count, 1);
109}
110
111void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
112{
113 int val = atomic_read(&lockcnt->count);
114 bool waited = false;
115
116 for (;;) {
117 if (val >= QEMU_LOCKCNT_COUNT_STEP) {
118 int expected = val;
119 val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP);
120 if (val == expected) {
121 break;
122 }
123 } else {
124 /* The fast path is (0, unlocked)->(1, unlocked). */
125 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
126 &waited)) {
127 break;
128 }
129 }
130 }
131
132 /* If we were woken by another thread, we should also wake one because
133 * we are effectively releasing the lock that was given to us. This is
134 * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
135 * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
136 * wake someone.
137 */
138 if (waited) {
139 lockcnt_wake(lockcnt);
140 }
141}
142
143void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
144{
145 atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
146}
147
148/* Decrement a counter, and return locked if it is decremented to zero.
149 * If the function returns true, it is impossible for the counter to
150 * become nonzero until the next qemu_lockcnt_unlock.
151 */
152bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
153{
154 int val = atomic_read(&lockcnt->count);
155 int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
156 bool waited = false;
157
158 for (;;) {
159 if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
160 int expected = val;
161 val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP);
162 if (val == expected) {
163 break;
164 }
165 } else {
166 /* If count is going 1->0, take the lock. The fast path is
167 * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
168 */
169 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
170 return true;
171 }
172
173 if (waited) {
174 /* At this point we do not know if there are more waiters. Assume
175 * there are.
176 */
177 locked_state = QEMU_LOCKCNT_STATE_WAITING;
178 }
179 }
180 }
181
182 /* If we were woken by another thread, but we're returning in unlocked
183 * state, we should also wake a thread because we are effectively
184 * releasing the lock that was given to us. This is the case where
185 * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
186 * bits, and qemu_lockcnt_unlock would find it and wake someone.
187 */
188 if (waited) {
189 lockcnt_wake(lockcnt);
190 }
191 return false;
192}
193
194/* If the counter is one, decrement it and return locked. Otherwise do
195 * nothing.
196 *
197 * If the function returns true, it is impossible for the counter to
198 * become nonzero until the next qemu_lockcnt_unlock.
199 */
200bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
201{
202 int val = atomic_read(&lockcnt->count);
203 int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
204 bool waited = false;
205
206 while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
207 /* If count is going 1->0, take the lock. The fast path is
208 * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
209 */
210 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
211 return true;
212 }
213
214 if (waited) {
215 /* At this point we do not know if there are more waiters. Assume
216 * there are.
217 */
218 locked_state = QEMU_LOCKCNT_STATE_WAITING;
219 }
220 }
221
222 /* If we were woken by another thread, but we're returning in unlocked
223 * state, we should also wake a thread because we are effectively
224 * releasing the lock that was given to us. This is the case where
225 * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
226 * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
227 */
228 if (waited) {
229 lockcnt_wake(lockcnt);
230 }
231 return false;
232}
233
234void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
235{
236 int val = atomic_read(&lockcnt->count);
237 int step = QEMU_LOCKCNT_STATE_LOCKED;
238 bool waited = false;
239
240 /* The third argument is only used if the low bits of val are 0
241 * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
242 * state.
243 */
244 while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
245 if (waited) {
246 /* At this point we do not know if there are more waiters. Assume
247 * there are.
248 */
249 step = QEMU_LOCKCNT_STATE_WAITING;
250 }
251 }
252}
253
254void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
255{
256 int expected, new, val;
257
258 val = atomic_read(&lockcnt->count);
259 do {
260 expected = val;
261 new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
262 trace_lockcnt_unlock_attempt(lockcnt, val, new);
263 val = atomic_cmpxchg(&lockcnt->count, val, new);
264 } while (val != expected);
265
266 trace_lockcnt_unlock_success(lockcnt, val, new);
267 if (val & QEMU_LOCKCNT_STATE_WAITING) {
268 lockcnt_wake(lockcnt);
269 }
270}
271
272void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
273{
274 int expected, new, val;
275
276 val = atomic_read(&lockcnt->count);
277 do {
278 expected = val;
279 new = val & ~QEMU_LOCKCNT_STATE_MASK;
280 trace_lockcnt_unlock_attempt(lockcnt, val, new);
281 val = atomic_cmpxchg(&lockcnt->count, val, new);
282 } while (val != expected);
283
284 trace_lockcnt_unlock_success(lockcnt, val, new);
285 if (val & QEMU_LOCKCNT_STATE_WAITING) {
286 lockcnt_wake(lockcnt);
287 }
288}
289
290unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
291{
292 return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
293}
294#else
295void qemu_lockcnt_init(QemuLockCnt *lockcnt)
296{
297 qemu_mutex_init(&lockcnt->mutex);
298 lockcnt->count = 0;
299}
300
301void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
302{
303 qemu_mutex_destroy(&lockcnt->mutex);
304}
305
306void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
307{
308 int old;
309 for (;;) {
310 old = atomic_read(&lockcnt->count);
311 if (old == 0) {
312 qemu_lockcnt_lock(lockcnt);
313 qemu_lockcnt_inc_and_unlock(lockcnt);
314 return;
315 } else {
316 if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
317 return;
318 }
319 }
320 }
321}
322
323void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
324{
325 atomic_dec(&lockcnt->count);
326}
327
328/* Decrement a counter, and return locked if it is decremented to zero.
329 * It is impossible for the counter to become nonzero while the mutex
330 * is taken.
331 */
332bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
333{
334 int val = atomic_read(&lockcnt->count);
335 while (val > 1) {
336 int old = atomic_cmpxchg(&lockcnt->count, val, val - 1);
337 if (old != val) {
338 val = old;
339 continue;
340 }
341
342 return false;
343 }
344
345 qemu_lockcnt_lock(lockcnt);
346 if (atomic_fetch_dec(&lockcnt->count) == 1) {
347 return true;
348 }
349
350 qemu_lockcnt_unlock(lockcnt);
351 return false;
352}
353
354/* Decrement a counter and return locked if it is decremented to zero.
355 * Otherwise do nothing.
356 *
357 * It is impossible for the counter to become nonzero while the mutex
358 * is taken.
359 */
360bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
361{
362 /* No need for acquire semantics if we return false. */
363 int val = atomic_read(&lockcnt->count);
364 if (val > 1) {
365 return false;
366 }
367
368 qemu_lockcnt_lock(lockcnt);
369 if (atomic_fetch_dec(&lockcnt->count) == 1) {
370 return true;
371 }
372
373 qemu_lockcnt_inc_and_unlock(lockcnt);
374 return false;
375}
376
377void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
378{
379 qemu_mutex_lock(&lockcnt->mutex);
380}
381
382void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
383{
384 atomic_inc(&lockcnt->count);
385 qemu_mutex_unlock(&lockcnt->mutex);
386}
387
388void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
389{
390 qemu_mutex_unlock(&lockcnt->mutex);
391}
392
393unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
394{
395 return atomic_read(&lockcnt->count);
396}
397#endif
398