1 | /* |
2 | * QemuLockCnt implementation |
3 | * |
4 | * Copyright Red Hat, Inc. 2017 |
5 | * |
6 | * Author: |
7 | * Paolo Bonzini <pbonzini@redhat.com> |
8 | */ |
9 | #include "qemu/osdep.h" |
10 | #include "qemu/thread.h" |
11 | #include "qemu/atomic.h" |
12 | #include "trace.h" |
13 | |
14 | #ifdef CONFIG_LINUX |
15 | #include "qemu/futex.h" |
16 | |
17 | /* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter. |
18 | * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok, |
19 | * this is not the most relaxing citation I could make...). It is similar |
20 | * to mutex2 in the paper. |
21 | */ |
22 | |
23 | #define QEMU_LOCKCNT_STATE_MASK 3 |
24 | #define QEMU_LOCKCNT_STATE_FREE 0 /* free, uncontended */ |
25 | #define QEMU_LOCKCNT_STATE_LOCKED 1 /* locked, uncontended */ |
26 | #define QEMU_LOCKCNT_STATE_WAITING 2 /* locked, contended */ |
27 | |
28 | #define QEMU_LOCKCNT_COUNT_STEP 4 |
29 | #define QEMU_LOCKCNT_COUNT_SHIFT 2 |
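
/* For illustration: a count value of 13 (binary 1101) decomposes into a
 * counter of 3 (13 >> QEMU_LOCKCNT_COUNT_SHIFT) and the state
 * QEMU_LOCKCNT_STATE_LOCKED (13 & QEMU_LOCKCNT_STATE_MASK): three
 * elevated references while the lock is held, uncontended.
 */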
30 | |
31 | void qemu_lockcnt_init(QemuLockCnt *lockcnt) |
32 | { |
33 | lockcnt->count = 0; |
34 | } |
35 | |
36 | void qemu_lockcnt_destroy(QemuLockCnt *lockcnt) |
37 | { |
38 | } |
39 | |
40 | /* *val is the current value of lockcnt->count. |
41 | * |
42 | * If the lock is free, try a cmpxchg from *val to new_if_free; return |
 * true and set *val to the new value of lockcnt->count.
45 | * |
46 | * If the lock is taken, wait for it to be released and return false |
47 | * *without trying again to take the lock*. Again, set *val to the |
48 | * new value of lockcnt->count. |
49 | * |
50 | * If *waited is true on return, new_if_free's bottom two bits must not |
51 | * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller |
52 | * does not know if there are other waiters. Furthermore, after *waited |
53 | * is set the caller has effectively acquired the lock. If it returns |
54 | * with the lock not taken, it must wake another futex waiter. |
55 | */ |
56 | static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val, |
57 | int new_if_free, bool *waited) |
58 | { |
59 | /* Fast path for when the lock is free. */ |
60 | if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) { |
61 | int expected = *val; |
62 | |
63 | trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free); |
64 | *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free); |
65 | if (*val == expected) { |
66 | trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free); |
67 | *val = new_if_free; |
68 | return true; |
69 | } |
70 | } |
71 | |
72 | /* The slow path moves from locked to waiting if necessary, then |
73 | * does a futex wait. Both steps can be repeated ad nauseam, |
74 | * only getting out of the loop if we can have another shot at the |
75 | * fast path. Once we can, get out to compute the new destination |
76 | * value for the fast path. |
77 | */ |
78 | while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) { |
79 | if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) { |
80 | int expected = *val; |
81 | int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING; |
82 | |
83 | trace_lockcnt_futex_wait_prepare(lockcnt, expected, new); |
84 | *val = atomic_cmpxchg(&lockcnt->count, expected, new); |
85 | if (*val == expected) { |
86 | *val = new; |
87 | } |
88 | continue; |
89 | } |
90 | |
91 | if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) { |
92 | *waited = true; |
93 | trace_lockcnt_futex_wait(lockcnt, *val); |
94 | qemu_futex_wait(&lockcnt->count, *val); |
95 | *val = atomic_read(&lockcnt->count); |
96 | trace_lockcnt_futex_wait_resume(lockcnt, *val); |
97 | continue; |
98 | } |
99 | |
100 | abort(); |
101 | } |
102 | return false; |
103 | } |
104 | |
105 | static void lockcnt_wake(QemuLockCnt *lockcnt) |
106 | { |
107 | trace_lockcnt_futex_wake(lockcnt); |
108 | qemu_futex_wake(&lockcnt->count, 1); |
109 | } |
110 | |
111 | void qemu_lockcnt_inc(QemuLockCnt *lockcnt) |
112 | { |
113 | int val = atomic_read(&lockcnt->count); |
114 | bool waited = false; |
115 | |
116 | for (;;) { |
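        /* The counter is already nonzero: the increment need not look at
         * the lock bits, because only 0->1 transitions must synchronize
         * with the lock.
         */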
117 | if (val >= QEMU_LOCKCNT_COUNT_STEP) { |
118 | int expected = val; |
119 | val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP); |
120 | if (val == expected) { |
121 | break; |
122 | } |
123 | } else { |
124 | /* The fast path is (0, unlocked)->(1, unlocked). */ |
125 | if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP, |
126 | &waited)) { |
127 | break; |
128 | } |
129 | } |
130 | } |
131 | |
132 | /* If we were woken by another thread, we should also wake one because |
133 | * we are effectively releasing the lock that was given to us. This is |
134 | * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING |
135 | * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and |
136 | * wake someone. |
137 | */ |
138 | if (waited) { |
139 | lockcnt_wake(lockcnt); |
140 | } |
141 | } |
142 | |
143 | void qemu_lockcnt_dec(QemuLockCnt *lockcnt) |
144 | { |
145 | atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP); |
146 | } |
147 | |
148 | /* Decrement a counter, and return locked if it is decremented to zero. |
149 | * If the function returns true, it is impossible for the counter to |
150 | * become nonzero until the next qemu_lockcnt_unlock. |
151 | */ |
152 | bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt) |
153 | { |
154 | int val = atomic_read(&lockcnt->count); |
155 | int locked_state = QEMU_LOCKCNT_STATE_LOCKED; |
156 | bool waited = false; |
157 | |
158 | for (;;) { |
159 | if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) { |
160 | int expected = val; |
161 | val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP); |
162 | if (val == expected) { |
163 | break; |
164 | } |
165 | } else { |
166 | /* If count is going 1->0, take the lock. The fast path is |
167 | * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting). |
168 | */ |
169 | if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) { |
170 | return true; |
171 | } |
172 | |
173 | if (waited) { |
174 | /* At this point we do not know if there are more waiters. Assume |
175 | * there are. |
176 | */ |
177 | locked_state = QEMU_LOCKCNT_STATE_WAITING; |
178 | } |
179 | } |
180 | } |
181 | |
182 | /* If we were woken by another thread, but we're returning in unlocked |
183 | * state, we should also wake a thread because we are effectively |
184 | * releasing the lock that was given to us. This is the case where |
185 | * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low |
186 | * bits, and qemu_lockcnt_unlock would find it and wake someone. |
187 | */ |
188 | if (waited) { |
189 | lockcnt_wake(lockcnt); |
190 | } |
191 | return false; |
192 | } |
193 | |
194 | /* If the counter is one, decrement it and return locked. Otherwise do |
195 | * nothing. |
196 | * |
197 | * If the function returns true, it is impossible for the counter to |
198 | * become nonzero until the next qemu_lockcnt_unlock. |
199 | */ |
200 | bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt) |
201 | { |
202 | int val = atomic_read(&lockcnt->count); |
203 | int locked_state = QEMU_LOCKCNT_STATE_LOCKED; |
204 | bool waited = false; |
205 | |
206 | while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) { |
207 | /* If count is going 1->0, take the lock. The fast path is |
208 | * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting). |
209 | */ |
210 | if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) { |
211 | return true; |
212 | } |
213 | |
214 | if (waited) { |
215 | /* At this point we do not know if there are more waiters. Assume |
216 | * there are. |
217 | */ |
218 | locked_state = QEMU_LOCKCNT_STATE_WAITING; |
219 | } |
220 | } |
221 | |
222 | /* If we were woken by another thread, but we're returning in unlocked |
223 | * state, we should also wake a thread because we are effectively |
224 | * releasing the lock that was given to us. This is the case where |
225 | * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low |
226 | * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone. |
227 | */ |
228 | if (waited) { |
229 | lockcnt_wake(lockcnt); |
230 | } |
231 | return false; |
232 | } |
233 | |
234 | void qemu_lockcnt_lock(QemuLockCnt *lockcnt) |
235 | { |
236 | int val = atomic_read(&lockcnt->count); |
237 | int step = QEMU_LOCKCNT_STATE_LOCKED; |
238 | bool waited = false; |
239 | |
240 | /* The third argument is only used if the low bits of val are 0 |
241 | * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired |
242 | * state. |
243 | */ |
244 | while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) { |
245 | if (waited) { |
246 | /* At this point we do not know if there are more waiters. Assume |
247 | * there are. |
248 | */ |
249 | step = QEMU_LOCKCNT_STATE_WAITING; |
250 | } |
251 | } |
252 | } |
253 | |
254 | void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt) |
255 | { |
256 | int expected, new, val; |
257 | |
258 | val = atomic_read(&lockcnt->count); |
259 | do { |
260 | expected = val; |
261 | new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK; |
262 | trace_lockcnt_unlock_attempt(lockcnt, val, new); |
263 | val = atomic_cmpxchg(&lockcnt->count, val, new); |
264 | } while (val != expected); |
265 | |
266 | trace_lockcnt_unlock_success(lockcnt, val, new); |
267 | if (val & QEMU_LOCKCNT_STATE_WAITING) { |
268 | lockcnt_wake(lockcnt); |
269 | } |
270 | } |
271 | |
272 | void qemu_lockcnt_unlock(QemuLockCnt *lockcnt) |
273 | { |
274 | int expected, new, val; |
275 | |
276 | val = atomic_read(&lockcnt->count); |
277 | do { |
278 | expected = val; |
279 | new = val & ~QEMU_LOCKCNT_STATE_MASK; |
280 | trace_lockcnt_unlock_attempt(lockcnt, val, new); |
281 | val = atomic_cmpxchg(&lockcnt->count, val, new); |
282 | } while (val != expected); |
283 | |
284 | trace_lockcnt_unlock_success(lockcnt, val, new); |
285 | if (val & QEMU_LOCKCNT_STATE_WAITING) { |
286 | lockcnt_wake(lockcnt); |
287 | } |
288 | } |
289 | |
290 | unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt) |
291 | { |
292 | return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT; |
293 | } |
294 | #else |
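
/* Generic fallback for platforms without futexes: the counter lives in
 * lockcnt->count and the lock in a separate QemuMutex.  The key
 * invariant matches the Linux implementation: a 0->1 transition of the
 * counter must happen under the mutex, so a zero count observed while
 * the mutex is held stays zero until the mutex is released.
 */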
295 | void qemu_lockcnt_init(QemuLockCnt *lockcnt) |
296 | { |
297 | qemu_mutex_init(&lockcnt->mutex); |
298 | lockcnt->count = 0; |
299 | } |
300 | |
301 | void qemu_lockcnt_destroy(QemuLockCnt *lockcnt) |
302 | { |
303 | qemu_mutex_destroy(&lockcnt->mutex); |
304 | } |
305 | |
306 | void qemu_lockcnt_inc(QemuLockCnt *lockcnt) |
307 | { |
308 | int old; |
309 | for (;;) { |
310 | old = atomic_read(&lockcnt->count); |
311 | if (old == 0) { |
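            /* 0->1 transition: go through the mutex so we cannot race
             * with a qemu_lockcnt_dec_and_lock that has just seen the
             * count drop to zero.
             */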
312 | qemu_lockcnt_lock(lockcnt); |
313 | qemu_lockcnt_inc_and_unlock(lockcnt); |
314 | return; |
315 | } else { |
316 | if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) { |
317 | return; |
318 | } |
319 | } |
320 | } |
321 | } |
322 | |
323 | void qemu_lockcnt_dec(QemuLockCnt *lockcnt) |
324 | { |
325 | atomic_dec(&lockcnt->count); |
326 | } |
327 | |
328 | /* Decrement a counter, and return locked if it is decremented to zero. |
329 | * It is impossible for the counter to become nonzero while the mutex |
330 | * is taken. |
331 | */ |
332 | bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt) |
333 | { |
334 | int val = atomic_read(&lockcnt->count); |
335 | while (val > 1) { |
336 | int old = atomic_cmpxchg(&lockcnt->count, val, val - 1); |
337 | if (old != val) { |
338 | val = old; |
339 | continue; |
340 | } |
341 | |
342 | return false; |
343 | } |
344 | |
345 | qemu_lockcnt_lock(lockcnt); |
346 | if (atomic_fetch_dec(&lockcnt->count) == 1) { |
347 | return true; |
348 | } |
349 | |
350 | qemu_lockcnt_unlock(lockcnt); |
351 | return false; |
352 | } |
353 | |
/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
356 | * |
357 | * It is impossible for the counter to become nonzero while the mutex |
358 | * is taken. |
359 | */ |
360 | bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt) |
361 | { |
362 | /* No need for acquire semantics if we return false. */ |
363 | int val = atomic_read(&lockcnt->count); |
364 | if (val > 1) { |
365 | return false; |
366 | } |
367 | |
368 | qemu_lockcnt_lock(lockcnt); |
369 | if (atomic_fetch_dec(&lockcnt->count) == 1) { |
370 | return true; |
371 | } |
372 | |
373 | qemu_lockcnt_inc_and_unlock(lockcnt); |
374 | return false; |
375 | } |
376 | |
377 | void qemu_lockcnt_lock(QemuLockCnt *lockcnt) |
378 | { |
379 | qemu_mutex_lock(&lockcnt->mutex); |
380 | } |
381 | |
382 | void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt) |
383 | { |
384 | atomic_inc(&lockcnt->count); |
385 | qemu_mutex_unlock(&lockcnt->mutex); |
386 | } |
387 | |
388 | void qemu_lockcnt_unlock(QemuLockCnt *lockcnt) |
389 | { |
390 | qemu_mutex_unlock(&lockcnt->mutex); |
391 | } |
392 | |
393 | unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt) |
394 | { |
395 | return atomic_read(&lockcnt->count); |
396 | } |
397 | #endif |
398 | |