1 | /* |
2 | * Simple interface for atomic operations. |
3 | * |
4 | * Copyright (C) 2013 Red Hat, Inc. |
5 | * |
6 | * Author: Paolo Bonzini <pbonzini@redhat.com> |
7 | * |
8 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
9 | * See the COPYING file in the top-level directory. |
10 | * |
11 | * See docs/devel/atomics.txt for discussion about the guarantees each |
12 | * atomic primitive is meant to provide. |
13 | */ |
14 | |
15 | #ifndef QEMU_ATOMIC_H |
16 | #define QEMU_ATOMIC_H |
17 | |
18 | /* Compiler barrier */ |
19 | #define barrier() ({ asm volatile("" ::: "memory"); (void)0; }) |
20 | |
21 | /* The variable that receives the old value of an atomically-accessed |
22 | * variable must be non-qualified, because atomic builtins return values |
23 | * through a pointer-type argument as in __atomic_load(&var, &old, MODEL). |
24 | * |
25 | * This macro has to handle types smaller than int manually, because of |
26 | * implicit promotion. int and larger types, as well as pointers, can be |
27 | * converted to a non-qualified type just by applying a binary operator. |
28 | */ |
29 | #define typeof_strip_qual(expr) \ |
30 | typeof( \ |
31 | __builtin_choose_expr( \ |
32 | __builtin_types_compatible_p(typeof(expr), bool) || \ |
33 | __builtin_types_compatible_p(typeof(expr), const bool) || \ |
34 | __builtin_types_compatible_p(typeof(expr), volatile bool) || \ |
35 | __builtin_types_compatible_p(typeof(expr), const volatile bool), \ |
36 | (bool)1, \ |
37 | __builtin_choose_expr( \ |
38 | __builtin_types_compatible_p(typeof(expr), signed char) || \ |
39 | __builtin_types_compatible_p(typeof(expr), const signed char) || \ |
40 | __builtin_types_compatible_p(typeof(expr), volatile signed char) || \ |
41 | __builtin_types_compatible_p(typeof(expr), const volatile signed char), \ |
42 | (signed char)1, \ |
43 | __builtin_choose_expr( \ |
44 | __builtin_types_compatible_p(typeof(expr), unsigned char) || \ |
45 | __builtin_types_compatible_p(typeof(expr), const unsigned char) || \ |
46 | __builtin_types_compatible_p(typeof(expr), volatile unsigned char) || \ |
47 | __builtin_types_compatible_p(typeof(expr), const volatile unsigned char), \ |
48 | (unsigned char)1, \ |
49 | __builtin_choose_expr( \ |
50 | __builtin_types_compatible_p(typeof(expr), signed short) || \ |
51 | __builtin_types_compatible_p(typeof(expr), const signed short) || \ |
52 | __builtin_types_compatible_p(typeof(expr), volatile signed short) || \ |
53 | __builtin_types_compatible_p(typeof(expr), const volatile signed short), \ |
54 | (signed short)1, \ |
55 | __builtin_choose_expr( \ |
56 | __builtin_types_compatible_p(typeof(expr), unsigned short) || \ |
57 | __builtin_types_compatible_p(typeof(expr), const unsigned short) || \ |
58 | __builtin_types_compatible_p(typeof(expr), volatile unsigned short) || \ |
59 | __builtin_types_compatible_p(typeof(expr), const volatile unsigned short), \ |
60 | (unsigned short)1, \ |
61 | (expr)+0)))))) |
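
/* Usage sketch (illustrative; "flag" is a hypothetical variable): given a
 * qualified object, typeof_strip_qual() yields the plain type, so the
 * temporary below is a valid destination for __atomic_load():
 *
 *     volatile bool flag;
 *     typeof_strip_qual(flag) val;    // declares a plain "bool val"
 *     __atomic_load(&flag, &val, __ATOMIC_RELAXED);
 */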
62 | |
63 | #ifdef __ATOMIC_RELAXED |
64 | /* For C11 atomic ops */ |
65 | |
66 | /* Manual memory barriers |
67 | * |
 * __atomic_thread_fence does not include a compiler barrier; instead,
69 | * the barrier is part of __atomic_load/__atomic_store's "volatile-like" |
70 | * semantics. If smp_wmb() is a no-op, absence of the barrier means that |
71 | * the compiler is free to reorder stores on each side of the barrier. |
72 | * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends(). |
73 | */ |
74 | |
75 | #define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); }) |
76 | #define smp_mb_release() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); }) |
77 | #define smp_mb_acquire() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); }) |
78 | |
/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here. Keep the barrier when
 * building with the Thread Sanitizer to avoid warnings; otherwise optimize
 * it away.
82 | */ |
83 | #if defined(__SANITIZE_THREAD__) |
84 | #define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) |
85 | #elif defined(__alpha__) |
86 | #define smp_read_barrier_depends() asm volatile("mb":::"memory") |
87 | #else |
88 | #define smp_read_barrier_depends() barrier() |
89 | #endif |
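
/* Usage sketch (illustrative; "payload", "ready", compute() and consume()
 * are hypothetical): the classic message-passing pattern pairs smp_wmb()
 * on the writer side with smp_rmb() on the reader side; both are defined
 * near the end of this header on top of smp_mb_release()/smp_mb_acquire():
 *
 *     // writer                        // reader
 *     payload = compute();             if (atomic_read(&ready)) {
 *     smp_wmb();                           smp_rmb();
 *     atomic_set(&ready, 1);               consume(payload);
 *                                      }
 */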
90 | |
91 | /* |
92 | * A signal barrier forces all pending local memory ops to be observed before |
93 | * a SIGSEGV is delivered to the *same* thread. In practice this is exactly |
94 | * the same as barrier(), but since we have the correct builtin, use it. |
95 | */ |
96 | #define signal_barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST) |
97 | |
98 | /* Sanity check that the size of an atomic operation isn't "overly large". |
 * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
 * want to use them because we ought not need them, and avoiding them gives
 * us some assurance that other 32-bit hosts can still build.
102 | * |
 * That said, we have a problem on 64-bit ILP32 hosts: in order to
 * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
 * We'd prefer not to pull in everything else TCG related, so handle
 * those few cases by hand.
107 | * |
108 | * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for |
109 | * Sparc we always force the use of sparcv9 in configure. MIPS n32 (ILP32) & |
110 | * n64 (LP64) ABIs are both detected using __mips64. |
111 | */ |
112 | #if defined(__x86_64__) || defined(__sparc__) || defined(__mips64) |
113 | # define ATOMIC_REG_SIZE 8 |
114 | #else |
115 | # define ATOMIC_REG_SIZE sizeof(void *) |
116 | #endif |
117 | |
/* Weak atomic operations prevent the compiler from moving other
 * loads/stores past the atomic operation's load/store. However there is
 * no explicit memory barrier for the processor.
 *
 * The C11 memory model says that accesses to variables shared between
 * threads must use at least __ATOMIC_RELAXED primitives, or the behavior
 * is undefined. Generally this has little to no effect on the generated
 * code, but not using the atomic primitives will get flagged by
 * sanitizers as a violation.
127 | */ |
128 | #define atomic_read__nocheck(ptr) \ |
129 | __atomic_load_n(ptr, __ATOMIC_RELAXED) |
130 | |
131 | #define atomic_read(ptr) \ |
132 | ({ \ |
133 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
134 | atomic_read__nocheck(ptr); \ |
135 | }) |
136 | |
137 | #define atomic_set__nocheck(ptr, i) \ |
138 | __atomic_store_n(ptr, i, __ATOMIC_RELAXED) |
139 | |
140 | #define atomic_set(ptr, i) do { \ |
141 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
142 | atomic_set__nocheck(ptr, i); \ |
} while (0)
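
/* Usage sketch (illustrative; "stats_hits" is a hypothetical field shared
 * between threads): relaxed accessors impose no ordering, but they make the
 * access explicit and keep the sanitizers happy:
 *
 *     atomic_set(&s->stats_hits, 0);                    // one thread
 *     unsigned int n = atomic_read(&s->stats_hits);     // another thread
 */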
144 | |
145 | /* See above: most compilers currently treat consume and acquire the |
146 | * same, but this slows down atomic_rcu_read unnecessarily. |
147 | */ |
148 | #ifdef __SANITIZE_THREAD__ |
149 | #define atomic_rcu_read__nocheck(ptr, valptr) \ |
150 | __atomic_load(ptr, valptr, __ATOMIC_CONSUME); |
151 | #else |
152 | #define atomic_rcu_read__nocheck(ptr, valptr) \ |
153 | __atomic_load(ptr, valptr, __ATOMIC_RELAXED); \ |
154 | smp_read_barrier_depends(); |
155 | #endif |
156 | |
157 | #define atomic_rcu_read(ptr) \ |
158 | ({ \ |
159 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
160 | typeof_strip_qual(*ptr) _val; \ |
161 | atomic_rcu_read__nocheck(ptr, &_val); \ |
162 | _val; \ |
163 | }) |
164 | |
165 | #define atomic_rcu_set(ptr, i) do { \ |
166 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
167 | __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ |
} while (0)
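
/* Usage sketch (illustrative; "struct foo" and "global_foo" are
 * hypothetical): the writer fully initializes the new object before
 * publishing it with atomic_rcu_set(); a reader fetches the pointer with
 * atomic_rcu_read() and can then dereference it safely for the duration of
 * its read-side critical section:
 *
 *     struct foo *new_foo = g_new0(struct foo, 1);
 *     new_foo->val = 42;
 *     atomic_rcu_set(&global_foo, new_foo);             // writer
 *
 *     struct foo *p = atomic_rcu_read(&global_foo);     // reader
 *     use(p->val);
 */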
169 | |
170 | #define atomic_load_acquire(ptr) \ |
171 | ({ \ |
172 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
173 | typeof_strip_qual(*ptr) _val; \ |
174 | __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \ |
175 | _val; \ |
176 | }) |
177 | |
178 | #define atomic_store_release(ptr, i) do { \ |
179 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
180 | __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ |
} while (0)
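
/* Usage sketch (illustrative; "msg", "msg_ready", compute() and consume()
 * are hypothetical): a release store paired with an acquire load orders the
 * payload accesses without paying for a full memory barrier:
 *
 *     msg = compute();                          // producer
 *     atomic_store_release(&msg_ready, true);
 *
 *     if (atomic_load_acquire(&msg_ready)) {    // consumer
 *         consume(msg);
 *     }
 */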
182 | |
183 | |
184 | /* All the remaining operations are fully sequentially consistent */ |
185 | |
186 | #define atomic_xchg__nocheck(ptr, i) ({ \ |
187 | __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST); \ |
188 | }) |
189 | |
190 | #define atomic_xchg(ptr, i) ({ \ |
191 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
192 | atomic_xchg__nocheck(ptr, i); \ |
193 | }) |
194 | |
195 | /* Returns the eventual value, failed or not */ |
196 | #define atomic_cmpxchg__nocheck(ptr, old, new) ({ \ |
197 | typeof_strip_qual(*ptr) _old = (old); \ |
198 | (void)__atomic_compare_exchange_n(ptr, &_old, new, false, \ |
199 | __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ |
200 | _old; \ |
201 | }) |
202 | |
203 | #define atomic_cmpxchg(ptr, old, new) ({ \ |
204 | QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ |
205 | atomic_cmpxchg__nocheck(ptr, old, new); \ |
206 | }) |
207 | |
208 | /* Provide shorter names for GCC atomic builtins, return old value */ |
209 | #define atomic_fetch_inc(ptr) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST) |
210 | #define atomic_fetch_dec(ptr) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST) |
211 | #define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST) |
212 | #define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST) |
213 | #define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST) |
214 | #define atomic_fetch_or(ptr, n) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST) |
215 | #define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST) |
216 | |
217 | #define atomic_inc_fetch(ptr) __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST) |
218 | #define atomic_dec_fetch(ptr) __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST) |
219 | #define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST) |
220 | #define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST) |
221 | #define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST) |
222 | #define atomic_or_fetch(ptr, n) __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST) |
223 | #define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST) |
224 | |
225 | /* And even shorter names that return void. */ |
226 | #define atomic_inc(ptr) ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)) |
227 | #define atomic_dec(ptr) ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)) |
228 | #define atomic_add(ptr, n) ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)) |
229 | #define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)) |
230 | #define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)) |
231 | #define atomic_or(ptr, n) ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)) |
232 | #define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)) |
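
/* Usage sketch (illustrative; "obj", "refcnt" and free_obj() are
 * hypothetical): the fetch_* forms return the old value, the *_fetch forms
 * return the new one, and the void forms are for pure side effects:
 *
 *     atomic_inc(&obj->refcnt);                     // take a reference
 *     if (atomic_dec_fetch(&obj->refcnt) == 0) {    // drop a reference
 *         free_obj(obj);
 *     }
 */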
233 | |
234 | #else /* __ATOMIC_RELAXED */ |
235 | |
236 | /* |
 * We use the GCC __sync_synchronize() builtin if it's available, as that
 * can use mfence on 32-bit as well, e.g. if built with -march=pentium-m.
 * However, on i386 the spec is buggy, and the implementation followed it
 * until GCC 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
241 | */ |
242 | #if defined(__i386__) || defined(__x86_64__) |
243 | #if !QEMU_GNUC_PREREQ(4, 4) |
244 | #if defined __x86_64__ |
245 | #define smp_mb() ({ asm volatile("mfence" ::: "memory"); (void)0; }) |
246 | #else |
#define smp_mb() ({ asm volatile("lock; addl $0,0(%%esp)" ::: "memory"); (void)0; })
248 | #endif |
249 | #endif |
250 | #endif |
251 | |
252 | |
253 | #ifdef __alpha__ |
254 | #define smp_read_barrier_depends() asm volatile("mb":::"memory") |
255 | #endif |
256 | |
257 | #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) |
258 | |
259 | /* |
 * Because of the strongly ordered storage model, smp_wmb() and smp_rmb() are
 * nops here (a compiler barrier only). QEMU doesn't do accesses to
 * write-combining memory or non-temporal loads/stores from C code.
263 | */ |
264 | #define smp_mb_release() barrier() |
265 | #define smp_mb_acquire() barrier() |
266 | |
267 | /* |
268 | * __sync_lock_test_and_set() is documented to be an acquire barrier only, |
269 | * but it is a full barrier at the hardware level. Add a compiler barrier |
270 | * to make it a full barrier also at the compiler level. |
271 | */ |
272 | #define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i)) |
273 | |
274 | #elif defined(_ARCH_PPC) |
275 | |
276 | /* |
 * We use an eieio instruction for smp_wmb() on powerpc. This assumes we
 * don't need to order cacheable and non-cacheable stores with respect to
 * each other.
280 | * |
281 | * smp_mb has the same problem as on x86 for not-very-new GCC |
282 | * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011). |
283 | */ |
284 | #define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) |
285 | #if defined(__powerpc64__) |
286 | #define smp_mb_release() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) |
287 | #define smp_mb_acquire() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) |
288 | #else |
289 | #define smp_mb_release() ({ asm volatile("sync" ::: "memory"); (void)0; }) |
290 | #define smp_mb_acquire() ({ asm volatile("sync" ::: "memory"); (void)0; }) |
291 | #endif |
292 | #define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) |
293 | |
294 | #endif /* _ARCH_PPC */ |
295 | |
296 | /* |
 * For (host) platforms for which we don't have explicit barrier
 * definitions, we use the gcc __sync_synchronize() primitive to generate a
299 | * full barrier. This should be safe on all platforms, though it may |
300 | * be overkill for smp_mb_acquire() and smp_mb_release(). |
301 | */ |
302 | #ifndef smp_mb |
303 | #define smp_mb() __sync_synchronize() |
304 | #endif |
305 | |
306 | #ifndef smp_mb_acquire |
307 | #define smp_mb_acquire() __sync_synchronize() |
308 | #endif |
309 | |
310 | #ifndef smp_mb_release |
311 | #define smp_mb_release() __sync_synchronize() |
312 | #endif |
313 | |
314 | #ifndef smp_read_barrier_depends |
315 | #define smp_read_barrier_depends() barrier() |
316 | #endif |
317 | |
318 | #ifndef signal_barrier |
319 | #define signal_barrier() barrier() |
320 | #endif |
321 | |
322 | /* These will only be atomic if the processor does the fetch or store |
323 | * in a single issue memory operation |
324 | */ |
325 | #define atomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p)) |
326 | #define atomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i)) |
327 | |
328 | #define atomic_read(ptr) atomic_read__nocheck(ptr) |
#define atomic_set(ptr, i)    atomic_set__nocheck(ptr, i)
330 | |
331 | /** |
 * atomic_rcu_read - reads an RCU-protected pointer into a local variable
 * within an RCU read-side critical section. The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
338 | * |
339 | * Inserts memory barriers on architectures that require them (currently only |
340 | * Alpha) and documents which pointers are protected by RCU. |
341 | * |
342 | * atomic_rcu_read also includes a compiler barrier to ensure that |
343 | * value-speculative optimizations (e.g. VSS: Value Speculation |
 * Scheduling) do not perform the data read before the pointer read
345 | * by speculating the value of the pointer. |
346 | * |
347 | * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg(). |
348 | */ |
349 | #define atomic_rcu_read(ptr) ({ \ |
350 | typeof(*ptr) _val = atomic_read(ptr); \ |
351 | smp_read_barrier_depends(); \ |
352 | _val; \ |
353 | }) |
354 | |
355 | /** |
 * atomic_rcu_set - assigns (publishes) a pointer to a new data structure
357 | * meant to be read by RCU read-side critical sections. |
358 | * |
359 | * Documents which pointers will be dereferenced by RCU read-side critical |
360 | * sections and adds the required memory barriers on architectures requiring |
 * them. It also makes sure the compiler does not move the code that
 * initializes the data structure to after its publication.
363 | * |
364 | * Should match atomic_rcu_read(). |
365 | */ |
366 | #define atomic_rcu_set(ptr, i) do { \ |
367 | smp_wmb(); \ |
368 | atomic_set(ptr, i); \ |
369 | } while (0) |
370 | |
371 | #define atomic_load_acquire(ptr) ({ \ |
372 | typeof(*ptr) _val = atomic_read(ptr); \ |
373 | smp_mb_acquire(); \ |
374 | _val; \ |
375 | }) |
376 | |
377 | #define atomic_store_release(ptr, i) do { \ |
378 | smp_mb_release(); \ |
379 | atomic_set(ptr, i); \ |
380 | } while (0) |
381 | |
382 | #ifndef atomic_xchg |
383 | #if defined(__clang__) |
384 | #define atomic_xchg(ptr, i) __sync_swap(ptr, i) |
385 | #else |
386 | /* __sync_lock_test_and_set() is documented to be an acquire barrier only. */ |
387 | #define atomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i)) |
388 | #endif |
389 | #endif |
390 | #define atomic_xchg__nocheck atomic_xchg |
391 | |
392 | /* Provide shorter names for GCC atomic builtins. */ |
393 | #define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) |
394 | #define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) |
395 | #define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) |
396 | #define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) |
397 | #define atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) |
398 | #define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) |
399 | #define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) |
400 | |
401 | #define atomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) |
402 | #define atomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) |
403 | #define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) |
404 | #define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) |
405 | #define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) |
406 | #define atomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) |
407 | #define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) |
408 | |
409 | #define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new) |
410 | #define atomic_cmpxchg__nocheck(ptr, old, new) atomic_cmpxchg(ptr, old, new) |
411 | |
412 | /* And even shorter names that return void. */ |
413 | #define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) |
414 | #define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1)) |
415 | #define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n)) |
416 | #define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) |
417 | #define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) |
418 | #define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) |
419 | #define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n)) |
420 | |
421 | #endif /* __ATOMIC_RELAXED */ |
422 | |
423 | #ifndef smp_wmb |
424 | #define smp_wmb() smp_mb_release() |
425 | #endif |
426 | #ifndef smp_rmb |
427 | #define smp_rmb() smp_mb_acquire() |
428 | #endif |
429 | |
430 | /* This is more efficient than a store plus a fence. */ |
431 | #if !defined(__SANITIZE_THREAD__) |
432 | #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) |
433 | #define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i)) |
434 | #endif |
435 | #endif |
436 | |
/* atomic_mb_read/set semantics map to Java volatile variables. They are
438 | * less expensive on some platforms (notably POWER) than fully |
439 | * sequentially consistent operations. |
440 | * |
441 | * As long as they are used as paired operations they are safe to |
442 | * use. See docs/devel/atomics.txt for more discussion. |
443 | */ |
444 | |
445 | #ifndef atomic_mb_read |
446 | #define atomic_mb_read(ptr) \ |
447 | atomic_load_acquire(ptr) |
448 | #endif |
449 | |
450 | #ifndef atomic_mb_set |
451 | #define atomic_mb_set(ptr, i) do { \ |
452 | atomic_store_release(ptr, i); \ |
453 | smp_mb(); \ |
} while (0)
455 | #endif |
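
/* Usage sketch (illustrative; "flag_a" and "flag_b" are hypothetical): a
 * Dekker-style handshake is one such pairing; because atomic_mb_set()
 * includes a full barrier, at least one of the two threads is guaranteed to
 * observe the other's store:
 *
 *     // thread A                           // thread B
 *     atomic_mb_set(&flag_a, true);         atomic_mb_set(&flag_b, true);
 *     if (!atomic_mb_read(&flag_b)) {       if (!atomic_mb_read(&flag_a)) {
 *         ...                                   ...
 *     }                                     }
 */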
456 | |
457 | #define atomic_fetch_inc_nonzero(ptr) ({ \ |
458 | typeof_strip_qual(*ptr) _oldn = atomic_read(ptr); \ |
459 | while (_oldn && atomic_cmpxchg(ptr, _oldn, _oldn + 1) != _oldn) { \ |
460 | _oldn = atomic_read(ptr); \ |
461 | } \ |
462 | _oldn; \ |
463 | }) |
464 | |
465 | /* Abstractions to access atomically (i.e. "once") i64/u64 variables */ |
466 | #ifdef CONFIG_ATOMIC64 |
467 | static inline int64_t atomic_read_i64(const int64_t *ptr) |
468 | { |
469 | /* use __nocheck because sizeof(void *) might be < sizeof(u64) */ |
470 | return atomic_read__nocheck(ptr); |
471 | } |
472 | |
473 | static inline uint64_t atomic_read_u64(const uint64_t *ptr) |
474 | { |
475 | return atomic_read__nocheck(ptr); |
476 | } |
477 | |
478 | static inline void atomic_set_i64(int64_t *ptr, int64_t val) |
479 | { |
480 | atomic_set__nocheck(ptr, val); |
481 | } |
482 | |
483 | static inline void atomic_set_u64(uint64_t *ptr, uint64_t val) |
484 | { |
485 | atomic_set__nocheck(ptr, val); |
486 | } |
487 | |
488 | static inline void atomic64_init(void) |
489 | { |
490 | } |
491 | #else /* !CONFIG_ATOMIC64 */ |
492 | int64_t atomic_read_i64(const int64_t *ptr); |
493 | uint64_t atomic_read_u64(const uint64_t *ptr); |
494 | void atomic_set_i64(int64_t *ptr, int64_t val); |
495 | void atomic_set_u64(uint64_t *ptr, uint64_t val); |
496 | void atomic64_init(void); |
497 | #endif /* !CONFIG_ATOMIC64 */ |
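
/* Usage sketch (illustrative; "s->total_bytes" is a hypothetical counter):
 * callers use the same accessors whether or not the host has native 64-bit
 * atomics; the !CONFIG_ATOMIC64 implementations live outside this header:
 *
 *     uint64_t bytes = atomic_read_u64(&s->total_bytes);
 *     atomic_set_u64(&s->total_bytes, bytes + len);
 */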
498 | |
499 | #endif /* QEMU_ATOMIC_H */ |
500 | |