atomic.h source code [qemu/include/qemu/atomic.h]

1	/*
2	* Simple interface for atomic operations.
3	*
4	* Copyright (C) 2013 Red Hat, Inc.
5	*
6	* Author: Paolo Bonzini <pbonzini@redhat.com>
7	*
8	* This work is licensed under the terms of the GNU GPL, version 2 or later.
9	* See the COPYING file in the top-level directory.
10	*
11	* See docs/devel/atomics.txt for discussion about the guarantees each
12	* atomic primitive is meant to provide.
13	*/
14
15	#ifndef QEMU_ATOMIC_H
16	#define QEMU_ATOMIC_H
17
/*
 * Compiler barrier: an empty asm statement with a "memory" clobber.
 * The asm template is empty, so no instruction is emitted; the clobber
 * tells the compiler that memory may have changed, so it must not keep
 * memory values cached in registers across this point.
 *
 * Spelled with __asm__ so the macro also expands in strict ISO modes
 * (e.g. -std=c11), where the plain "asm" keyword is not recognised.
 * The statement expression evaluates to (void)0 so the macro can be used
 * wherever an expression is expected.
 */
#define barrier()   ({ __asm__ __volatile__("" ::: "memory"); (void)0; })
20
/*
 * The variable that receives the old value of an atomically-accessed
 * variable must be non-qualified, because atomic builtins return values
 * through a pointer-type argument as in __atomic_load(&var, &old, MODEL).
 *
 * This macro has to handle types smaller than int manually, because of
 * implicit promotion.  int and larger types, as well as pointers, can be
 * converted to a non-qualified type just by applying a binary operator.
 *
 * __typeof__ is used instead of typeof so that the macro also expands in
 * strict ISO modes.  "bool" must be defined where the macro is expanded.
 */
#define typeof_strip_qual(expr)                                                    \
  __typeof__(                                                                      \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(__typeof__(expr), bool) ||                      \
        __builtin_types_compatible_p(__typeof__(expr), const bool) ||              \
        __builtin_types_compatible_p(__typeof__(expr), volatile bool) ||           \
        __builtin_types_compatible_p(__typeof__(expr), const volatile bool),       \
        (bool)1,                                                                   \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(__typeof__(expr), signed char) ||               \
        __builtin_types_compatible_p(__typeof__(expr), const signed char) ||       \
        __builtin_types_compatible_p(__typeof__(expr), volatile signed char) ||    \
        __builtin_types_compatible_p(__typeof__(expr), const volatile signed char), \
        (signed char)1,                                                            \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(__typeof__(expr), unsigned char) ||             \
        __builtin_types_compatible_p(__typeof__(expr), const unsigned char) ||     \
        __builtin_types_compatible_p(__typeof__(expr), volatile unsigned char) ||  \
        __builtin_types_compatible_p(__typeof__(expr), const volatile unsigned char), \
        (unsigned char)1,                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(__typeof__(expr), signed short) ||              \
        __builtin_types_compatible_p(__typeof__(expr), const signed short) ||      \
        __builtin_types_compatible_p(__typeof__(expr), volatile signed short) ||   \
        __builtin_types_compatible_p(__typeof__(expr), const volatile signed short), \
        (signed short)1,                                                           \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(__typeof__(expr), unsigned short) ||            \
        __builtin_types_compatible_p(__typeof__(expr), const unsigned short) ||    \
        __builtin_types_compatible_p(__typeof__(expr), volatile unsigned short) || \
        __builtin_types_compatible_p(__typeof__(expr), const volatile unsigned short), \
        (unsigned short)1,                                                         \
      (expr)+0))))))
62
63	#ifdef __ATOMIC_RELAXED
/* For C11 atomic ops */
65
/* Manual memory barriers
 *
 * __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics.  If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */

/* Each macro issues a compiler barrier followed by a thread fence. */
#define smp_mb()                     ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); })
#define smp_mb_release()             ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); })
#define smp_mb_acquire()             ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); })
78
/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here.  Leave it in if
 * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
 */
#if defined(__SANITIZE_THREAD__)
/* Thread Sanitizer build: keep a real consume fence. */
#define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); })
#elif defined(__alpha__)
/* Alpha: issue the "mb" instruction. */
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#else
/* Everything else: a compiler barrier only. */
#define smp_read_barrier_depends()   barrier()
#endif
90
/*
 * signal_barrier(): make all pending local memory operations observable
 * before a SIGSEGV is delivered to the same thread.  This behaves exactly
 * like barrier() in practice; the dedicated signal-fence builtin is used
 * simply because it exists for this purpose.
 */
#define signal_barrier()    (__atomic_signal_fence(__ATOMIC_SEQ_CST))
97
/* Sanity check that the size of an atomic operation isn't "overly large".
 * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
 * want to use them because we ought not need them, and this lets us do a
 * bit of sanity checking that other 32-bit hosts might build.
 *
 * That said, we have a problem on 64-bit ILP32 hosts in that in order to
 * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
 * We'd prefer not to pull in everything else TCG related, so handle
 * those few cases by hand.
 *
 * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
 * Sparc we always force the use of sparcv9 in configure. MIPS n32 (ILP32) &
 * n64 (LP64) ABIs are both detected using __mips64.
 */
#if defined(__x86_64__) || defined(__sparc__) || defined(__mips64)
# define ATOMIC_REG_SIZE  8
#else
# define ATOMIC_REG_SIZE  sizeof(void *)
#endif
117
/* Weak atomic operations prevent the compiler moving other
 * loads/stores past the atomic operation load/store. However there is
 * no explicit memory barrier for the processor.
 *
 * The C11 memory model says that variables that are accessed from
 * different threads should at least be done with __ATOMIC_RELAXED
 * primitives or the result is undefined. Generally this has little to
 * no effect on the generated code but not using the atomic primitives
 * will get flagged by sanitizers as a violation.
 *
 * The __nocheck variants skip the build-time size check against
 * ATOMIC_REG_SIZE.  The checked variants dereference (ptr) with explicit
 * parentheses so that a non-trivial pointer expression (for example a
 * conditional expression) is sized correctly.
 */
#define atomic_read__nocheck(ptr) \
    __atomic_load_n(ptr, __ATOMIC_RELAXED)

#define atomic_read(ptr)                                  \
    ({                                                    \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);  \
    atomic_read__nocheck(ptr);                            \
    })

#define atomic_set__nocheck(ptr, i) \
    __atomic_store_n(ptr, i, __ATOMIC_RELAXED)

#define atomic_set(ptr, i)  do {                          \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);  \
    atomic_set__nocheck(ptr, i);                          \
} while(0)
144
/* See above: most compilers currently treat consume and acquire the
 * same, but this slows down atomic_rcu_read unnecessarily.
 *
 * Loads *ptr into *valptr.  Each variant is wrapped in do { } while (0)
 * so that it expands to exactly one statement and is safe as the body of
 * an unbraced if/else; callers supply the terminating semicolon.
 */
#ifdef __SANITIZE_THREAD__
#define atomic_rcu_read__nocheck(ptr, valptr)           \
    do {                                                \
        __atomic_load(ptr, valptr, __ATOMIC_CONSUME);   \
    } while (0)
#else
#define atomic_rcu_read__nocheck(ptr, valptr)           \
    do {                                                \
        __atomic_load(ptr, valptr, __ATOMIC_RELAXED);   \
        smp_read_barrier_depends();                     \
    } while (0)
#endif
156
/*
 * atomic_rcu_read(ptr): load *ptr through atomic_rcu_read__nocheck() into
 * an unqualified temporary and yield that value, after a build-time check
 * that the object is no larger than ATOMIC_REG_SIZE.
 *
 * (ptr) is parenthesized before being dereferenced so that arbitrary
 * pointer expressions are handled correctly.
 */
#define atomic_rcu_read(ptr)                              \
    ({                                                    \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);  \
    typeof_strip_qual(*(ptr)) _val;                       \
    atomic_rcu_read__nocheck(ptr, &_val);                 \
    _val;                                                 \
    })

/*
 * atomic_rcu_set(ptr, i): store i to *ptr with release ordering, after the
 * same build-time size check.
 */
#define atomic_rcu_set(ptr, i) do {                       \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);  \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);           \
} while(0)
169
/*
 * atomic_load_acquire(ptr): acquire-load of *ptr into an unqualified
 * temporary, which is the value of the expression.  The object size is
 * checked against ATOMIC_REG_SIZE at build time.  (ptr) is parenthesized
 * before being dereferenced so any pointer expression is accepted.
 */
#define atomic_load_acquire(ptr)                          \
    ({                                                    \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);  \
    typeof_strip_qual(*(ptr)) _val;                       \
    __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);          \
    _val;                                                 \
    })

/*
 * atomic_store_release(ptr, i): release-store of i to *ptr, with the same
 * build-time size check.
 */
#define atomic_store_release(ptr, i)  do {                \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);  \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);           \
} while(0)
182
183
/* All the remaining operations are fully sequentially consistent */

/* Atomically store i into *ptr and yield the previous value. */
#define atomic_xchg__nocheck(ptr, i)    ({                  \
    __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST);        \
})

/* As above, plus a build-time check of the object size. */
#define atomic_xchg(ptr, i)    ({                           \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);    \
    atomic_xchg__nocheck(ptr, i);                           \
})
194
/* Returns the eventual value, failed or not */
/*
 * _old starts as the expected value; __atomic_compare_exchange_n() writes
 * the value actually found in *ptr back into it when the comparison fails,
 * so _old is the value *ptr held before the operation in either case.
 */
#define atomic_cmpxchg__nocheck(ptr, old, new)    ({                    \
    typeof_strip_qual(*(ptr)) _old = (old);                             \
    (void)__atomic_compare_exchange_n(ptr, &_old, new, false,           \
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);      \
    _old;                                                               \
})

/* As above, plus a build-time check of the object size. */
#define atomic_cmpxchg(ptr, old, new)    ({                             \
    QEMU_BUILD_BUG_ON(sizeof(*(ptr)) > ATOMIC_REG_SIZE);                \
    atomic_cmpxchg__nocheck(ptr, old, new);                             \
})
207
/* Provide shorter names for GCC atomic builtins, return old value */
#define atomic_fetch_inc(ptr)  __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_dec(ptr)  __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)
216
/*
 * Sequentially consistent read-modify-write helpers that yield the NEW
 * value.  The increment/decrement forms are the add/sub forms with a
 * constant operand of 1.
 */
#define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_or_fetch(ptr, n)  __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_inc_fetch(ptr)    atomic_add_fetch(ptr, 1)
#define atomic_dec_fetch(ptr)    atomic_sub_fetch(ptr, 1)
224
/* And even shorter names that return void.  */
#define atomic_inc(ptr)    ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_dec(ptr)    ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_add(ptr, n) ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST))
233
234	#else /* __ATOMIC_RELAXED */
235
236	/*
237	* We use GCC builtin if it's available, as that can use mfence on
238	* 32-bit as well, e.g. if built with -march=pentium-m. However, on
239	* i386 the spec is buggy, and the implementation followed it until
240	* 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
241	*/
242	#if defined(__i386__) \|\| defined(__x86_64__)
243	#if !QEMU_GNUC_PREREQ(4, 4)
244	#if defined __x86_64__
245	#define smp_mb() ({ asm volatile("mfence" ::: "memory"); (void)0; })
246	#else
247	#define smp_mb() ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
248	#endif
249	#endif
250	#endif
251
252
/* Alpha: dependent loads need the "mb" instruction. */
#if defined(__alpha__)
# define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#endif
256
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)

/*
 * Because of the strongly ordered storage model, wmb() and rmb() are nops
 * here (a compiler barrier only).  QEMU doesn't do accesses to write-combining
 * qemu memory or non-temporal load/stores from C code.
 */
#define smp_mb_release()   barrier()
#define smp_mb_acquire()   barrier()

/*
 * __sync_lock_test_and_set() is documented to be an acquire barrier only,
 * but it is a full barrier at the hardware level.  Add a compiler barrier
 * to make it a full barrier also at the compiler level.
 */
#define atomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))

#elif defined(_ARCH_PPC)

/*
 * We use an eieio() for wmb() on powerpc.  This assumes we don't
 * need to order cacheable and non-cacheable stores with respect to
 * each other.
 *
 * smp_mb has the same problem as on x86 for not-very-new GCC
 * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
 */
#define smp_wmb()          ({ asm volatile("eieio" ::: "memory"); (void)0; })
#if defined(__powerpc64__)
#define smp_mb_release()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#define smp_mb_acquire()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#else
#define smp_mb_release()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#define smp_mb_acquire()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#endif
#define smp_mb()           ({ asm volatile("sync" ::: "memory"); (void)0; })

#endif /* _ARCH_PPC */
295
/*
 * Generic fallbacks for hosts without dedicated barrier definitions.
 * Anything still undefined at this point is mapped to gcc's
 * __sync_synchronize() full barrier, which should be safe everywhere
 * even if it is stronger than smp_mb_acquire()/smp_mb_release() need.
 * The dependency and signal barriers fall back to a compiler barrier.
 */
#if !defined(smp_mb)
# define smp_mb()                    __sync_synchronize()
#endif

#if !defined(smp_mb_acquire)
# define smp_mb_acquire()            __sync_synchronize()
#endif

#if !defined(smp_mb_release)
# define smp_mb_release()            __sync_synchronize()
#endif

#if !defined(smp_read_barrier_depends)
# define smp_read_barrier_depends()  barrier()
#endif

#if !defined(signal_barrier)
# define signal_barrier()            barrier()
#endif
321
/* These will only be atomic if the processor does the fetch or store
 * in a single issue memory operation
 *
 * Both macros view the pointee through a volatile-qualified pointer and
 * dereference it: the read yields the value of *p, the write assigns i
 * to *p.
 */
#define atomic_read__nocheck(p)   (*(__typeof__(*(p)) volatile*) (p))
#define atomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i))

#define atomic_read(ptr)       atomic_read__nocheck(ptr)
#define atomic_set(ptr, i)     atomic_set__nocheck(ptr,i)
330
/**
 * atomic_rcu_read - reads a RCU-protected pointer to a local variable
 * into a RCU read-side critical section. The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
 *
 * Inserts memory barriers on architectures that require them (currently only
 * Alpha) and documents which pointers are protected by RCU.
 *
 * atomic_rcu_read also includes a compiler barrier to ensure that
 * value-speculative optimizations (e.g. VSS: Value Speculation
 * Scheduling) do not perform the data read before the pointer read
 * by speculating the value of the pointer.
 *
 * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg().
 */
#define atomic_rcu_read(ptr)    ({                \
    __typeof__(*(ptr)) _val = atomic_read(ptr);   \
    smp_read_barrier_depends();                   \
    _val;                                         \
})
354
/**
 * atomic_rcu_set - publish a pointer to a new data structure that RCU
 * read-side critical sections will dereference.
 *
 * Marks the pointer as RCU-protected and issues the write barrier
 * required before publication, so that neither the compiler nor the
 * architecture moves the initialisation of the data structure after the
 * store that makes it visible.
 *
 * Pairs with atomic_rcu_read().
 */
#define atomic_rcu_set(ptr, i)                    \
    do {                                          \
        smp_wmb();                                \
        atomic_set(ptr, i);                       \
    } while (0)
370
/*
 * atomic_load_acquire(ptr): read *ptr, then issue smp_mb_acquire(); the
 * value read is the value of the expression.  (ptr) is parenthesized
 * before being dereferenced so any pointer expression is accepted.
 */
#define atomic_load_acquire(ptr)    ({            \
    __typeof__(*(ptr)) _val = atomic_read(ptr);   \
    smp_mb_acquire();                             \
    _val;                                         \
})

/*
 * atomic_store_release(ptr, i): issue smp_mb_release(), then write i
 * to *ptr.
 */
#define atomic_store_release(ptr, i)  do {        \
    smp_mb_release();                             \
    atomic_set(ptr, i);                           \
} while (0)
381
/*
 * Fallback atomic_xchg() for hosts that did not define one above:
 * clang's __sync_swap(), or a full barrier followed by
 * __sync_lock_test_and_set() on other compilers.
 */
#ifndef atomic_xchg
#if defined(__clang__)
#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
#else
/* __sync_lock_test_and_set() is documented to be an acquire barrier only.  */
#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif
#define atomic_xchg__nocheck atomic_xchg
391
/* Provide shorter names for GCC atomic builtins.  */
/* Each yields the value *ptr held before the operation. */
#define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n)
#define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n)
#define atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n)
#define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n)
#define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n)
400
/*
 * __sync-based read-modify-write helpers that yield the NEW value.
 * Increment and decrement are both expressed as an add, of 1 and -1.
 */
#define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n)
#define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n)
#define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n)
#define atomic_or_fetch(ptr, n)  __sync_or_and_fetch(ptr, n)
#define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n)
#define atomic_inc_fetch(ptr)    atomic_add_fetch(ptr, 1)
#define atomic_dec_fetch(ptr)    atomic_add_fetch(ptr, -1)

/* Compare-and-swap; yields the value *ptr held before the operation. */
#define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new)
#define atomic_cmpxchg__nocheck(ptr, old, new)  atomic_cmpxchg(ptr, old, new)
411
/* And even shorter names that return void.  */
#define atomic_inc(ptr)    ((void) __sync_fetch_and_add(ptr, 1))
#define atomic_dec(ptr)    ((void) __sync_fetch_and_add(ptr, -1))
#define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n))
#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n)  ((void) __sync_fetch_and_or(ptr, n))
#define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n))
420
421	#endif /* __ATOMIC_RELAXED */
422
/*
 * Unless already provided above, the write and read barriers are the
 * release and acquire barriers respectively.
 */
#if !defined(smp_wmb)
# define smp_wmb()   smp_mb_release()
#endif
#if !defined(smp_rmb)
# define smp_rmb()   smp_mb_acquire()
#endif
429
/* This is more efficient than a store plus a fence.  */
/* Only on x86 and s390x, and not under Thread Sanitizer. */
#if !defined(__SANITIZE_THREAD__)
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
#define atomic_mb_set(ptr, i)  ((void)atomic_xchg(ptr, i))
#endif
#endif
436
/* atomic_mb_read/set semantics map Java volatile variables. They are
 * less expensive on some platforms (notably POWER) than fully
 * sequentially consistent operations.
 *
 * As long as they are used as paired operations they are safe to
 * use. See docs/devel/atomics.txt for more discussion.
 */

/* Default read: an acquire load. */
#ifndef atomic_mb_read
#define atomic_mb_read(ptr)                             \
    atomic_load_acquire(ptr)
#endif

/* Default write: a release store followed by a full barrier. */
#ifndef atomic_mb_set
#define atomic_mb_set(ptr, i)  do {                     \
    atomic_store_release(ptr, i);                       \
    smp_mb();                                           \
} while(0)
#endif
456
/*
 * atomic_fetch_inc_nonzero(ptr): increment *ptr unless it is zero.
 *
 * Reads the current value, then retries a compare-and-swap of
 * (value -> value + 1) until it succeeds or the value read is zero.
 * The expression yields the last value read: zero if no increment was
 * performed, otherwise the value before the increment.
 *
 * (ptr) is parenthesized before being dereferenced so any pointer
 * expression is accepted.
 */
#define atomic_fetch_inc_nonzero(ptr) ({                                \
    typeof_strip_qual(*(ptr)) _oldn = atomic_read(ptr);                 \
    while (_oldn && atomic_cmpxchg(ptr, _oldn, _oldn + 1) != _oldn) {   \
        _oldn = atomic_read(ptr);                                       \
    }                                                                   \
    _oldn;                                                              \
})
464
/* Abstractions to access atomically (i.e. "once") i64/u64 variables */
#ifdef CONFIG_ATOMIC64
/* Read *ptr with the unchecked atomic read primitive. */
static inline int64_t atomic_read_i64(const int64_t *ptr)
{
    /* use __nocheck because sizeof(void *) might be < sizeof(u64) */
    return atomic_read__nocheck(ptr);
}

/* Unsigned counterpart of atomic_read_i64(). */
static inline uint64_t atomic_read_u64(const uint64_t *ptr)
{
    return atomic_read__nocheck(ptr);
}

/* Write val to *ptr with the unchecked atomic store primitive. */
static inline void atomic_set_i64(int64_t *ptr, int64_t val)
{
    atomic_set__nocheck(ptr, val);
}

/* Unsigned counterpart of atomic_set_i64(). */
static inline void atomic_set_u64(uint64_t *ptr, uint64_t val)
{
    atomic_set__nocheck(ptr, val);
}

/* Nothing to initialise in this configuration. */
static inline void atomic64_init(void)
{
}
#else /* !CONFIG_ATOMIC64 */
/* Only declared here in this configuration; the definitions are not in this header. */
int64_t  atomic_read_i64(const int64_t *ptr);
uint64_t atomic_read_u64(const uint64_t *ptr);
void atomic_set_i64(int64_t *ptr, int64_t val);
void atomic_set_u64(uint64_t *ptr, uint64_t val);
void atomic64_init(void);
#endif /* !CONFIG_ATOMIC64 */
498
499	#endif /* QEMU_ATOMIC_H */
500

Browse the source code of qemu/include/qemu/atomic.h