/*-------------------------------------------------------------------------
 *
 * arch-x86.h
 *    Atomic operations considerations specific to Intel x86
 *
 * Note that we actually require a 486 upwards because the 386 doesn't have
 * support for xadd and cmpxchg.  Given that the 386 isn't supported anywhere
 * anymore, that's luckily not much of a restriction.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-x86.h
 *
 *-------------------------------------------------------------------------
 */

/*
 * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads,
 * or stores to be reordered with other stores, but a later load can be
 * performed before an earlier store becomes visible, i.e. store-load
 * reordering through the store buffer is permitted.
 *
 * Technically, some x86-ish chips support uncached memory access and/or
 * special instructions that are weakly ordered.  In those cases we'd need
 * the read and write barriers to be lfence and sfence.  But since we don't
 * do those things, a compiler barrier should be enough.
 *
 * "lock; addl" has worked for longer than "mfence".  It's also rumored to be
 * faster in many scenarios.
 */

#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#if defined(__i386__) || defined(__i386)
#define pg_memory_barrier_impl() \
    __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
#elif defined(__x86_64__)
#define pg_memory_barrier_impl() \
    __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
#endif
#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */

#define pg_read_barrier_impl()  pg_compiler_barrier_impl()
#define pg_write_barrier_impl() pg_compiler_barrier_impl()
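
/*
 * Illustrative sketch (editorial, not used by this header): the reordering
 * the full barrier has to prevent is the classic store-buffering pattern.
 * With x = y = 0 initially:
 *
 *     CPU 0                      CPU 1
 *     x = 1;                     y = 1;
 *     pg_memory_barrier();       pg_memory_barrier();
 *     r0 = y;                    r1 = x;
 *
 * If pg_memory_barrier() were only a compiler barrier, each CPU's store could
 * still be sitting in its store buffer when the other CPU loads, so
 * r0 == 0 && r1 == 0 would be possible.  The "lock; addl" above drains the
 * store buffer and rules that outcome out; pg_read_barrier() and
 * pg_write_barrier(), being plain compiler barriers on x86, would not.
 */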

/*
 * Provide implementation for atomics using inline assembly on x86 gcc.  It's
 * nice to support older gcc versions, and the compare/exchange implementation
 * here is actually more efficient than the __sync variant.
 */
#if defined(HAVE_ATOMICS)

#if defined(__GNUC__) || defined(__INTEL_COMPILER)

#define PG_HAVE_ATOMIC_FLAG_SUPPORT
typedef struct pg_atomic_flag
{
    volatile char value;
} pg_atomic_flag;

#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
    volatile uint32 value;
} pg_atomic_uint32;

/*
 * It's too complicated to write inline asm for 64bit types on 32bit and the
 * 486 can't do it anyway.
 */
#ifdef __x86_64__
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
    /* alignment guaranteed due to being on a 64bit platform */
    volatile uint64 value;
} pg_atomic_uint64;
#endif /* __x86_64__ */

#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */

#endif /* defined(HAVE_ATOMICS) */

#if !defined(PG_HAVE_SPIN_DELAY)
/*
 * This sequence is equivalent to the PAUSE instruction ("rep" is
 * ignored by old IA32 processors if the following instruction is
 * not a string operation); the IA-32 Architecture Software
 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
 * PAUSE in the inner loop of a spin lock is necessary for good
 * performance:
 *
 *     The PAUSE instruction improves the performance of IA-32
 *     processors supporting Hyper-Threading Technology when
 *     executing spin-wait loops and other routines where one
 *     thread is accessing a shared lock or semaphore in a tight
 *     polling loop.  When executing a spin-wait loop, the
 *     processor can suffer a severe performance penalty when
 *     exiting the loop because it detects a possible memory order
 *     violation and flushes the core processor's pipeline.  The
 *     PAUSE instruction provides a hint to the processor that the
 *     code sequence is a spin-wait loop.  The processor uses this
 *     hint to avoid the memory order violation and prevent the
 *     pipeline flush.  In addition, the PAUSE instruction
 *     de-pipelines the spin-wait loop to prevent it from
 *     consuming execution resources excessively.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define PG_HAVE_SPIN_DELAY
static __inline__ void
pg_spin_delay_impl(void)
{
    __asm__ __volatile__(" rep; nop \n");
}
/*
 * MSVC doesn't define __x86_64__; check its native _M_AMD64 as well, so that
 * 64-bit MSVC builds take the intrinsic path (the MASM-style inline asm in
 * the branch below isn't available on x64).
 */
#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(__x86_64__))
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
{
    _mm_pause();
}
#elif defined(_MSC_VER)
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
{
    /* See comment for gcc code.  Same code, MASM syntax. */
    __asm rep nop;
}
#endif
#endif /* !defined(PG_HAVE_SPIN_DELAY) */
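
/*
 * Editorial usage sketch (hypothetical caller, not part of this header): a
 * test-and-set style spin loop would typically issue the delay hint between
 * attempts, roughly
 *
 *     while (!pg_atomic_test_set_flag(&lock_flag))
 *         pg_spin_delay_impl();
 *
 * PostgreSQL's real spinlock code (storage/s_lock.h and s_lock.c)
 * additionally backs off and eventually sleeps; this only shows where the
 * PAUSE hint fits.
 */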


#if defined(HAVE_ATOMICS)

#if defined(__GNUC__) || defined(__INTEL_COMPILER)

#define PG_HAVE_ATOMIC_TEST_SET_FLAG
static inline bool
pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
{
    register char _res = 1;

    /*
     * Atomically exchange 1 into the flag; the old value tells us whether it
     * was already set.  (xchg with a memory operand is implicitly locked, so
     * the explicit prefix is redundant, but harmless.)
     */
    __asm__ __volatile__(
        "  lock           \n"
        "  xchgb  %0,%1   \n"
        : "+q" (_res), "+m" (ptr->value)
        :
        : "memory");
    return _res == 0;
}

#define PG_HAVE_ATOMIC_CLEAR_FLAG
static inline void
pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
{
    /*
     * On a TSO architecture like x86 it's sufficient to use a compiler
     * barrier to achieve release semantics.
     */
    __asm__ __volatile__("" ::: "memory");
    ptr->value = 0;
}
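
/*
 * Editorial sketch (hypothetical names, not part of this header): through
 * the generic wrappers in port/atomics.h these two primitives behave like a
 * test-and-set spinlock, e.g.
 *
 *     pg_atomic_flag busy;
 *
 *     pg_atomic_init_flag(&busy);
 *     if (pg_atomic_test_set_flag(&busy))
 *     {
 *         ... critical section ...
 *         pg_atomic_clear_flag(&busy);
 *     }
 *
 * The acquire side gets its ordering from the locked xchg, the release side
 * from the compiler barrier plus x86's TSO store ordering.
 */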

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
                                    uint32 *expected, uint32 newval)
{
    char ret;

    /*
     * Perform cmpxchg and use the zero flag, which it implicitly sets when
     * the values are equal, to report success.
     */
    __asm__ __volatile__(
        "  lock              \n"
        "  cmpxchgl  %4,%5   \n"
        "  setz      %2      \n"
        : "=a" (*expected), "=m" (ptr->value), "=q" (ret)
        : "a" (*expected), "r" (newval), "m" (ptr->value)
        : "memory", "cc");
    return (bool) ret;
}
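
/*
 * Editorial usage sketch (hypothetical caller): a typical compare-exchange
 * retry loop through the public wrapper relies on *expected being refreshed
 * with the current value on failure, which the "=a" output above provides:
 *
 *     uint32 old = pg_atomic_read_u32(&counter);
 *     while (!pg_atomic_compare_exchange_u32(&counter, &old, old | FLAG_BIT))
 *         ;    (old now holds the freshly observed value)
 *
 * "counter" and "FLAG_BIT" are made-up names for illustration only.
 */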

#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
    uint32 res;
    __asm__ __volatile__(
        "  lock           \n"
        "  xaddl  %0,%1   \n"
        : "=q" (res), "=m" (ptr->value)
        : "0" (add_), "m" (ptr->value)
        : "memory", "cc");
    return res;
}
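
/*
 * Editorial note: xadd leaves the previous contents of the target in the
 * source register, so the value returned is the counter as it was *before*
 * the addition, e.g. (hypothetical caller)
 *
 *     uint32 my_ticket = pg_atomic_fetch_add_u32(&next_ticket, 1);
 */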

#ifdef __x86_64__

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
                                    uint64 *expected, uint64 newval)
{
    char ret;

    /*
     * Perform cmpxchg and use the zero flag, which it implicitly sets when
     * the values are equal, to report success.
     */
    __asm__ __volatile__(
        "  lock              \n"
        "  cmpxchgq  %4,%5   \n"
        "  setz      %2      \n"
        : "=a" (*expected), "=m" (ptr->value), "=q" (ret)
        : "a" (*expected), "r" (newval), "m" (ptr->value)
        : "memory", "cc");
    return (bool) ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
    uint64 res;
    __asm__ __volatile__(
        "  lock           \n"
        "  xaddq  %0,%1   \n"
        : "=q" (res), "=m" (ptr->value)
        : "0" (add_), "m" (ptr->value)
        : "memory", "cc");
    return res;
}

#endif /* __x86_64__ */

#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */

/*
 * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
 * since at least the 586, as well as on all x86-64 cpus.
 */
#if defined(__i586__) || defined(__i686__) ||    /* gcc i586+ */ \
    (defined(_M_IX86) && _M_IX86 >= 500) ||      /* msvc i586+ */ \
    defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
#endif /* 8 byte single-copy atomicity */
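
/*
 * Editorial note: as far as I can tell, defining this lets the generic
 * fallback (port/atomics/generic.h) implement 64-bit atomic read/write as
 * plain loads and stores instead of emulating them with compare-exchange.
 */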

#endif /* HAVE_ATOMICS */
| 253 | |