1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * atomics.h |
4 | * Atomic operations. |
5 | * |
6 | * Hardware and compiler dependent functions for manipulating memory |
7 | * atomically and dealing with cache coherency. Used to implement locking |
8 | * facilities and lockless algorithms/data structures. |
9 | * |
 * To bring up postgres on a platform/compiler, at the very least
 * implementations for the following operations should be provided:
12 | * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() |
13 | * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() |
14 | * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag() |
15 | * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate. |
16 | * |
17 | * There exist generic, hardware independent, implementations for several |
18 | * compilers which might be sufficient, although possibly not optimal, for a |
19 | * new platform. If no such generic implementation is available spinlocks (or |
20 | * even OS provided semaphores) will be used to implement the API. |
21 | * |
22 | * Implement _u64 atomics if and only if your platform can use them |
23 | * efficiently (and obviously correctly). |
24 | * |
25 | * Use higher level functionality (lwlocks, spinlocks, heavyweight locks) |
26 | * whenever possible. Writing correct code using these facilities is hard. |
27 | * |
28 | * For an introduction to using memory barriers within the PostgreSQL backend, |
29 | * see src/backend/storage/lmgr/README.barrier |
30 | * |
31 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
32 | * Portions Copyright (c) 1994, Regents of the University of California |
33 | * |
34 | * src/include/port/atomics.h |
35 | * |
36 | *------------------------------------------------------------------------- |
37 | */ |
38 | #ifndef ATOMICS_H |
39 | #define ATOMICS_H |
40 | |
41 | #ifdef FRONTEND |
42 | #error "atomics.h may not be included from frontend code" |
43 | #endif |
44 | |
45 | #define INSIDE_ATOMICS_H |
46 | |
47 | #include <limits.h> |
48 | |
49 | /* |
50 | * First a set of architecture specific files is included. |
51 | * |
52 | * These files can provide the full set of atomics or can do pretty much |
53 | * nothing if all the compilers commonly used on these platforms provide |
54 | * usable generics. |
55 | * |
 * Don't add inline assembly for the actual atomic operations if all the
 * compilers commonly used on your platform provide intrinsics. Intrinsics
 * are much easier to understand and potentially support more architectures.
59 | * |
 * It will often make sense to define memory barrier semantics here, since
 * e.g. generic compiler intrinsics for x86 memory barriers can't know that
 * postgres doesn't need x86 read/write barriers to do anything more than a
 * compiler barrier.
 */
66 | #if defined(__arm__) || defined(__arm) || \ |
67 | defined(__aarch64__) || defined(__aarch64) |
68 | #include "port/atomics/arch-arm.h" |
69 | #elif defined(__i386__) || defined(__i386) || defined(__x86_64__) |
70 | #include "port/atomics/arch-x86.h" |
71 | #elif defined(__ia64__) || defined(__ia64) |
72 | #include "port/atomics/arch-ia64.h" |
73 | #elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) |
74 | #include "port/atomics/arch-ppc.h" |
75 | #elif defined(__hppa) || defined(__hppa__) |
76 | #include "port/atomics/arch-hppa.h" |
77 | #endif |
78 | |
79 | /* |
80 | * Compiler specific, but architecture independent implementations. |
81 | * |
82 | * Provide architecture independent implementations of the atomic |
83 | * facilities. At the very least compiler barriers should be provided, but a |
84 | * full implementation of |
85 | * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() |
86 | * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() |
 * using compiler intrinsics is a good idea.
88 | */ |
89 | /* |
90 | * Given a gcc-compatible xlc compiler, prefer the xlc implementation. The |
91 | * ppc64le "IBM XL C/C++ for Linux, V13.1.2" implements both interfaces, but |
92 | * __sync_lock_test_and_set() of one-byte types elicits SIGSEGV. |
93 | */ |
94 | #if defined(__IBMC__) || defined(__IBMCPP__) |
95 | #include "port/atomics/generic-xlc.h" |
96 | /* gcc or compatible, including clang and icc */ |
97 | #elif defined(__GNUC__) || defined(__INTEL_COMPILER) |
98 | #include "port/atomics/generic-gcc.h" |
99 | #elif defined(_MSC_VER) |
100 | #include "port/atomics/generic-msvc.h" |
101 | #elif defined(__hpux) && defined(__ia64) && !defined(__GNUC__) |
102 | #include "port/atomics/generic-acc.h" |
103 | #elif defined(__SUNPRO_C) && !defined(__GNUC__) |
104 | #include "port/atomics/generic-sunpro.h" |
105 | #else |
106 | /* |
 * Unsupported compiler; we'll likely use slower fallbacks... At the very
 * least, compiler barriers should be provided.
109 | */ |
110 | #endif |
111 | |
112 | /* |
 * Provide a full fallback of the pg_*_barrier(), pg_atomic_*_flag and
114 | * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics |
115 | * support. In the case of spinlock backed atomics the emulation is expected |
116 | * to be efficient, although less so than native atomics support. |
117 | */ |
118 | #include "port/atomics/fallback.h" |
119 | |
120 | /* |
121 | * Provide additional operations using supported infrastructure. These are |
122 | * expected to be efficient if the underlying atomic operations are efficient. |
123 | */ |
124 | #include "port/atomics/generic.h" |
125 | |
126 | |
127 | /* |
 * pg_compiler_barrier - prevent the compiler from moving code across the barrier
129 | * |
130 | * A compiler barrier need not (and preferably should not) emit any actual |
131 | * machine code, but must act as an optimization fence: the compiler must not |
132 | * reorder loads or stores to main memory around the barrier. However, the |
133 | * CPU may still reorder loads or stores at runtime, if the architecture's |
134 | * memory model permits this. |
135 | */ |
136 | #define pg_compiler_barrier() pg_compiler_barrier_impl() |
137 | |
138 | /* |
139 | * pg_memory_barrier - prevent the CPU from reordering memory access |
140 | * |
141 | * A memory barrier must act as a compiler barrier, and in addition must |
142 | * guarantee that all loads and stores issued prior to the barrier are |
143 | * completed before any loads or stores issued after the barrier. Unless |
144 | * loads and stores are totally ordered (which is not the case on most |
145 | * architectures) this requires issuing some sort of memory fencing |
146 | * instruction. |
147 | */ |
148 | #define pg_memory_barrier() pg_memory_barrier_impl() |
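
/*
 * Illustration only (not part of the API): a full barrier is required when
 * a store must be ordered against a later load, as in a Dekker-style
 * handshake over two hypothetical shared flags:
 *
 *		process 1:						process 2:
 *			interested1 = true;				interested2 = true;
 *			pg_memory_barrier();			pg_memory_barrier();
 *			if (!interested2)				if (!interested1)
 *				... proceed ...					... proceed ...
 *
 * Neither a read nor a write barrier alone is sufficient here, because the
 * store and the subsequent load must be ordered against each other.
 */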
149 | |
150 | /* |
151 | * pg_(read|write)_barrier - prevent the CPU from reordering memory access |
152 | * |
153 | * A read barrier must act as a compiler barrier, and in addition must |
154 | * guarantee that any loads issued prior to the barrier are completed before |
155 | * any loads issued after the barrier. Similarly, a write barrier acts |
156 | * as a compiler barrier, and also orders stores. Read and write barriers |
157 | * are thus weaker than a full memory barrier, but stronger than a compiler |
158 | * barrier. In practice, on machines with strong memory ordering, read and |
159 | * write barriers may require nothing more than a compiler barrier. |
160 | */ |
161 | #define pg_read_barrier() pg_read_barrier_impl() |
162 | #define pg_write_barrier() pg_write_barrier_impl() |
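
/*
 * Illustration only (not part of the API): the canonical message-passing
 * pattern, using hypothetical shared variables "payload" and "ready":
 *
 *		writer:							reader:
 *			payload = value;				if (ready)
 *			pg_write_barrier();				{
 *			ready = true;						pg_read_barrier();
 *												use(payload);
 *											}
 *
 * The write barrier ensures the payload becomes visible no later than the
 * flag; the read barrier ensures the payload is not read before the flag.
 * See src/backend/storage/lmgr/README.barrier for a fuller discussion.
 */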
163 | |
164 | /* |
165 | * Spinloop delay - Allow CPU to relax in busy loops |
166 | */ |
167 | #define pg_spin_delay() pg_spin_delay_impl() |
168 | |
169 | /* |
170 | * pg_atomic_init_flag - initialize atomic flag. |
171 | * |
172 | * No barrier semantics. |
173 | */ |
174 | static inline void |
175 | pg_atomic_init_flag(volatile pg_atomic_flag *ptr) |
176 | { |
177 | pg_atomic_init_flag_impl(ptr); |
178 | } |
179 | |
180 | /* |
 * pg_atomic_test_set_flag - TAS()
182 | * |
183 | * Returns true if the flag has successfully been set, false otherwise. |
184 | * |
185 | * Acquire (including read barrier) semantics. |
186 | */ |
187 | static inline bool |
188 | pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr) |
189 | { |
190 | return pg_atomic_test_set_flag_impl(ptr); |
191 | } |
192 | |
193 | /* |
194 | * pg_atomic_unlocked_test_flag - Check if the lock is free |
195 | * |
196 | * Returns true if the flag currently is not set, false otherwise. |
197 | * |
198 | * No barrier semantics. |
199 | */ |
200 | static inline bool |
201 | pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr) |
202 | { |
203 | return pg_atomic_unlocked_test_flag_impl(ptr); |
204 | } |
205 | |
206 | /* |
207 | * pg_atomic_clear_flag - release lock set by TAS() |
208 | * |
209 | * Release (including write barrier) semantics. |
210 | */ |
211 | static inline void |
212 | pg_atomic_clear_flag(volatile pg_atomic_flag *ptr) |
213 | { |
214 | pg_atomic_clear_flag_impl(ptr); |
215 | } |
216 | |
217 | |
218 | /* |
219 | * pg_atomic_init_u32 - initialize atomic variable |
220 | * |
 * Has to be done before any concurrent usage.
222 | * |
223 | * No barrier semantics. |
224 | */ |
225 | static inline void |
226 | pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val) |
227 | { |
228 | AssertPointerAlignment(ptr, 4); |
229 | |
230 | pg_atomic_init_u32_impl(ptr, val); |
231 | } |
232 | |
233 | /* |
234 | * pg_atomic_read_u32 - unlocked read from atomic variable. |
235 | * |
 * The read is guaranteed to return a value that was written by this or
 * another process at some point in the past; there is, however, no cache
 * coherency interaction guaranteeing that the value hasn't been overwritten
 * since.
240 | * |
241 | * No barrier semantics. |
242 | */ |
243 | static inline uint32 |
244 | pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr) |
245 | { |
246 | AssertPointerAlignment(ptr, 4); |
247 | return pg_atomic_read_u32_impl(ptr); |
248 | } |
249 | |
250 | /* |
251 | * pg_atomic_write_u32 - write to atomic variable. |
252 | * |
253 | * The write is guaranteed to succeed as a whole, i.e. it's not possible to |
254 | * observe a partial write for any reader. Note that this correctly interacts |
255 | * with pg_atomic_compare_exchange_u32, in contrast to |
256 | * pg_atomic_unlocked_write_u32(). |
257 | * |
258 | * No barrier semantics. |
259 | */ |
260 | static inline void |
261 | pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val) |
262 | { |
263 | AssertPointerAlignment(ptr, 4); |
264 | |
265 | pg_atomic_write_u32_impl(ptr, val); |
266 | } |
267 | |
268 | /* |
269 | * pg_atomic_unlocked_write_u32 - unlocked write to atomic variable. |
270 | * |
271 | * The write is guaranteed to succeed as a whole, i.e. it's not possible to |
272 | * observe a partial write for any reader. But note that writing this way is |
273 | * not guaranteed to correctly interact with read-modify-write operations like |
274 | * pg_atomic_compare_exchange_u32. This should only be used in cases where |
275 | * minor performance regressions due to atomics emulation are unacceptable. |
276 | * |
277 | * No barrier semantics. |
278 | */ |
279 | static inline void |
280 | pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val) |
281 | { |
282 | AssertPointerAlignment(ptr, 4); |
283 | |
284 | pg_atomic_unlocked_write_u32_impl(ptr, val); |
285 | } |
286 | |
287 | /* |
288 | * pg_atomic_exchange_u32 - exchange newval with current value |
289 | * |
290 | * Returns the old value of 'ptr' before the swap. |
291 | * |
292 | * Full barrier semantics. |
293 | */ |
294 | static inline uint32 |
295 | pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval) |
296 | { |
297 | AssertPointerAlignment(ptr, 4); |
298 | |
299 | return pg_atomic_exchange_u32_impl(ptr, newval); |
300 | } |
301 | |
302 | /* |
303 | * pg_atomic_compare_exchange_u32 - CAS operation |
304 | * |
 * Atomically compare the current value of *ptr with *expected and store
 * newval iff *ptr and *expected have the same value. The current value of
 * *ptr will always be stored in *expected.
308 | * |
309 | * Return true if values have been exchanged, false otherwise. |
310 | * |
311 | * Full barrier semantics. |
312 | */ |
313 | static inline bool |
314 | pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, |
315 | uint32 *expected, uint32 newval) |
316 | { |
317 | AssertPointerAlignment(ptr, 4); |
318 | AssertPointerAlignment(expected, 4); |
319 | |
320 | return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval); |
321 | } |
322 | |
323 | /* |
324 | * pg_atomic_fetch_add_u32 - atomically add to variable |
325 | * |
326 | * Returns the value of ptr before the arithmetic operation. |
327 | * |
328 | * Full barrier semantics. |
329 | */ |
330 | static inline uint32 |
331 | pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_) |
332 | { |
333 | AssertPointerAlignment(ptr, 4); |
334 | return pg_atomic_fetch_add_u32_impl(ptr, add_); |
335 | } |
336 | |
337 | /* |
338 | * pg_atomic_fetch_sub_u32 - atomically subtract from variable |
339 | * |
340 | * Returns the value of ptr before the arithmetic operation. Note that sub_ |
341 | * may not be INT_MIN due to platform limitations. |
342 | * |
343 | * Full barrier semantics. |
344 | */ |
345 | static inline uint32 |
346 | pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_) |
347 | { |
348 | AssertPointerAlignment(ptr, 4); |
349 | Assert(sub_ != INT_MIN); |
350 | return pg_atomic_fetch_sub_u32_impl(ptr, sub_); |
351 | } |
352 | |
353 | /* |
354 | * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable |
355 | * |
 * Returns the value of ptr before the bitwise operation.
357 | * |
358 | * Full barrier semantics. |
359 | */ |
360 | static inline uint32 |
361 | pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_) |
362 | { |
363 | AssertPointerAlignment(ptr, 4); |
364 | return pg_atomic_fetch_and_u32_impl(ptr, and_); |
365 | } |
366 | |
367 | /* |
368 | * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable |
369 | * |
 * Returns the value of ptr before the bitwise operation.
371 | * |
372 | * Full barrier semantics. |
373 | */ |
374 | static inline uint32 |
375 | pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_) |
376 | { |
377 | AssertPointerAlignment(ptr, 4); |
378 | return pg_atomic_fetch_or_u32_impl(ptr, or_); |
379 | } |
380 | |
381 | /* |
382 | * pg_atomic_add_fetch_u32 - atomically add to variable |
383 | * |
384 | * Returns the value of ptr after the arithmetic operation. |
385 | * |
386 | * Full barrier semantics. |
387 | */ |
388 | static inline uint32 |
389 | pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_) |
390 | { |
391 | AssertPointerAlignment(ptr, 4); |
392 | return pg_atomic_add_fetch_u32_impl(ptr, add_); |
393 | } |
394 | |
395 | /* |
396 | * pg_atomic_sub_fetch_u32 - atomically subtract from variable |
397 | * |
398 | * Returns the value of ptr after the arithmetic operation. Note that sub_ may |
399 | * not be INT_MIN due to platform limitations. |
400 | * |
401 | * Full barrier semantics. |
402 | */ |
403 | static inline uint32 |
404 | pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_) |
405 | { |
406 | AssertPointerAlignment(ptr, 4); |
407 | Assert(sub_ != INT_MIN); |
408 | return pg_atomic_sub_fetch_u32_impl(ptr, sub_); |
409 | } |
410 | |
411 | /* ---- |
 * The 64-bit operations, where available, have the same semantics as their
 * 32-bit counterparts.  Check the corresponding 32-bit function for
 * documentation.
415 | * ---- |
416 | */ |
417 | static inline void |
418 | pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val) |
419 | { |
420 | /* |
421 | * Can't necessarily enforce alignment - and don't need it - when using |
422 | * the spinlock based fallback implementation. Therefore only assert when |
423 | * not using it. |
424 | */ |
425 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
426 | AssertPointerAlignment(ptr, 8); |
427 | #endif |
428 | pg_atomic_init_u64_impl(ptr, val); |
429 | } |
430 | |
431 | static inline uint64 |
432 | pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr) |
433 | { |
434 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
435 | AssertPointerAlignment(ptr, 8); |
436 | #endif |
437 | return pg_atomic_read_u64_impl(ptr); |
438 | } |
439 | |
440 | static inline void |
441 | pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val) |
442 | { |
443 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
444 | AssertPointerAlignment(ptr, 8); |
445 | #endif |
446 | pg_atomic_write_u64_impl(ptr, val); |
447 | } |
448 | |
449 | static inline uint64 |
450 | pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval) |
451 | { |
452 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
453 | AssertPointerAlignment(ptr, 8); |
454 | #endif |
455 | return pg_atomic_exchange_u64_impl(ptr, newval); |
456 | } |
457 | |
458 | static inline bool |
459 | pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, |
460 | uint64 *expected, uint64 newval) |
461 | { |
462 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
463 | AssertPointerAlignment(ptr, 8); |
464 | AssertPointerAlignment(expected, 8); |
465 | #endif |
466 | return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval); |
467 | } |
468 | |
469 | static inline uint64 |
470 | pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_) |
471 | { |
472 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
473 | AssertPointerAlignment(ptr, 8); |
474 | #endif |
475 | return pg_atomic_fetch_add_u64_impl(ptr, add_); |
476 | } |
477 | |
478 | static inline uint64 |
479 | pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_) |
480 | { |
481 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
482 | AssertPointerAlignment(ptr, 8); |
483 | #endif |
484 | Assert(sub_ != PG_INT64_MIN); |
485 | return pg_atomic_fetch_sub_u64_impl(ptr, sub_); |
486 | } |
487 | |
488 | static inline uint64 |
489 | pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_) |
490 | { |
491 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
492 | AssertPointerAlignment(ptr, 8); |
493 | #endif |
494 | return pg_atomic_fetch_and_u64_impl(ptr, and_); |
495 | } |
496 | |
497 | static inline uint64 |
498 | pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_) |
499 | { |
500 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
501 | AssertPointerAlignment(ptr, 8); |
502 | #endif |
503 | return pg_atomic_fetch_or_u64_impl(ptr, or_); |
504 | } |
505 | |
506 | static inline uint64 |
507 | pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 add_) |
508 | { |
509 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
510 | AssertPointerAlignment(ptr, 8); |
511 | #endif |
512 | return pg_atomic_add_fetch_u64_impl(ptr, add_); |
513 | } |
514 | |
515 | static inline uint64 |
516 | pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_) |
517 | { |
518 | #ifndef PG_HAVE_ATOMIC_U64_SIMULATION |
519 | AssertPointerAlignment(ptr, 8); |
520 | #endif |
521 | Assert(sub_ != PG_INT64_MIN); |
522 | return pg_atomic_sub_fetch_u64_impl(ptr, sub_); |
523 | } |
524 | |
525 | #undef INSIDE_ATOMICS_H |
526 | |
527 | #endif /* ATOMICS_H */ |
528 | |