//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
#define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H

/* cxa_guard_impl.h - Implements the C++ runtime support for function local
 * static guards.
 * The layout of the guard object is the same across ARM and Itanium.
 *
 * The first "guard byte" (which is checked by the compiler) is set only upon
 * the completion of __cxa_guard_release.
 *
 * The second "init byte" does the rest of the bookkeeping. It tracks if
 * initialization is complete or pending, and if there are waiting threads.
 *
 * If the guard variable is 64 bits and the platform supplies a 32-bit thread
 * identifier, it is used to detect recursive initialization. The thread ID of
 * the thread currently performing initialization is stored in the second word.
 *
 * Guard Object Layout:
 * -------------------------------------------------------------------------
 * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
 * -------------------------------------------------------------------------
 *
 * Access Protocol:
 *   For each implementation the guard byte is checked and set before accessing
 *   the init byte.
 *
 * Overall Design:
 *   The code is structured so that each implementation can be tested
 *   independently of the C++ runtime and of platform support.
 *
 */
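
// Illustrative sketch (not part of this header): for a function-local static
// such as
//
//   Widget& instance() {
//     static Widget w;
//     return w;
//   }
//
// the compiler emits, per the Itanium C++ ABI, code roughly equivalent to:
//
//   if (guard_byte == 0) {
//     if (__cxa_guard_acquire(&guard)) {
//       try {
//         // ... construct w and register its destructor with __cxa_atexit ...
//       } catch (...) {
//         __cxa_guard_abort(&guard);
//         throw;
//       }
//       __cxa_guard_release(&guard);
//     }
//   }
//
// The names `Widget`, `instance`, and `guard_byte` above are illustrative
// only; this header supplies the implementation behind the __cxa_guard_*
// entry points.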

#include "__cxxabi_config.h"
#include "include/atomic_support.h"
#include <unistd.h>
#include <sys/types.h>
#if defined(__has_include)
#  if __has_include(<sys/syscall.h>)
#    include <sys/syscall.h>
#  endif
#endif

#include <limits.h> // INT_MAX, used by the futex implementation
#include <cstdint>  // uint8_t, uint32_t, uint64_t
#include <cstring>  // std::memcpy
#include <stdlib.h>
#include <__threading_support>
#ifndef _LIBCXXABI_HAS_NO_THREADS
#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB)
#pragma comment(lib, "pthread")
#endif
#endif

// To make testing possible, this header is included from both cxa_guard.cpp
// and a number of tests.
//
// For this reason we place everything in an anonymous namespace -- even though
// we're in a header. We want the actual implementation and the tests to have
// unique definitions of the types in this header (since the tests may depend
// on function local statics).
//
// To enforce this, either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
// defined when including this file. Only `src/cxa_guard.cpp` should define
// the former.
#ifdef BUILDING_CXA_GUARD
#  include "abort_message.h"
#  define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
#elif defined(TESTING_CXA_GUARD)
#  define ABORT_WITH_MESSAGE(...) ::abort()
#else
#  error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
#endif

#if __has_feature(thread_sanitizer)
extern "C" void __tsan_acquire(void*);
extern "C" void __tsan_release(void*);
#else
#define __tsan_acquire(addr) ((void)0)
#define __tsan_release(addr) ((void)0)
#endif

namespace __cxxabiv1 {
// Use an anonymous namespace to ensure that the tests and actual implementation
// have unique definitions of these symbols.
namespace {

//===----------------------------------------------------------------------===//
// Misc Utilities
//===----------------------------------------------------------------------===//

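// LazyValue caches the result of calling `Init()` the first time `get()` is
// called. Note: it is deliberately not thread-safe; the implementations below
// only ever read it from the thread performing the current guard operation,
// and the cache simply avoids re-querying the (potentially syscall-backed)
// thread ID.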
template <class T, T (*Init)()>
struct LazyValue {
  LazyValue() : is_init(false) {}

  T& get() {
    if (!is_init) {
      value = Init();
      is_init = true;
    }
    return value;
  }

private:
  T value;
  bool is_init = false;
};

//===----------------------------------------------------------------------===//
// PlatformGetThreadID
//===----------------------------------------------------------------------===//

#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(
      pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
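
// When the platform does not supply a usable 32-bit thread identifier,
// PlatformThreadID is a null function pointer; the implementations below then
// skip recursive-initialization detection rather than failing to build.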

constexpr bool PlatformSupportsThreadID() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformThreadID != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}

//===----------------------------------------------------------------------===//
// GuardBase
//===----------------------------------------------------------------------===//

enum class AcquireResult {
  INIT_IS_DONE,
  INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;

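// Values of the init byte. PENDING_BIT is set while a thread is running the
// initializer, WAITING_BIT is additionally set once other threads are blocked
// waiting on it, and COMPLETE_BIT is stored when initialization has finished
// successfully. UNSET means initialization has not started (or a previous
// attempt was aborted).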
static constexpr uint8_t UNSET = 0;
static constexpr uint8_t COMPLETE_BIT = (1 << 0);
static constexpr uint8_t PENDING_BIT = (1 << 1);
static constexpr uint8_t WAITING_BIT = (1 << 2);

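// GuardObject is a CRTP base class: it owns the addresses of the guard byte,
// init byte, and (optional) thread-id word, and implements the three ABI
// operations in terms of hooks that each Derived strategy provides:
// acquire_init_byte(), release_init_byte(), and abort_init_byte().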
template <class Derived>
struct GuardObject {
  GuardObject() = delete;
  GuardObject(GuardObject const&) = delete;
  GuardObject& operator=(GuardObject const&) = delete;

  explicit GuardObject(uint32_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(nullptr) {}

  explicit GuardObject(uint64_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}

public:
  /// Implements __cxa_guard_acquire
  AcquireResult cxa_guard_acquire() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    if (guard_byte.load(std::_AO_Acquire) != UNSET)
      return INIT_IS_DONE;
    return derived()->acquire_init_byte();
  }

  /// Implements __cxa_guard_release
  void cxa_guard_release() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    // Store complete first, so that when release wakes other folks, they see
    // it as having been completed.
    guard_byte.store(COMPLETE_BIT, std::_AO_Release);
    derived()->release_init_byte();
  }

  /// Implements __cxa_guard_abort
  void cxa_guard_abort() { derived()->abort_init_byte(); }

public:
  /// base_address - the address of the original guard object.
  void* const base_address;
  /// The address of the guard byte at offset 0.
  uint8_t* const guard_byte_address;
  /// The address of the byte used by the implementation during initialization.
  uint8_t* const init_byte_address;
  /// An optional address storing an identifier for the thread performing initialization.
  /// It's used to detect recursive initialization.
  uint32_t* const thread_id_address;

private:
  Derived* derived() { return static_cast<Derived*>(this); }
};

//===----------------------------------------------------------------------===//
// Single Threaded Implementation
//===----------------------------------------------------------------------===//

struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
  using GuardObject::GuardObject;

  AcquireResult acquire_init_byte() {
    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;
    if (*init_byte_address & PENDING_BIT)
      ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
  void abort_init_byte() { *init_byte_address = UNSET; }
};

//===----------------------------------------------------------------------===//
// Global Mutex Implementation
//===----------------------------------------------------------------------===//
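// Note: all guard objects in the process share one mutex and one condition
// variable (the GlobalStatic instances selected below), so unrelated guarded
// initializations serialize on the same lock, and a release/abort that has
// waiters broadcasts to every blocked thread, each of which then re-checks its
// own init byte.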

struct LibcppMutex;
struct LibcppCondVar;

#ifndef _LIBCXXABI_HAS_NO_THREADS
struct LibcppMutex {
  LibcppMutex() = default;
  LibcppMutex(LibcppMutex const&) = delete;
  LibcppMutex& operator=(LibcppMutex const&) = delete;

  bool lock() { return std::__libcpp_mutex_lock(&mutex); }
  bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }

private:
  friend struct LibcppCondVar;
  std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};

struct LibcppCondVar {
  LibcppCondVar() = default;
  LibcppCondVar(LibcppCondVar const&) = delete;
  LibcppCondVar& operator=(LibcppCondVar const&) = delete;

  bool wait(LibcppMutex& mut) {
    return std::__libcpp_condvar_wait(&cond, &mut.mutex);
  }
  bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }

private:
  std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#else
struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)

template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
          uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex
    : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
                                      GetThreadID>> {

  using BaseT = typename InitByteGlobalMutex::GuardObject;
  using BaseT::BaseT;

  explicit InitByteGlobalMutex(uint32_t* g)
      : BaseT(g), has_thread_id_support(false) {}
  explicit InitByteGlobalMutex(uint64_t* g)
      : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}

public:
  AcquireResult acquire_init_byte() {
    LockGuard g("__cxa_guard_acquire");
    // Check for possible recursive initialization.
    if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
      if (*thread_id_address == current_thread_id.get())
        ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    }

    // Wait until the pending bit is not set.
    while (*init_byte_address & PENDING_BIT) {
      *init_byte_address |= WAITING_BIT;
      global_cond.wait(global_mutex);
    }

    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;

    if (has_thread_id_support)
      *thread_id_address = current_thread_id.get();

    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  void release_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_release");
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = COMPLETE_BIT;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
      }
    }
  }

  void abort_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_abort");
      if (has_thread_id_support)
        *thread_id_address = 0;
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = UNSET;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
      }
    }
  }

private:
  using BaseT::init_byte_address;
  using BaseT::thread_id_address;
  const bool has_thread_id_support;
  LazyValue<uint32_t, GetThreadID> current_thread_id;

private:
  struct LockGuard {
    LockGuard() = delete;
    LockGuard(LockGuard const&) = delete;
    LockGuard& operator=(LockGuard const&) = delete;

    explicit LockGuard(const char* calling_func)
        : calling_func(calling_func) {
      if (global_mutex.lock())
        ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
    }

    ~LockGuard() {
      if (global_mutex.unlock())
        ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
    }

  private:
    const char* const calling_func;
  };
};

//===----------------------------------------------------------------------===//
// Futex Implementation
//===----------------------------------------------------------------------===//
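// Unlike the global-mutex strategy, the futex-based one performs all state
// transitions with atomic operations on the init byte and only enters the
// kernel (futex wait/wake on the 32-bit guard word) when there is actual
// contention.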

#if defined(SYS_futex)
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0;
  syscall(SYS_futex, addr, WAIT, expect, 0);
  __tsan_acquire(addr);
}
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1;
  __tsan_release(addr);
  syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif

constexpr bool PlatformSupportsFutex() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformFutexWait != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}

/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking.
template <void (*Wait)(int*, int) = PlatformFutexWait,
          void (*Wake)(int*) = PlatformFutexWake,
          uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
  using BaseT = typename InitByteFutex::GuardObject;

  /// ARM Constructor
  explicit InitByteFutex(uint32_t* g)
      : BaseT(g), init_byte(this->init_byte_address),
        has_thread_id_support(this->thread_id_address && GetThreadIDArg),
        thread_id(this->thread_id_address) {}

  /// Itanium Constructor
  explicit InitByteFutex(uint64_t* g)
      : BaseT(g), init_byte(this->init_byte_address),
        has_thread_id_support(this->thread_id_address && GetThreadIDArg),
        thread_id(this->thread_id_address) {}

public:
  AcquireResult acquire_init_byte() {
    while (true) {
      uint8_t last_val = UNSET;
      if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
                                     std::_AO_Acquire)) {
        if (has_thread_id_support) {
          thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
        }
        return INIT_IS_PENDING;
      }

      if (last_val == COMPLETE_BIT)
        return INIT_IS_DONE;

      if (last_val & PENDING_BIT) {

        // Check for recursive initialization
        if (has_thread_id_support &&
            thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
          ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
        }

        if ((last_val & WAITING_BIT) == 0) {
          // This compare exchange can fail for several reasons
          // (1) another thread finished the whole thing before we got here
          // (2) another thread set the waiting bit we were trying to set
          // (3) another thread had an exception and failed to finish
          if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
                                          std::_AO_Acq_Rel, std::_AO_Release)) {
            // (1) success, via someone else's work!
            if (last_val == COMPLETE_BIT)
              return INIT_IS_DONE;

            // (3) someone else bailed on doing the work; retry from the start!
            if (last_val == UNSET)
              continue;

            // (2) the waiting bit got set, so we are happy to keep waiting
          }
        }
        wait_on_initialization();
      }
    }
  }

  void release_init_byte() {
    uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

  void abort_init_byte() {
    if (has_thread_id_support)
      thread_id.store(0, std::_AO_Relaxed);

    uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

private:
  /// Use the futex to wait on the current guard variable. Futex expects a
  /// 32-bit, 4-byte-aligned address as the first argument, so we have to use
  /// the base address of the guard variable (not the init byte).
  void wait_on_initialization() {
    Wait(static_cast<int*>(this->base_address),
         expected_value_for_futex(PENDING_BIT | WAITING_BIT));
  }
  void wake_all() { Wake(static_cast<int*>(this->base_address)); }

private:
  AtomicInt<uint8_t> init_byte;

  const bool has_thread_id_support;
  // Unsafe to use unless has_thread_id_support
  AtomicInt<uint32_t> thread_id;
  LazyValue<uint32_t, GetThreadIDArg> current_thread_id;

  /// Create the expected integer value for futex `wait(int* addr, int expected)`.
  /// We pass the base address as the first argument, so this function creates
  /// a zero-initialized integer with `b` copied at the correct offset.
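  /// For example, expected_value_for_futex(PENDING_BIT | WAITING_BIT) produces
  /// an int whose byte at offset 1 is 0x06 and whose other bytes are zero,
  /// matching the in-memory guard word during a contended initialization
  /// (guard byte still unset, unused bytes still zero).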
  static int expected_value_for_futex(uint8_t b) {
    int dest_val = 0;
    std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
    return dest_val;
  }

  static_assert(Wait != nullptr && Wake != nullptr, "");
};

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <class T>
struct GlobalStatic {
  static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};

enum class Implementation {
  NoThreads,
  GlobalLock,
  Futex
};

template <Implementation Impl>
struct SelectImplementation;

template <>
struct SelectImplementation<Implementation::NoThreads> {
  using type = InitByteNoThreads;
};

template <>
struct SelectImplementation<Implementation::GlobalLock> {
  using type = InitByteGlobalMutex<
      LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
      GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};

template <>
struct SelectImplementation<Implementation::Futex> {
  using type =
      InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};

// TODO(EricWF): We should prefer the futex implementation when available. But
// it should be done in a separate step from adding the implementation.
constexpr Implementation CurrentImplementation =
#if defined(_LIBCXXABI_HAS_NO_THREADS)
    Implementation::NoThreads;
#elif defined(_LIBCXXABI_USE_FUTEX)
    Implementation::Futex;
#else
    Implementation::GlobalLock;
#endif

static_assert(CurrentImplementation != Implementation::Futex ||
                  PlatformSupportsFutex(),
              "Futex selected but not supported");

using SelectedImplementation =
    SelectImplementation<CurrentImplementation>::type;
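
// A minimal sketch (assuming the usual entry points in src/cxa_guard.cpp,
// which live in that file, not here) of how SelectedImplementation is
// consumed:
//
//   extern "C" int __cxa_guard_acquire(uint64_t* raw_guard_object) {
//     SelectedImplementation imp(raw_guard_object);
//     return imp.cxa_guard_acquire() == INIT_IS_PENDING;
//   }
//
//   extern "C" void __cxa_guard_release(uint64_t* raw_guard_object) {
//     SelectedImplementation imp(raw_guard_object);
//     imp.cxa_guard_release();
//   }
//
// On ARM EABI targets the guard object is a uint32_t rather than a uint64_t,
// which is why each implementation provides both constructors.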

} // end namespace
} // end namespace __cxxabiv1

#endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H