//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
#define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H

/* cxa_guard_impl.h - Implements the C++ runtime support for function local
 * static guards.
 * The layout of the guard object is the same across ARM and Itanium.
 *
 * The first "guard byte" (which is checked by the compiler) is set only upon
 * the completion of __cxa_guard_release.
 *
 * The second "init byte" does the rest of the bookkeeping. It tracks if
 * initialization is complete or pending, and if there are waiting threads.
 *
 * If the guard variable is 64 bits and the platform supplies a 32-bit thread
 * identifier, it is used to detect recursive initialization. The thread ID of
 * the thread currently performing initialization is stored in the second word.
 *
 * Guard Object Layout:
 * -------------------------------------------------------------------------
 * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
 * -------------------------------------------------------------------------
 *
 * Access Protocol:
 *   For each implementation the guard byte is checked and set before accessing
 *   the init byte.
 *
 * Overall Design:
 *   The code is structured so that each implementation can be tested
 *   independently of the C++ runtime and of platform support.
 *
 */
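
// Illustrative sketch (not part of this header): for a function-local static
// such as
//
//   Widget& instance() {
//     static Widget w;
//     return w;
//   }
//
// the compiler emits, per the Itanium C++ ABI, code roughly equivalent to:
//
//   if (guard_byte == 0) {
//     if (__cxa_guard_acquire(&guard)) {
//       try {
//         // ... construct w and register its destructor with __cxa_atexit ...
//       } catch (...) {
//         __cxa_guard_abort(&guard);
//         throw;
//       }
//       __cxa_guard_release(&guard);
//     }
//   }
//
// The names `Widget`, `instance`, and `guard_byte` above are illustrative
// only; this header supplies the implementation behind the __cxa_guard_*
// entry points.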

#include "__cxxabi_config.h"
#include "include/atomic_support.h"
#include <unistd.h>
#include <sys/types.h>
#if defined(__has_include)
#  if __has_include(<sys/syscall.h>)
#    include <sys/syscall.h>
#  endif
#endif

#include <limits.h> // INT_MAX, used by the futex implementation
#include <cstdint>  // uint8_t, uint32_t, uint64_t
#include <cstring>  // std::memcpy
#include <stdlib.h>
#include <__threading_support>
#ifndef _LIBCXXABI_HAS_NO_THREADS
#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB)
#pragma comment(lib, "pthread")
#endif
#endif

// To make testing possible, this header is included from both cxa_guard.cpp
// and a number of tests.
//
// For this reason we place everything in an anonymous namespace -- even though
// we're in a header. We want the actual implementation and the tests to have
// unique definitions of the types in this header (since the tests may depend
// on function local statics).
//
// To enforce this, either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
// defined when including this file. Only `src/cxa_guard.cpp` should define
// the former.
#ifdef BUILDING_CXA_GUARD
#  include "abort_message.h"
#  define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
#elif defined(TESTING_CXA_GUARD)
#  define ABORT_WITH_MESSAGE(...) ::abort()
#else
#  error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
#endif

#if __has_feature(thread_sanitizer)
extern "C" void __tsan_acquire(void*);
extern "C" void __tsan_release(void*);
#else
#define __tsan_acquire(addr) ((void)0)
#define __tsan_release(addr) ((void)0)
#endif

namespace __cxxabiv1 {
// Use an anonymous namespace to ensure that the tests and actual implementation
// have unique definitions of these symbols.
namespace {

//===----------------------------------------------------------------------===//
// Misc Utilities
//===----------------------------------------------------------------------===//

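// LazyValue caches the result of calling `Init()` the first time `get()` is
// called. Note: it is deliberately not thread-safe; the implementations below
// only ever read it from the thread performing the current guard operation,
// and the cache simply avoids re-querying the (potentially syscall-backed)
// thread ID.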
template <class T, T (*Init)()>
struct LazyValue {
  LazyValue() : is_init(false) {}

  T& get() {
    if (!is_init) {
      value = Init();
      is_init = true;
    }
    return value;
  }

private:
  T value;
  bool is_init = false;
};

//===----------------------------------------------------------------------===//
// PlatformGetThreadID
//===----------------------------------------------------------------------===//

#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(
      pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
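
// When the platform does not supply a usable 32-bit thread identifier,
// PlatformThreadID is a null function pointer; the implementations below then
// skip recursive-initialization detection rather than failing to build.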

constexpr bool PlatformSupportsThreadID() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformThreadID != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}

//===----------------------------------------------------------------------===//
// GuardBase
//===----------------------------------------------------------------------===//

enum class AcquireResult {
  INIT_IS_DONE,
  INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;

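// Values of the init byte. PENDING_BIT is set while a thread is running the
// initializer, WAITING_BIT is additionally set once other threads are blocked
// waiting on it, and COMPLETE_BIT is stored when initialization has finished
// successfully. UNSET means initialization has not started (or a previous
// attempt was aborted).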
static constexpr uint8_t UNSET = 0;
static constexpr uint8_t COMPLETE_BIT = (1 << 0);
static constexpr uint8_t PENDING_BIT = (1 << 1);
static constexpr uint8_t WAITING_BIT = (1 << 2);

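// GuardObject is a CRTP base class: it owns the addresses of the guard byte,
// init byte, and (optional) thread-id word, and implements the three ABI
// operations in terms of hooks that each Derived strategy provides:
// acquire_init_byte(), release_init_byte(), and abort_init_byte().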
template <class Derived>
struct GuardObject {
  GuardObject() = delete;
  GuardObject(GuardObject const&) = delete;
  GuardObject& operator=(GuardObject const&) = delete;

  explicit GuardObject(uint32_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(nullptr) {}

  explicit GuardObject(uint64_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}

public:
  /// Implements __cxa_guard_acquire
  AcquireResult cxa_guard_acquire() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    if (guard_byte.load(std::_AO_Acquire) != UNSET)
      return INIT_IS_DONE;
    return derived()->acquire_init_byte();
  }

  /// Implements __cxa_guard_release
  void cxa_guard_release() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    // Store complete first, so that when release wakes other folks, they see
    // it as having been completed.
    guard_byte.store(COMPLETE_BIT, std::_AO_Release);
    derived()->release_init_byte();
  }

  /// Implements __cxa_guard_abort
  void cxa_guard_abort() { derived()->abort_init_byte(); }

public:
  /// base_address - the address of the original guard object.
  void* const base_address;
  /// The address of the guard byte at offset 0.
  uint8_t* const guard_byte_address;
  /// The address of the byte used by the implementation during initialization.
  uint8_t* const init_byte_address;
  /// An optional address storing an identifier for the thread performing initialization.
  /// It's used to detect recursive initialization.
  uint32_t* const thread_id_address;

private:
  Derived* derived() { return static_cast<Derived*>(this); }
};

//===----------------------------------------------------------------------===//
// Single Threaded Implementation
//===----------------------------------------------------------------------===//

struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
  using GuardObject::GuardObject;

  AcquireResult acquire_init_byte() {
    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;
    if (*init_byte_address & PENDING_BIT)
      ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
  void abort_init_byte() { *init_byte_address = UNSET; }
};

//===----------------------------------------------------------------------===//
// Global Mutex Implementation
//===----------------------------------------------------------------------===//
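// Note: all guard objects in the process share one mutex and one condition
// variable (the GlobalStatic instances selected below), so unrelated guarded
// initializations serialize on the same lock, and a release/abort that has
// waiters broadcasts to every blocked thread, each of which then re-checks its
// own init byte.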

struct LibcppMutex;
struct LibcppCondVar;

#ifndef _LIBCXXABI_HAS_NO_THREADS
struct LibcppMutex {
  LibcppMutex() = default;
  LibcppMutex(LibcppMutex const&) = delete;
  LibcppMutex& operator=(LibcppMutex const&) = delete;

  bool lock() { return std::__libcpp_mutex_lock(&mutex); }
  bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }

private:
  friend struct LibcppCondVar;
  std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};

struct LibcppCondVar {
  LibcppCondVar() = default;
  LibcppCondVar(LibcppCondVar const&) = delete;
  LibcppCondVar& operator=(LibcppCondVar const&) = delete;

  bool wait(LibcppMutex& mut) {
    return std::__libcpp_condvar_wait(&cond, &mut.mutex);
  }
  bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }

private:
  std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#else
struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)

template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
          uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex
    : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
                                      GetThreadID>> {

  using BaseT = typename InitByteGlobalMutex::GuardObject;
  using BaseT::BaseT;

  explicit InitByteGlobalMutex(uint32_t* g)
      : BaseT(g), has_thread_id_support(false) {}
  explicit InitByteGlobalMutex(uint64_t* g)
      : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}

public:
  AcquireResult acquire_init_byte() {
    LockGuard g("__cxa_guard_acquire");
    // Check for possible recursive initialization.
    if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
      if (*thread_id_address == current_thread_id.get())
        ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
    }

    // Wait until the pending bit is not set.
    while (*init_byte_address & PENDING_BIT) {
      *init_byte_address |= WAITING_BIT;
      global_cond.wait(global_mutex);
    }

    if (*init_byte_address == COMPLETE_BIT)
      return INIT_IS_DONE;

    if (has_thread_id_support)
      *thread_id_address = current_thread_id.get();

    *init_byte_address = PENDING_BIT;
    return INIT_IS_PENDING;
  }

  void release_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_release");
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = COMPLETE_BIT;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
      }
    }
  }

  void abort_init_byte() {
    bool has_waiting;
    {
      LockGuard g("__cxa_guard_abort");
      if (has_thread_id_support)
        *thread_id_address = 0;
      has_waiting = *init_byte_address & WAITING_BIT;
      *init_byte_address = UNSET;
    }
    if (has_waiting) {
      if (global_cond.broadcast()) {
        ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
      }
    }
  }

private:
  using BaseT::init_byte_address;
  using BaseT::thread_id_address;
  const bool has_thread_id_support;
  LazyValue<uint32_t, GetThreadID> current_thread_id;

private:
  struct LockGuard {
    LockGuard() = delete;
    LockGuard(LockGuard const&) = delete;
    LockGuard& operator=(LockGuard const&) = delete;

    explicit LockGuard(const char* calling_func)
        : calling_func(calling_func) {
      if (global_mutex.lock())
        ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
    }

    ~LockGuard() {
      if (global_mutex.unlock())
        ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
    }

  private:
    const char* const calling_func;
  };
};

//===----------------------------------------------------------------------===//
// Futex Implementation
//===----------------------------------------------------------------------===//
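// Unlike the global-mutex strategy, the futex-based one performs all state
// transitions with atomic operations on the init byte and only enters the
// kernel (futex wait/wake on the 32-bit guard word) when there is actual
// contention.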

#if defined(SYS_futex)
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0;
  syscall(SYS_futex, addr, WAIT, expect, 0);
  __tsan_acquire(addr);
}
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1;
  __tsan_release(addr);
  syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif

constexpr bool PlatformSupportsFutex() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
  return +PlatformFutexWait != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}

/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking.
template <void (*Wait)(int*, int) = PlatformFutexWait,
          void (*Wake)(int*) = PlatformFutexWake,
          uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
  using BaseT = typename InitByteFutex::GuardObject;

  /// ARM Constructor
  explicit InitByteFutex(uint32_t* g)
      : BaseT(g), init_byte(this->init_byte_address),
        has_thread_id_support(this->thread_id_address && GetThreadIDArg),
        thread_id(this->thread_id_address) {}

  /// Itanium Constructor
  explicit InitByteFutex(uint64_t* g)
      : BaseT(g), init_byte(this->init_byte_address),
        has_thread_id_support(this->thread_id_address && GetThreadIDArg),
        thread_id(this->thread_id_address) {}

public:
  AcquireResult acquire_init_byte() {
    while (true) {
      uint8_t last_val = UNSET;
      if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
                                     std::_AO_Acquire)) {
        if (has_thread_id_support) {
          thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
        }
        return INIT_IS_PENDING;
      }

      if (last_val == COMPLETE_BIT)
        return INIT_IS_DONE;

      if (last_val & PENDING_BIT) {

        // Check for recursive initialization
        if (has_thread_id_support &&
            thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
          ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
        }

        if ((last_val & WAITING_BIT) == 0) {
          // This compare exchange can fail for several reasons
          // (1) another thread finished the whole thing before we got here
          // (2) another thread set the waiting bit we were trying to set
          // (3) another thread had an exception and failed to finish
          if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
                                          std::_AO_Acq_Rel, std::_AO_Release)) {
            // (1) success, via someone else's work!
            if (last_val == COMPLETE_BIT)
              return INIT_IS_DONE;

            // (3) someone else bailed on doing the work; retry from the start!
            if (last_val == UNSET)
              continue;

            // (2) the waiting bit got set, so we are happy to keep waiting
          }
        }
        wait_on_initialization();
      }
    }
  }

  void release_init_byte() {
    uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

  void abort_init_byte() {
    if (has_thread_id_support)
      thread_id.store(0, std::_AO_Relaxed);

    uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

private:
  /// Use the futex to wait on the current guard variable. Futex expects a
  /// 32-bit, 4-byte-aligned address as the first argument, so we have to use
  /// the base address of the guard variable (not the init byte).
  void wait_on_initialization() {
    Wait(static_cast<int*>(this->base_address),
         expected_value_for_futex(PENDING_BIT | WAITING_BIT));
  }
  void wake_all() { Wake(static_cast<int*>(this->base_address)); }

private:
  AtomicInt<uint8_t> init_byte;

  const bool has_thread_id_support;
  // Unsafe to use unless has_thread_id_support
  AtomicInt<uint32_t> thread_id;
  LazyValue<uint32_t, GetThreadIDArg> current_thread_id;

  /// Create the expected integer value for futex `wait(int* addr, int expected)`.
  /// We pass the base address as the first argument, so this function creates
  /// a zero-initialized integer with `b` copied at the correct offset.
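  /// For example, expected_value_for_futex(PENDING_BIT | WAITING_BIT) produces
  /// an int whose byte at offset 1 is 0x06 and whose other bytes are zero,
  /// matching the in-memory guard word during a contended initialization
  /// (guard byte still unset, unused bytes still zero).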
  static int expected_value_for_futex(uint8_t b) {
    int dest_val = 0;
    std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
    return dest_val;
  }

  static_assert(Wait != nullptr && Wake != nullptr, "");
};

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <class T>
struct GlobalStatic {
  static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};

enum class Implementation {
  NoThreads,
  GlobalLock,
  Futex
};

template <Implementation Impl>
struct SelectImplementation;

template <>
struct SelectImplementation<Implementation::NoThreads> {
  using type = InitByteNoThreads;
};

template <>
struct SelectImplementation<Implementation::GlobalLock> {
  using type = InitByteGlobalMutex<
      LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
      GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};

template <>
struct SelectImplementation<Implementation::Futex> {
  using type =
      InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};

// TODO(EricWF): We should prefer the futex implementation when available. But
// it should be done in a separate step from adding the implementation.
constexpr Implementation CurrentImplementation =
#if defined(_LIBCXXABI_HAS_NO_THREADS)
    Implementation::NoThreads;
#elif defined(_LIBCXXABI_USE_FUTEX)
    Implementation::Futex;
#else
    Implementation::GlobalLock;
#endif

static_assert(CurrentImplementation != Implementation::Futex ||
                  PlatformSupportsFutex(),
              "Futex selected but not supported");

using SelectedImplementation =
    SelectImplementation<CurrentImplementation>::type;
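
// A minimal sketch (assuming the usual entry points in src/cxa_guard.cpp,
// which live in that file, not here) of how SelectedImplementation is
// consumed:
//
//   extern "C" int __cxa_guard_acquire(uint64_t* raw_guard_object) {
//     SelectedImplementation imp(raw_guard_object);
//     return imp.cxa_guard_acquire() == INIT_IS_PENDING;
//   }
//
//   extern "C" void __cxa_guard_release(uint64_t* raw_guard_object) {
//     SelectedImplementation imp(raw_guard_object);
//     imp.cxa_guard_release();
//   }
//
// On ARM EABI targets the guard object is a uint32_t rather than a uint64_t,
// which is why each implementation provides both constructors.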

} // end namespace
} // end namespace __cxxabiv1

#endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H