/*
 * Copyright 2011-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <limits.h>

#include <atomic>
#include <functional>
#include <mutex>
#include <string>
#include <vector>

#include <glog/logging.h>

#include <folly/Exception.h>
#include <folly/Function.h>
#include <folly/Portability.h>
#include <folly/ScopeGuard.h>
#include <folly/SharedMutex.h>
#include <folly/container/Foreach.h>
#include <folly/detail/AtFork.h>
#include <folly/memory/Malloc.h>
#include <folly/portability/PThread.h>
#include <folly/synchronization/MicroSpinLock.h>

#include <folly/detail/StaticSingletonManager.h>

// In general, emutls cleanup is not guaranteed to play nice with the way
// StaticMeta mixes direct pthread calls and the use of __thread. This has
// caused problems on multiple platforms, so we don't use __thread there.
//
// XXX: Ideally we would instead determine whether emutls is in use at
// runtime, since glibc on Linux can be configured to use emutls regardless.
#if !FOLLY_MOBILE && !defined(__APPLE__) && !defined(_MSC_VER)
#define FOLLY_TLD_USE_FOLLY_TLS 1
#else
#undef FOLLY_TLD_USE_FOLLY_TLS
#endif

namespace folly {

enum class TLPDestructionMode { THIS_THREAD, ALL_THREADS };
struct AccessModeStrict {};

namespace threadlocal_detail {

constexpr uint32_t kEntryIDInvalid = std::numeric_limits<uint32_t>::max();

struct ThreadEntry;
/* This represents a node in a doubly linked list where all the nodes
 * are part of ElementWrapper structs that share the same id.
 * We cannot store prev and next as ThreadEntryNode pointers, because
 * ThreadEntry::elements can be reallocated and the pointers would become
 * stale. So instead we keep, for both prev and next, a pointer to the
 * owning ThreadEntry struct, together with the id.
 * We traverse and update the list only while holding
 * StaticMetaBase::lock_.
 */
struct ThreadEntryNode {
  uint32_t id;
  ThreadEntry* parent;
  ThreadEntry* prev;
  ThreadEntry* next;

  void initIfZero(bool locked);

  void init(ThreadEntry* entry, uint32_t newId) {
    id = newId;
    parent = prev = next = entry;
  }

  void initZero(ThreadEntry* entry, uint32_t newId) {
    id = newId;
    parent = entry;
    prev = next = nullptr;
  }

  // true if the list this node is part of is empty
  FOLLY_ALWAYS_INLINE bool empty() const {
    return (next == parent);
  }

  // true if this node has not yet been linked into any list
  FOLLY_ALWAYS_INLINE bool zero() const {
    return (!prev);
  }

  FOLLY_ALWAYS_INLINE ThreadEntry* getThreadEntry() {
    return parent;
  }

  FOLLY_ALWAYS_INLINE ThreadEntryNode* getPrev();

  FOLLY_ALWAYS_INLINE ThreadEntryNode* getNext();

  void push_back(ThreadEntry* head);

  void eraseZero();
};
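
// Illustrative sketch (not part of this header): how the per-id list formed
// by ThreadEntryNode is meant to be walked. head_.elements[id].node acts as
// the sentinel (its parent is the head ThreadEntry itself), and the walk must
// hold StaticMetaBase::lock_. The helper name forEachThreadEntryForId is
// hypothetical; real traversals of this shape live in StaticMetaBase.
//
//   void forEachThreadEntryForId(ThreadEntry* head, uint32_t id) {
//     ThreadEntryNode* node = head->elements[id].node.getNext();
//     while (node->getThreadEntry() != head) {
//       ThreadEntry* e = node->getThreadEntry();
//       // ... operate on e->elements[id] ...
//       node = node->getNext();
//     }
//   }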

/**
 * POD wrapper around an element (a void*) and an associated deleter.
 * This must be POD, as we memset() it to 0 and memcpy() it around.
 */
struct ElementWrapper {
  using DeleterFunType = void(void*, TLPDestructionMode);

  bool dispose(TLPDestructionMode mode) {
    if (ptr == nullptr) {
      return false;
    }

    DCHECK(deleter1 != nullptr);
    ownsDeleter ? (*deleter2)(ptr, mode) : (*deleter1)(ptr, mode);
    return true;
  }

  void* release() {
    auto retPtr = ptr;

    if (ptr != nullptr) {
      cleanup();
    }

    return retPtr;
  }

  template <class Ptr>
  void set(Ptr p) {
    auto guard = makeGuard([&] { delete p; });
    DCHECK(ptr == nullptr);
    DCHECK(deleter1 == nullptr);

    if (p) {
      node.initIfZero(true /*locked*/);
      ptr = p;
      deleter1 = [](void* pt, TLPDestructionMode) {
        delete static_cast<Ptr>(pt);
      };
      ownsDeleter = false;
      guard.dismiss();
    }
  }

  template <class Ptr, class Deleter>
  void set(Ptr p, const Deleter& d) {
    auto guard = makeGuard([&] {
      if (p) {
        d(p, TLPDestructionMode::THIS_THREAD);
      }
    });
    DCHECK(ptr == nullptr);
    DCHECK(deleter2 == nullptr);
    if (p) {
      node.initIfZero(true /*locked*/);
      ptr = p;
      auto d2 = d; // gcc-4.8 doesn't decay types correctly in lambda captures
      deleter2 = new std::function<DeleterFunType>(
          [d2](void* pt, TLPDestructionMode mode) {
            d2(static_cast<Ptr>(pt), mode);
          });
      ownsDeleter = true;
      guard.dismiss();
    }
  }

  void cleanup() {
    if (ownsDeleter) {
      delete deleter2;
    }
    ptr = nullptr;
    deleter1 = nullptr;
    ownsDeleter = false;
  }

  void* ptr;
  union {
    DeleterFunType* deleter1;
    std::function<DeleterFunType>* deleter2;
  };
  bool ownsDeleter;
  ThreadEntryNode node;
};
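
// Illustrative sketch (not part of this header): the intended call order on
// an ElementWrapper, as driven by ThreadLocalPtr. In real use the wrapper
// lives in ThreadEntry::elements, is zero-initialized there, and has its node
// set up by StaticMetaBase::reserve(); Widget is a hypothetical payload type.
//
//   ElementWrapper& w = threadEntry->elements[id];
//   w.set(new Widget(),                          // stores ptr and deleter2
//         [](Widget* p, TLPDestructionMode) { delete p; });
//   ...
//   w.dispose(TLPDestructionMode::THIS_THREAD);  // runs the stored deleter
//   w.cleanup();                                 // frees deleter2, resets ptr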

struct StaticMetaBase;
struct ThreadEntryList;

/**
 * Per-thread entry. Each thread using a StaticMeta object has one.
 * This is written from the owning thread only (under the lock), read
 * from the owning thread (no lock necessary), and read from other threads
 * (under the lock).
 * The elementsCapacity of StaticMetaBase::head_ can be read from any thread
 * during reallocate (without the lock), which is why it is stored in an
 * atomic and accessed with relaxed ordering.
 */
struct ThreadEntry {
  ElementWrapper* elements{nullptr};
  std::atomic<size_t> elementsCapacity{0};
  ThreadEntry* next{nullptr};
  ThreadEntry* prev{nullptr};
  ThreadEntryList* list{nullptr};
  ThreadEntry* listNext{nullptr};
  StaticMetaBase* meta{nullptr};
  bool removed_{false};

  size_t getElementsCapacity() const noexcept {
    return elementsCapacity.load(std::memory_order_relaxed);
  }

  void setElementsCapacity(size_t capacity) noexcept {
    elementsCapacity.store(capacity, std::memory_order_relaxed);
  }
};
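
// Illustrative sketch (not part of this header): the locking discipline the
// comment above describes, for reading other threads' entries. In the real
// implementation this kind of iteration is done by ThreadLocalPtr's
// accessAllThreads machinery; forAllThreadEntries and its callback are
// hypothetical.
//
//   void forAllThreadEntries(StaticMetaBase& meta,
//                            folly::FunctionRef<void(ThreadEntry*)> visit) {
//     std::lock_guard<std::mutex> g(meta.lock_); // required off-thread
//     for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) {
//       visit(e); // e->elements may be read safely while the lock is held
//     }
//   }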

struct ThreadEntryList {
  ThreadEntry* head{nullptr};
  size_t count{0};
};

struct PthreadKeyUnregisterTester;

FOLLY_ALWAYS_INLINE ThreadEntryNode* ThreadEntryNode::getPrev() {
  return &prev->elements[id].node;
}

FOLLY_ALWAYS_INLINE ThreadEntryNode* ThreadEntryNode::getNext() {
  return &next->elements[id].node;
}

/**
 * We want to disable the onThreadExit call at the end of shutdown; we don't
 * care about leaking memory at that point.
 *
 * Otherwise, if ThreadLocal is used in a shared library, onThreadExit may be
 * called after dlclose().
 *
 * This class has a single static instance; however, since it is so widely
 * used, directly or indirectly, by so many classes, we need to take care to
 * avoid problems stemming from the Static Initialization/Destruction Order
 * Fiascos. Therefore this class needs to be constexpr-constructible, so as
 * to avoid the need for it to participate in init/destruction order at all.
 */
class PthreadKeyUnregister {
 public:
  static constexpr size_t kMaxKeys = 1UL << 16;

  ~PthreadKeyUnregister() {
    // If static constructor priorities are not supported then
    // ~PthreadKeyUnregister logic is not safe.
#if !defined(__APPLE__) && !defined(_MSC_VER)
    MSLGuard lg(lock_);
    while (size_) {
      pthread_key_delete(keys_[--size_]);
    }
#endif
  }

  static void registerKey(pthread_key_t key) {
    instance_.registerKeyImpl(key);
  }

 private:
  /**
   * Only one global instance should exist, hence this is private.
   * See also the important note at the top of this class about `constexpr`
   * usage.
   */
  constexpr PthreadKeyUnregister() : lock_(), size_(0), keys_() {}
  friend struct folly::threadlocal_detail::PthreadKeyUnregisterTester;

  void registerKeyImpl(pthread_key_t key) {
    MSLGuard lg(lock_);
    if (size_ == kMaxKeys) {
      throw std::logic_error("pthread_key limit has already been reached");
    }
    keys_[size_++] = key;
  }

  MicroSpinLock lock_;
  size_t size_;
  pthread_key_t keys_[kMaxKeys];

  static PthreadKeyUnregister instance_;
};
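
// Illustrative sketch (not part of this header): how a pthread key is meant
// to be recorded so that it can be deleted before shutdown. This mirrors what
// the StaticMetaBase constructor does in the corresponding .cpp; treat the
// exact call site as an assumption.
//
//   pthread_key_t key;
//   int ret = pthread_key_create(&key, &StaticMetaBase::onThreadExit);
//   checkPosixError(ret, "pthread_key_create failed");
//   PthreadKeyUnregister::registerKey(key); // deleted in the destructor above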

struct StaticMetaBase {
  // Represents the ID of a thread-local object. Initially set to the maximum
  // uint32_t value (kEntryIDInvalid). This representation lets us avoid an
  // extra branch when accessing TLS data: the fast path only tests
  // capacity > id, and with id equal to maxint that test always fails, so
  // the slow path is taken automatically. It also lets us keep a constexpr
  // constructor and avoid SIOF.
  class EntryID {
   public:
    std::atomic<uint32_t> value;

    constexpr EntryID() : value(kEntryIDInvalid) {}

    EntryID(EntryID&& other) noexcept : value(other.value.load()) {
      other.value = kEntryIDInvalid;
    }

    EntryID& operator=(EntryID&& other) {
      assert(this != &other);
      value = other.value.load();
      other.value = kEntryIDInvalid;
      return *this;
    }

    EntryID(const EntryID& other) = delete;
    EntryID& operator=(const EntryID& other) = delete;

    uint32_t getOrInvalid() {
      // It's OK for this to be relaxed, even though we're effectively doing
      // double checked locking in using this value. We only care about the
      // uniqueness of IDs, getOrAllocate does not modify any other memory
      // this thread will use.
      return value.load(std::memory_order_relaxed);
    }

    uint32_t getOrAllocate(StaticMetaBase& meta) {
      uint32_t id = getOrInvalid();
      if (id != kEntryIDInvalid) {
        return id;
      }
      // The lock inside allocate ensures that a single value is allocated
      return meta.allocate(this);
    }
  };
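
  // Illustrative sketch (not part of this header): the branch-elimination
  // trick described above, as it plays out in StaticMeta::get(). A freshly
  // constructed EntryID holds kEntryIDInvalid (the maximum uint32_t), so:
  //
  //   uint32_t id = ent->getOrInvalid(); // == 0xFFFFFFFF before allocation
  //   if (FOLLY_UNLIKELY(capacity <= id)) {
  //     // always taken for an unallocated id, since no capacity can exceed
  //     // the maximum uint32_t; the slow path then allocates a real id
  //   }
  //
  // so no separate "has this id been allocated?" branch is needed on the
  // fast path.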

  StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict);

  void push_back(ThreadEntry* t) {
    t->next = &head_;
    t->prev = head_.prev;
    head_.prev->next = t;
    head_.prev = t;
  }

  void erase(ThreadEntry* t) {
    t->next->prev = t->prev;
    t->prev->next = t->next;
    t->next = t->prev = t;
  }

  FOLLY_EXPORT static ThreadEntryList* getThreadEntryList();

  static void onThreadExit(void* ptr);

  // Returns the elementsCapacity for the current thread's ThreadEntry struct.
  uint32_t elementsCapacity() const;

  uint32_t allocate(EntryID* ent);

  void destroy(EntryID* ent);

  /**
   * Reserve enough space in ThreadEntry::elements for the item
   * @id to fit in.
   */
  void reserve(EntryID* id);

  ElementWrapper& getElement(EntryID* ent);

  // Reserve an id in the head_ ThreadEntry->elements
  // array, if not already there.
  void reserveHeadUnlocked(uint32_t id);

  // Push back an entry in the doubly linked list
  // that corresponds to index id.
  void pushBackLocked(ThreadEntry* t, uint32_t id);
  void pushBackUnlocked(ThreadEntry* t, uint32_t id);

  // Static helper method to reallocate ThreadEntry::elements.
  // Returns non-nullptr if ThreadEntry::elements was reallocated,
  // nullptr if it was just extended in place,
  // and throws std::bad_alloc if memory cannot be allocated.
  static ElementWrapper*
  reallocate(ThreadEntry* threadEntry, uint32_t idval, size_t& newCapacity);

  uint32_t nextId_;
  std::vector<uint32_t> freeIds_;
  std::mutex lock_;
  SharedMutex accessAllThreadsLock_;
  pthread_key_t pthreadKey_;
  ThreadEntry head_;
  ThreadEntry* (*threadEntry_)();
  bool strict_;

 protected:
  ~StaticMetaBase() {}
};

// Held in a singleton to track our global instances.
// We have one of these per "Tag", by default one for the whole system
// (Tag=void).
//
// Creating and destroying ThreadLocalPtr objects, as well as thread exit
// for threads that use ThreadLocalPtr objects, collide on a lock inside
// StaticMeta; you can specify multiple Tag types to break that lock.
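//
// Illustrative sketch (not part of this header): how a user-level Tag selects
// a separate StaticMeta instance. The ThreadLocal template parameters are
// assumed from the public folly/ThreadLocal.h header, and MySubsystemTag is a
// hypothetical tag type.
//
//   struct MySubsystemTag {};
//
//   // These use StaticMeta<MySubsystemTag, void> and therefore do not
//   // contend on the lock inside the default StaticMeta<void, void>.
//   folly::ThreadLocal<int, MySubsystemTag> counterA;
//   folly::ThreadLocal<int, MySubsystemTag> counterB;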
template <class Tag, class AccessMode>
struct StaticMeta final : StaticMetaBase {
  StaticMeta()
      : StaticMetaBase(
            &StaticMeta::getThreadEntrySlow,
            std::is_same<AccessMode, AccessModeStrict>::value) {
    detail::AtFork::registerHandler(
        this,
        /*prepare*/ &StaticMeta::preFork,
        /*parent*/ &StaticMeta::onForkParent,
        /*child*/ &StaticMeta::onForkChild);
  }

  ~StaticMeta() = delete;

  static StaticMeta<Tag, AccessMode>& instance() {
    // Leak it on exit; there's only one per process, and we don't have to
    // worry about synchronization with exiting threads.
    return detail::createGlobal<StaticMeta<Tag, AccessMode>, void>();
  }

  FOLLY_EXPORT FOLLY_ALWAYS_INLINE static ElementWrapper& get(EntryID* ent) {
    // Eliminate as many branches and as much extra code as possible in the
    // cached fast path, leaving only one branch here and one indirection
    // below.
    uint32_t id = ent->getOrInvalid();
#ifdef FOLLY_TLD_USE_FOLLY_TLS
    static FOLLY_TLS ThreadEntry* threadEntry{};
    static FOLLY_TLS size_t capacity{};
#else
    ThreadEntry* threadEntry{};
    size_t capacity{};
#endif
    if (FOLLY_UNLIKELY(capacity <= id)) {
      getSlowReserveAndCache(ent, id, threadEntry, capacity);
    }
    return threadEntry->elements[id];
  }

  FOLLY_NOINLINE static void getSlowReserveAndCache(
      EntryID* ent,
      uint32_t& id,
      ThreadEntry*& threadEntry,
      size_t& capacity) {
    auto& inst = instance();
    threadEntry = inst.threadEntry_();
    if (UNLIKELY(threadEntry->getElementsCapacity() <= id)) {
      inst.reserve(ent);
      id = ent->getOrInvalid();
    }
    capacity = threadEntry->getElementsCapacity();
    assert(capacity > id);
  }

  FOLLY_EXPORT FOLLY_NOINLINE static ThreadEntry* getThreadEntrySlow() {
    auto& meta = instance();
    auto key = meta.pthreadKey_;
    ThreadEntry* threadEntry =
        static_cast<ThreadEntry*>(pthread_getspecific(key));
    if (!threadEntry) {
      ThreadEntryList* threadEntryList = StaticMeta::getThreadEntryList();
#ifdef FOLLY_TLD_USE_FOLLY_TLS
      static FOLLY_TLS ThreadEntry threadEntrySingleton;
      threadEntry = &threadEntrySingleton;
#else
      threadEntry = new ThreadEntry();
#endif
      // If the ThreadEntry already exists but pthread_getspecific returns
      // NULL, do not add the same entry twice to the list, since that would
      // create a loop in the list.
      if (!threadEntry->list) {
        threadEntry->list = threadEntryList;
        threadEntry->listNext = threadEntryList->head;
        threadEntryList->head = threadEntry;
      }

      // If we're adding a thread entry we need to increment the list count,
      // even if the entry is reused.
      threadEntryList->count++;

      threadEntry->meta = &meta;
      int ret = pthread_setspecific(key, threadEntry);
      checkPosixError(ret, "pthread_setspecific failed");
    }
    return threadEntry;
  }

  static bool preFork() {
    return instance().lock_.try_lock(); // Make sure it's created
  }

  static void onForkParent() {
    instance().lock_.unlock();
  }

  static void onForkChild() {
    // only the current thread survives
    auto& head = instance().head_;
    // init the head list
    head.next = head.prev = &head;
    // init the circular lists
    auto elementsCapacity = head.getElementsCapacity();
    for (size_t i = 0u; i < elementsCapacity; ++i) {
      head.elements[i].node.init(&head, static_cast<uint32_t>(i));
    }
    // init the thread entry
    ThreadEntry* threadEntry = instance().threadEntry_();
    elementsCapacity = threadEntry->getElementsCapacity();
    for (size_t i = 0u; i < elementsCapacity; ++i) {
      if (!threadEntry->elements[i].node.zero()) {
        threadEntry->elements[i].node.initZero(
            threadEntry, static_cast<uint32_t>(i));
        threadEntry->elements[i].node.initIfZero(false /*locked*/);
      }
    }

    // If this thread was in the list before the fork, add it back.
    if (elementsCapacity != 0) {
      instance().push_back(threadEntry);
    }
    instance().lock_.unlock();
  }
};
} // namespace threadlocal_detail
} // namespace folly