1 | /* |
2 | * Copyright 2015-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <folly/detail/ThreadLocalDetail.h> |
17 | #include <folly/synchronization/CallOnce.h> |
18 | |
19 | #include <list> |
20 | #include <mutex> |
21 | |
// Capacity growth factors for ThreadEntry::elements (see
// StaticMetaBase::reallocate). Both are < 2 per folly/docs/FBVector.md;
// the big factor is used when the meta head_ already has a large capacity.
constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;
24 | |
25 | namespace folly { |
26 | namespace threadlocal_detail { |
27 | |
28 | void ThreadEntryNode::initIfZero(bool locked) { |
29 | if (UNLIKELY(!next)) { |
30 | if (LIKELY(locked)) { |
31 | parent->meta->pushBackLocked(parent, id); |
32 | } else { |
33 | parent->meta->pushBackUnlocked(parent, id); |
34 | } |
35 | } |
36 | } |
37 | |
/**
 * Insert this node at the tail of the circular, intrusive doubly-linked
 * list rooted at `head` for this node's element id.
 *
 * Note the representation: `next`/`prev` store ThreadEntry pointers, and the
 * actual per-id node lives at entry->elements[id].node, so every neighbor
 * access goes through elements[id].
 */
void ThreadEntryNode::push_back(ThreadEntry* head) {
  // get the head prev and next nodes
  ThreadEntryNode* hnode = &head->elements[id].node;

  // update current: new tail points forward to head, back to the old tail.
  next = head;
  prev = hnode->prev;

  // hprev: splice ourselves in after the old tail.
  // (hnode->prev must be read here, before it is overwritten below.)
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}
51 | |
52 | void ThreadEntryNode::eraseZero() { |
53 | if (LIKELY(prev != nullptr)) { |
54 | // get the prev and next nodes |
55 | ThreadEntryNode* nprev = &prev->elements[id].node; |
56 | ThreadEntryNode* nnext = &next->elements[id].node; |
57 | |
58 | // update the prev and next |
59 | nnext->prev = prev; |
60 | nprev->next = next; |
61 | |
62 | // set the prev and next to nullptr |
63 | next = prev = nullptr; |
64 | } |
65 | } |
66 | |
/**
 * Construct the per-tag metadata singleton.
 *
 * @param threadEntry  function returning the calling thread's ThreadEntry
 * @param strict       enables strict-mode synchronization via
 *                     accessAllThreadsLock_ (see destroy()/onThreadExit())
 */
StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  // head_ is a sentinel: an empty circular list points at itself.
  head_.next = head_.prev = &head_;
  // onThreadExit runs as the pthread TLS destructor for every thread that
  // stored a value under this key.
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  // Register so the key can be cleaned up in a controlled order at shutdown.
  PthreadKeyUnregister::registerKey(pthreadKey_);
}
74 | |
/**
 * Return the calling thread's ThreadEntryList, creating it on first use.
 *
 * Two implementations:
 *  - With compiler-supported TLS (FOLLY_TLD_USE_FOLLY_TLS), the list is a
 *    function-local thread_local-style singleton.
 *  - Otherwise, a process-wide pthread key (created once via createGlobal)
 *    maps each thread to a heap-allocated list. Note the key is created with
 *    a null destructor: the list is freed by onThreadExit() when its count
 *    drops to zero, not by pthreads.
 */
ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  // Small RAII-ish holder for a lazily created process-wide pthread key.
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };

  auto& instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance.get()));

  if (UNLIKELY(!threadEntryList)) {
    // First access on this thread: allocate and stash the list.
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance.get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}
110 | |
/**
 * pthread TLS destructor: tears down the exiting thread's ThreadLocal state.
 *
 * Phase 1: for the ThreadEntry whose key fired (ptr), unlink it from meta
 * under the locks, then dispose its elements (repeatedly, since destructors
 * may themselves touch ThreadLocals and re-create elements).
 *
 * Phase 2: once the thread's ThreadEntryList count drops to zero (i.e. every
 * meta's destructor has fired for this thread), sweep all entries on the
 * list the same way, then free the element arrays (and, without FOLLY_TLS,
 * the heap-allocated entries and list themselves).
 */
void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    // In strict mode, hold the reader side of accessAllThreadsLock_ so
    // destroy() can wait out concurrent per-thread destruction.
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      meta.erase(&(*threadEntry));
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to this
      // thread now that it's been removed from meta.
    }
    // NOTE: User-provided deleter / object dtor itself may be using ThreadLocal
    // with the same Tag, so dispose() calls below may (re)create some of the
    // elements or even increase elementsCapacity, thus multiple cleanup rounds
    // may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  // Other metas' destructors for this thread haven't fired yet; defer the
  // final sweep until the last one (counted via the list) gets here.
  if (threadEntryList->count) {
    return;
  }

  // dispose all the elements
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      // Same re-entrancy protocol as above: make the entry reachable while
      // user destructors run, and take the strict-mode read lock if needed.
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      // Element arrays are malloc/calloc-allocated (see reallocate()).
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    // Without compiler TLS, entries are heap-allocated and owned here.
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}
210 | |
211 | uint32_t StaticMetaBase::elementsCapacity() const { |
212 | ThreadEntry* threadEntry = (*threadEntry_)(); |
213 | |
214 | return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0; |
215 | } |
216 | |
217 | uint32_t StaticMetaBase::allocate(EntryID* ent) { |
218 | uint32_t id; |
219 | auto& meta = *this; |
220 | std::lock_guard<std::mutex> g(meta.lock_); |
221 | |
222 | id = ent->value.load(); |
223 | if (id != kEntryIDInvalid) { |
224 | return id; |
225 | } |
226 | |
227 | if (!meta.freeIds_.empty()) { |
228 | id = meta.freeIds_.back(); |
229 | meta.freeIds_.pop_back(); |
230 | } else { |
231 | id = meta.nextId_++; |
232 | } |
233 | |
234 | uint32_t old_id = ent->value.exchange(id); |
235 | DCHECK_EQ(old_id, kEntryIDInvalid); |
236 | |
237 | reserveHeadUnlocked(id); |
238 | |
239 | return id; |
240 | } |
241 | |
/**
 * Tear down the ThreadLocal slot identified by `ent` across all threads.
 *
 * Under the locks, invalidates the id, walks the per-id intrusive list of
 * ThreadEntry nodes, steals each thread's element for this id, and recycles
 * the id. The stolen elements are disposed after the locks are released so
 * user destructors never run under meta.lock_.
 */
void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the moment ThreadLocal<> dtor returns.
         * In order to achieve that, we should wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        // Atomically invalidate the id; a second destroy() sees
        // kEntryIDInvalid and bails out.
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        // Drain the per-id list; eraseZero() removes each node, so
        // getNext() always yields a fresh element until the list is empty.
        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        // Id can now be reused by a future allocate().
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}
308 | |
309 | ElementWrapper* StaticMetaBase::reallocate( |
310 | ThreadEntry* threadEntry, |
311 | uint32_t idval, |
312 | size_t& newCapacity) { |
313 | size_t prevCapacity = threadEntry->getElementsCapacity(); |
314 | |
315 | // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent |
316 | // very slow start. |
317 | auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor); |
318 | auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor); |
319 | |
320 | newCapacity = |
321 | (threadEntry->meta && |
322 | (bigCapacity <= threadEntry->meta->head_.getElementsCapacity())) |
323 | ? bigCapacity |
324 | : smallCapacity; |
325 | |
326 | assert(newCapacity > prevCapacity); |
327 | ElementWrapper* reallocated = nullptr; |
328 | |
329 | // Need to grow. Note that we can't call realloc, as elements is |
330 | // still linked in meta, so another thread might access invalid memory |
331 | // after realloc succeeds. We'll copy by hand and update our ThreadEntry |
332 | // under the lock. |
333 | if (usingJEMalloc()) { |
334 | bool success = false; |
335 | size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0); |
336 | |
337 | // Try to grow in place. |
338 | // |
339 | // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory, |
340 | // even if a previous allocation allocated more than we requested. |
341 | // This is fine; we always use MALLOCX_ZERO with jemalloc and we |
342 | // always expand our allocation to the real size. |
343 | if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) { |
344 | success = |
345 | (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) == |
346 | newByteSize); |
347 | } |
348 | |
349 | // In-place growth failed. |
350 | if (!success) { |
351 | success = |
352 | ((reallocated = static_cast<ElementWrapper*>( |
353 | mallocx(newByteSize, MALLOCX_ZERO))) != nullptr); |
354 | } |
355 | |
356 | if (success) { |
357 | // Expand to real size |
358 | assert(newByteSize / sizeof(ElementWrapper) >= newCapacity); |
359 | newCapacity = newByteSize / sizeof(ElementWrapper); |
360 | } else { |
361 | throw std::bad_alloc(); |
362 | } |
363 | } else { // no jemalloc |
364 | // calloc() is simpler than malloc() followed by memset(), and |
365 | // potentially faster when dealing with a lot of memory, as it can get |
366 | // already-zeroed pages from the kernel. |
367 | reallocated = static_cast<ElementWrapper*>( |
368 | calloc(newCapacity, sizeof(ElementWrapper))); |
369 | if (!reallocated) { |
370 | throw std::bad_alloc(); |
371 | } |
372 | } |
373 | |
374 | return reallocated; |
375 | } |
376 | |
377 | /** |
378 | * Reserve enough space in the ThreadEntry::elements for the item |
379 | * @id to fit in. |
380 | */ |
381 | |
382 | void StaticMetaBase::reserve(EntryID* id) { |
383 | auto& meta = *this; |
384 | ThreadEntry* threadEntry = (*threadEntry_)(); |
385 | size_t prevCapacity = threadEntry->getElementsCapacity(); |
386 | |
387 | uint32_t idval = id->getOrAllocate(meta); |
388 | if (prevCapacity > idval) { |
389 | return; |
390 | } |
391 | |
392 | size_t newCapacity; |
393 | ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity); |
394 | |
395 | // Success, update the entry |
396 | { |
397 | std::lock_guard<std::mutex> g(meta.lock_); |
398 | |
399 | if (prevCapacity == 0) { |
400 | meta.push_back(threadEntry); |
401 | } |
402 | |
403 | if (reallocated) { |
404 | /* |
405 | * Note: we need to hold the meta lock when copying data out of |
406 | * the old vector, because some other thread might be |
407 | * destructing a ThreadLocal and writing to the elements vector |
408 | * of this thread. |
409 | */ |
410 | if (prevCapacity != 0) { |
411 | memcpy( |
412 | reallocated, |
413 | threadEntry->elements, |
414 | sizeof(*reallocated) * prevCapacity); |
415 | } |
416 | std::swap(reallocated, threadEntry->elements); |
417 | } |
418 | |
419 | for (size_t i = prevCapacity; i < newCapacity; i++) { |
420 | threadEntry->elements[i].node.initZero(threadEntry, i); |
421 | } |
422 | |
423 | threadEntry->setElementsCapacity(newCapacity); |
424 | } |
425 | |
426 | free(reallocated); |
427 | } |
428 | |
/**
 * Grow head_'s elements array so index `id` is valid, initializing new
 * slots' nodes to point at head_. "Unlocked" = takes no lock itself; the
 * caller is expected to hold lock_ (allocate() calls this under lock_).
 */
void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    // nullptr return means jemalloc grew head_.elements in place.
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      // After the swap, `reallocated` holds the OLD array (freed below).
      std::swap(reallocated, head_.elements);
    }

    // New slots become self-referential sentinels for their per-id lists.
    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}
451 | |
452 | void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) { |
453 | if (LIKELY(!t->removed_)) { |
454 | std::lock_guard<std::mutex> g(lock_); |
455 | auto* node = &t->elements[id].node; |
456 | node->push_back(&head_); |
457 | } |
458 | } |
459 | |
460 | void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) { |
461 | if (LIKELY(!t->removed_)) { |
462 | auto* node = &t->elements[id].node; |
463 | node->push_back(&head_); |
464 | } |
465 | } |
466 | |
// Definition of the key-unregister singleton. FOLLY_STATIC_CTOR_PRIORITY_MAX
// presumably forces it to be constructed before other static initializers so
// keys created during static init can register safely -- see its definition.
FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;
469 | } // namespace threadlocal_detail |
470 | } // namespace folly |
471 | |