/*
 * Copyright 2015-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <folly/detail/ThreadLocalDetail.h>
#include <folly/synchronization/CallOnce.h>

#include <list>
#include <mutex>

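// Growth factors for the elements arrays. reallocate() multiplies (id + 5) by
// one of these and uses the larger factor only when the global head_ array
// already has at least that capacity. For example, with id == 10 the small
// factor yields (10 + 5) * 1.1 = 16 slots and the big factor (10 + 5) * 1.7 =
// 25 slots.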
constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;

namespace folly {
namespace threadlocal_detail {

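// Lazily link this node into the per-id list of ThreadEntries the first time
// the element is touched (next == nullptr means "not linked yet"). When
// `locked` is true the node is appended via pushBackLocked(), which acquires
// the meta lock itself; otherwise pushBackUnlocked() is used and
// synchronization is left to the caller.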
void ThreadEntryNode::initIfZero(bool locked) {
  if (UNLIKELY(!next)) {
    if (LIKELY(locked)) {
      parent->meta->pushBackLocked(parent, id);
    } else {
      parent->meta->pushBackUnlocked(parent, id);
    }
  }
}

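// Append this node to the tail of the circular, doubly linked list of
// ThreadEntries that hold an element for this id. The list is threaded
// through elements[id].node of each ThreadEntry, with `head` acting as the
// sentinel.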
void ThreadEntryNode::push_back(ThreadEntry* head) {
  // get the head prev and next nodes
  ThreadEntryNode* hnode = &head->elements[id].node;

  // update current
  next = head;
  prev = hnode->prev;

  // hprev
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}

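// Unlink this node from the per-id list, if it was ever linked
// (prev == nullptr means the node was never pushed back). Safe to call
// repeatedly; after the first call the node is back in its zero state.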
void ThreadEntryNode::eraseZero() {
  if (LIKELY(prev != nullptr)) {
    // get the prev and next nodes
    ThreadEntryNode* nprev = &prev->elements[id].node;
    ThreadEntryNode* nnext = &next->elements[id].node;

    // update the prev and next
    nnext->prev = prev;
    nprev->next = next;

    // set the prev and next to nullptr
    next = prev = nullptr;
  }
}

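// Set up the circular list sentinel and create the pthread key whose
// destructor (onThreadExit) runs per-thread cleanup. The key is registered
// with PthreadKeyUnregister so it can be deleted at program shutdown.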
StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  head_.next = head_.prev = &head_;
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  PthreadKeyUnregister::registerKey(pthreadKey_);
}

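// Return the ThreadEntryList for the calling thread. With
// FOLLY_TLD_USE_FOLLY_TLS this is a plain thread-local singleton; otherwise
// the list is allocated on first use and stashed in a dedicated pthread key.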
ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };

  auto& instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance.get()));

  if (UNLIKELY(!threadEntryList)) {
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance.get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}

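// pthread key destructor: runs when a thread that used ThreadLocal exits.
// Unlinks the ThreadEntry from meta, then repeatedly disposes elements until
// no destructor re-creates one (a destructor may itself touch a ThreadLocal
// with the same tag). Once the last ThreadEntry in the list is done, the
// whole ThreadEntryList is torn down and its element arrays freed.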
void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      meta.erase(&(*threadEntry));
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to
      // this thread now that it's been removed from meta.
    }
    // NOTE: User-provided deleter / object dtor itself may be using
    // ThreadLocal with the same Tag, so dispose() calls below may (re)create
    // some of the elements or even increase elementsCapacity, thus multiple
    // cleanup rounds may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  if (threadEntryList->count) {
    return;
  }

  // dispose all the elements
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}

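// Capacity of the calling thread's elements array; 0 if this thread has no
// ThreadEntry.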
uint32_t StaticMetaBase::elementsCapacity() const {
  ThreadEntry* threadEntry = (*threadEntry_)();

  return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0;
}

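// Assign a slot id to a ThreadLocal instance, reusing a previously freed id
// when possible, and make sure head_ can index it. Returns the existing id if
// one was already assigned.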
uint32_t StaticMetaBase::allocate(EntryID* ent) {
  uint32_t id;
  auto& meta = *this;
  std::lock_guard<std::mutex> g(meta.lock_);

  id = ent->value.load();
  if (id != kEntryIDInvalid) {
    return id;
  }

  if (!meta.freeIds_.empty()) {
    id = meta.freeIds_.back();
    meta.freeIds_.pop_back();
  } else {
    id = meta.nextId_++;
  }

  uint32_t old_id = ent->value.exchange(id);
  DCHECK_EQ(old_id, kEntryIDInvalid);

  reserveHeadUnlocked(id);

  return id;
}

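// Tear down a slot when its ThreadLocal is destroyed: under the appropriate
// locks, unlink every thread's node for this id and collect their elements,
// then recycle the id and run the elements' cleanup outside the locks.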
void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the moment ThreadLocal<> dtor returns.
         * In order to achieve that, we should wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}

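// Grow threadEntry->elements so that slot `idval` fits. Returns a freshly
// allocated, zeroed array that the caller must splice in under the meta lock,
// or nullptr if jemalloc managed to grow the existing allocation in place.
// newCapacity is set to the capacity actually obtained.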
ElementWrapper* StaticMetaBase::reallocate(
    ThreadEntry* threadEntry,
    uint32_t idval,
    size_t& newCapacity) {
  size_t prevCapacity = threadEntry->getElementsCapacity();

  // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent
  // very slow start.
  auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor);
  auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor);

  newCapacity =
      (threadEntry->meta &&
       (bigCapacity <= threadEntry->meta->head_.getElementsCapacity()))
      ? bigCapacity
      : smallCapacity;

  assert(newCapacity > prevCapacity);
  ElementWrapper* reallocated = nullptr;

  // Need to grow. Note that we can't call realloc, as elements is
  // still linked in meta, so another thread might access invalid memory
  // after realloc succeeds. We'll copy by hand and update our ThreadEntry
  // under the lock.
  if (usingJEMalloc()) {
    bool success = false;
    size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);

    // Try to grow in place.
    //
    // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
    // even if a previous allocation allocated more than we requested.
    // This is fine; we always use MALLOCX_ZERO with jemalloc and we
    // always expand our allocation to the real size.
    if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) {
      success =
          (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) ==
           newByteSize);
    }

    // In-place growth failed.
    if (!success) {
      success =
          ((reallocated = static_cast<ElementWrapper*>(
                mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
    }

    if (success) {
      // Expand to real size
      assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
      newCapacity = newByteSize / sizeof(ElementWrapper);
    } else {
      throw std::bad_alloc();
    }
  } else { // no jemalloc
    // calloc() is simpler than malloc() followed by memset(), and
    // potentially faster when dealing with a lot of memory, as it can get
    // already-zeroed pages from the kernel.
    reallocated = static_cast<ElementWrapper*>(
        calloc(newCapacity, sizeof(ElementWrapper)));
    if (!reallocated) {
      throw std::bad_alloc();
    }
  }

  return reallocated;
}

/**
 * Reserve enough space in the ThreadEntry::elements for the item
 * @id to fit in.
 */

void StaticMetaBase::reserve(EntryID* id) {
  auto& meta = *this;
  ThreadEntry* threadEntry = (*threadEntry_)();
  size_t prevCapacity = threadEntry->getElementsCapacity();

  uint32_t idval = id->getOrAllocate(meta);
  if (prevCapacity > idval) {
    return;
  }

  size_t newCapacity;
  ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity);

  // Success, update the entry
  {
    std::lock_guard<std::mutex> g(meta.lock_);

    if (prevCapacity == 0) {
      meta.push_back(threadEntry);
    }

    if (reallocated) {
      /*
       * Note: we need to hold the meta lock when copying data out of
       * the old vector, because some other thread might be
       * destructing a ThreadLocal and writing to the elements vector
       * of this thread.
       */
      if (prevCapacity != 0) {
        memcpy(
            reallocated,
            threadEntry->elements,
            sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, threadEntry->elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      threadEntry->elements[i].node.initZero(threadEntry, i);
    }

    threadEntry->setElementsCapacity(newCapacity);
  }

  free(reallocated);
}

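// Same as reserve(), but for the global head_ sentinel; the caller already
// holds lock_, so the new array can be swapped in directly.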
void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, head_.elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}

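// Append a thread's node for `id` to the global per-id list. The "Locked"
// variant acquires the meta lock itself; the "Unlocked" variant expects the
// caller to have arranged any needed synchronization. Entries already being
// torn down (removed_) are skipped.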
void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    std::lock_guard<std::mutex> g(lock_);
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;
} // namespace threadlocal_detail
} // namespace folly