1 | /* |
2 | * Copyright 2015-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <folly/detail/ThreadLocalDetail.h> |
17 | #include <folly/synchronization/CallOnce.h> |
18 | |
19 | #include <list> |
20 | #include <mutex> |
21 | |
// Capacity growth factors for ThreadEntry::elements (see
// StaticMetaBase::reallocate). Both are < 2 per folly/docs/FBVector.md;
// the big factor is used when the meta head_ already has a large capacity.
constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;
24 | |
25 | namespace folly { |
26 | namespace threadlocal_detail { |
27 | |
28 | void ThreadEntryNode::initIfZero(bool locked) { |
29 | if (UNLIKELY(!next)) { |
30 | if (LIKELY(locked)) { |
31 | parent->meta->pushBackLocked(parent, id); |
32 | } else { |
33 | parent->meta->pushBackUnlocked(parent, id); |
34 | } |
35 | } |
36 | } |
37 | |
/**
 * Insert this node at the tail of the circular, intrusive doubly-linked
 * list rooted at `head` for this node's element id.
 *
 * Note the representation: `next`/`prev` store ThreadEntry pointers, and the
 * actual per-id node lives at entry->elements[id].node, so every neighbor
 * access goes through elements[id].
 */
void ThreadEntryNode::push_back(ThreadEntry* head) {
  // get the head prev and next nodes
  ThreadEntryNode* hnode = &head->elements[id].node;

  // update current: new tail points forward to head, back to the old tail.
  next = head;
  prev = hnode->prev;

  // hprev: splice ourselves in after the old tail.
  // (hnode->prev must be read here, before it is overwritten below.)
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}
51 | |
52 | void ThreadEntryNode::eraseZero() { |
53 | if (LIKELY(prev != nullptr)) { |
54 | // get the prev and next nodes |
55 | ThreadEntryNode* nprev = &prev->elements[id].node; |
56 | ThreadEntryNode* nnext = &next->elements[id].node; |
57 | |
58 | // update the prev and next |
59 | nnext->prev = prev; |
60 | nprev->next = next; |
61 | |
62 | // set the prev and next to nullptr |
63 | next = prev = nullptr; |
64 | } |
65 | } |
66 | |
/**
 * Construct the per-tag metadata singleton.
 *
 * @param threadEntry  function returning the calling thread's ThreadEntry
 * @param strict       enables strict-mode synchronization via
 *                     accessAllThreadsLock_ (see destroy()/onThreadExit())
 */
StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  // head_ is a sentinel: an empty circular list points at itself.
  head_.next = head_.prev = &head_;
  // onThreadExit runs as the pthread TLS destructor for every thread that
  // stored a value under this key.
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  // Register so the key can be cleaned up in a controlled order at shutdown.
  PthreadKeyUnregister::registerKey(pthreadKey_);
}
74 | |
/**
 * Return the calling thread's ThreadEntryList, creating it on first use.
 *
 * Two implementations:
 *  - With compiler-supported TLS (FOLLY_TLD_USE_FOLLY_TLS), the list is a
 *    function-local thread_local-style singleton.
 *  - Otherwise, a process-wide pthread key (created once via createGlobal)
 *    maps each thread to a heap-allocated list. Note the key is created with
 *    a null destructor: the list is freed by onThreadExit() when its count
 *    drops to zero, not by pthreads.
 */
ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  // Small RAII-ish holder for a lazily created process-wide pthread key.
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };

  auto& instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance.get()));

  if (UNLIKELY(!threadEntryList)) {
    // First access on this thread: allocate and stash the list.
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance.get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}
110 | |
/**
 * pthread TLS destructor: tears down the exiting thread's ThreadLocal state.
 *
 * Phase 1: for the ThreadEntry whose key fired (ptr), unlink it from meta
 * under the locks, then dispose its elements (repeatedly, since destructors
 * may themselves touch ThreadLocals and re-create elements).
 *
 * Phase 2: once the thread's ThreadEntryList count drops to zero (i.e. every
 * meta's destructor has fired for this thread), sweep all entries on the
 * list the same way, then free the element arrays (and, without FOLLY_TLS,
 * the heap-allocated entries and list themselves).
 */
void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    // In strict mode, hold the reader side of accessAllThreadsLock_ so
    // destroy() can wait out concurrent per-thread destruction.
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      meta.erase(&(*threadEntry));
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to this
      // thread now that it's been removed from meta.
    }
    // NOTE: User-provided deleter / object dtor itself may be using ThreadLocal
    // with the same Tag, so dispose() calls below may (re)create some of the
    // elements or even increase elementsCapacity, thus multiple cleanup rounds
    // may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  // Other metas' destructors for this thread haven't fired yet; defer the
  // final sweep until the last one (counted via the list) gets here.
  if (threadEntryList->count) {
    return;
  }

  // dispose all the elements
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      // Same re-entrancy protocol as above: make the entry reachable while
      // user destructors run, and take the strict-mode read lock if needed.
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      // Element arrays are malloc/calloc-allocated (see reallocate()).
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    // Without compiler TLS, entries are heap-allocated and owned here.
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}
210 | |
211 | uint32_t StaticMetaBase::elementsCapacity() const { |
212 | ThreadEntry* threadEntry = (*threadEntry_)(); |
213 | |
214 | return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0; |
215 | } |
216 | |
217 | uint32_t StaticMetaBase::allocate(EntryID* ent) { |
218 | uint32_t id; |
219 | auto& meta = *this; |
220 | std::lock_guard<std::mutex> g(meta.lock_); |
221 | |
222 | id = ent->value.load(); |
223 | if (id != kEntryIDInvalid) { |
224 | return id; |
225 | } |
226 | |
227 | if (!meta.freeIds_.empty()) { |
228 | id = meta.freeIds_.back(); |
229 | meta.freeIds_.pop_back(); |
230 | } else { |
231 | id = meta.nextId_++; |
232 | } |
233 | |
234 | uint32_t old_id = ent->value.exchange(id); |
235 | DCHECK_EQ(old_id, kEntryIDInvalid); |
236 | |
237 | reserveHeadUnlocked(id); |
238 | |
239 | return id; |
240 | } |
241 | |
/**
 * Tear down the ThreadLocal slot identified by `ent` across all threads.
 *
 * Under the locks, invalidates the id, walks the per-id intrusive list of
 * ThreadEntry nodes, steals each thread's element for this id, and recycles
 * the id. The stolen elements are disposed after the locks are released so
 * user destructors never run under meta.lock_.
 */
void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the moment ThreadLocal<> dtor returns.
         * In order to achieve that, we should wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        // Atomically invalidate the id; a second destroy() sees
        // kEntryIDInvalid and bails out.
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        // Drain the per-id list; eraseZero() removes each node, so
        // getNext() always yields a fresh element until the list is empty.
        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        // Id can now be reused by a future allocate().
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}
308 | |
309 | ElementWrapper* StaticMetaBase::reallocate( |
310 | ThreadEntry* threadEntry, |
311 | uint32_t idval, |
312 | size_t& newCapacity) { |
313 | size_t prevCapacity = threadEntry->getElementsCapacity(); |
314 | |
315 | // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent |
316 | // very slow start. |
317 | auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor); |
318 | auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor); |
319 | |
320 | newCapacity = |
321 | (threadEntry->meta && |
322 | (bigCapacity <= threadEntry->meta->head_.getElementsCapacity())) |
323 | ? bigCapacity |
324 | : smallCapacity; |
325 | |
326 | assert(newCapacity > prevCapacity); |
327 | ElementWrapper* reallocated = nullptr; |
328 | |
329 | // Need to grow. Note that we can't call realloc, as elements is |
330 | // still linked in meta, so another thread might access invalid memory |
331 | // after realloc succeeds. We'll copy by hand and update our ThreadEntry |
332 | // under the lock. |
333 | if (usingJEMalloc()) { |
334 | bool success = false; |
335 | size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0); |
336 | |
337 | // Try to grow in place. |
338 | // |
339 | // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory, |
340 | // even if a previous allocation allocated more than we requested. |
341 | // This is fine; we always use MALLOCX_ZERO with jemalloc and we |
342 | // always expand our allocation to the real size. |
343 | if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) { |
344 | success = |
345 | (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) == |
346 | newByteSize); |
347 | } |
348 | |
349 | // In-place growth failed. |
350 | if (!success) { |
351 | success = |
352 | ((reallocated = static_cast<ElementWrapper*>( |
353 | mallocx(newByteSize, MALLOCX_ZERO))) != nullptr); |
354 | } |
355 | |
356 | if (success) { |
357 | // Expand to real size |
358 | assert(newByteSize / sizeof(ElementWrapper) >= newCapacity); |
359 | newCapacity = newByteSize / sizeof(ElementWrapper); |
360 | } else { |
361 | throw std::bad_alloc(); |
362 | } |
363 | } else { // no jemalloc |
364 | // calloc() is simpler than malloc() followed by memset(), and |
365 | // potentially faster when dealing with a lot of memory, as it can get |
366 | // already-zeroed pages from the kernel. |
367 | reallocated = static_cast<ElementWrapper*>( |
368 | calloc(newCapacity, sizeof(ElementWrapper))); |
369 | if (!reallocated) { |
370 | throw std::bad_alloc(); |
371 | } |
372 | } |
373 | |
374 | return reallocated; |
375 | } |
376 | |
377 | /** |
378 | * Reserve enough space in the ThreadEntry::elements for the item |
379 | * @id to fit in. |
380 | */ |
381 | |
382 | void StaticMetaBase::reserve(EntryID* id) { |
383 | auto& meta = *this; |
384 | ThreadEntry* threadEntry = (*threadEntry_)(); |
385 | size_t prevCapacity = threadEntry->getElementsCapacity(); |
386 | |
387 | uint32_t idval = id->getOrAllocate(meta); |
388 | if (prevCapacity > idval) { |
389 | return; |
390 | } |
391 | |
392 | size_t newCapacity; |
393 | ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity); |
394 | |
395 | // Success, update the entry |
396 | { |
397 | std::lock_guard<std::mutex> g(meta.lock_); |
398 | |
399 | if (prevCapacity == 0) { |
400 | meta.push_back(threadEntry); |
401 | } |
402 | |
403 | if (reallocated) { |
404 | /* |
405 | * Note: we need to hold the meta lock when copying data out of |
406 | * the old vector, because some other thread might be |
407 | * destructing a ThreadLocal and writing to the elements vector |
408 | * of this thread. |
409 | */ |
410 | if (prevCapacity != 0) { |
411 | memcpy( |
412 | reallocated, |
413 | threadEntry->elements, |
414 | sizeof(*reallocated) * prevCapacity); |
415 | } |
416 | std::swap(reallocated, threadEntry->elements); |
417 | } |
418 | |
419 | for (size_t i = prevCapacity; i < newCapacity; i++) { |
420 | threadEntry->elements[i].node.initZero(threadEntry, i); |
421 | } |
422 | |
423 | threadEntry->setElementsCapacity(newCapacity); |
424 | } |
425 | |
426 | free(reallocated); |
427 | } |
428 | |
/**
 * Grow head_'s elements array so index `id` is valid, initializing new
 * slots' nodes to point at head_. "Unlocked" = takes no lock itself; the
 * caller is expected to hold lock_ (allocate() calls this under lock_).
 */
void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    // nullptr return means jemalloc grew head_.elements in place.
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      // After the swap, `reallocated` holds the OLD array (freed below).
      std::swap(reallocated, head_.elements);
    }

    // New slots become self-referential sentinels for their per-id lists.
    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}
451 | |
452 | void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) { |
453 | if (LIKELY(!t->removed_)) { |
454 | std::lock_guard<std::mutex> g(lock_); |
455 | auto* node = &t->elements[id].node; |
456 | node->push_back(&head_); |
457 | } |
458 | } |
459 | |
460 | void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) { |
461 | if (LIKELY(!t->removed_)) { |
462 | auto* node = &t->elements[id].node; |
463 | node->push_back(&head_); |
464 | } |
465 | } |
466 | |
// Definition of the key-unregister singleton. FOLLY_STATIC_CTOR_PRIORITY_MAX
// presumably forces it to be constructed before other static initializers so
// keys created during static init can register safely -- see its definition.
FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;
469 | } // namespace threadlocal_detail |
470 | } // namespace folly |
471 | |