1/*
2 * Copyright 2011-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * Improved thread local storage for non-trivial types (similar speed as
19 * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
20 * than boost::thread_specific_ptr).
21 *
22 * Also includes an accessor interface to walk all the thread local child
23 * objects of a parent. accessAllThreads() initializes an accessor which holds
24 * a global lock *that blocks all creation and destruction of ThreadLocal
25 * objects with the same Tag* and can be used as an iterable container.
26 * accessAllThreads() can race with destruction of thread-local elements. We
27 * provide a strict mode which is dangerous because it requires the access lock
28 * to be held while destroying thread-local elements which could cause
29 * deadlocks. We gate this mode behind the AccessModeStrict template parameter.
30 *
31 * Intended use is for frequent write, infrequent read data access patterns such
32 * as counters.
33 *
34 * There are two classes here - ThreadLocal and ThreadLocalPtr. ThreadLocalPtr
35 * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
36 * wrapper around ThreadLocalPtr that manages allocation automatically.
37 *
38 * @author Spencer Ahrens (sahrens)
39 */
40
41#pragma once
42
43#include <iterator>
44#include <type_traits>
45#include <utility>
46
47#include <folly/Likely.h>
48#include <folly/Portability.h>
49#include <folly/ScopeGuard.h>
50#include <folly/SharedMutex.h>
51#include <folly/detail/ThreadLocalDetail.h>
52
53namespace folly {
54
55template <class T, class Tag, class AccessMode>
56class ThreadLocalPtr;
57
58template <class T, class Tag = void, class AccessMode = void>
59class ThreadLocal {
60 public:
61 constexpr ThreadLocal() : constructor_([]() { return new T(); }) {}
62
63 template <typename F, std::enable_if_t<is_invocable_r<T*, F>::value, int> = 0>
64 explicit ThreadLocal(F&& constructor)
65 : constructor_(std::forward<F>(constructor)) {}
66
67 FOLLY_ALWAYS_INLINE FOLLY_ATTR_VISIBILITY_HIDDEN T* get() const {
68 auto const ptr = tlp_.get();
69 return FOLLY_LIKELY(!!ptr) ? ptr : makeTlp();
70 }
71
72 T* operator->() const {
73 return get();
74 }
75
76 T& operator*() const {
77 return *get();
78 }
79
80 void reset(T* newPtr = nullptr) {
81 tlp_.reset(newPtr);
82 }
83
84 typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor;
85 Accessor accessAllThreads() const {
86 return tlp_.accessAllThreads();
87 }
88
89 // movable
90 ThreadLocal(ThreadLocal&&) = default;
91 ThreadLocal& operator=(ThreadLocal&&) = default;
92
93 private:
94 // non-copyable
95 ThreadLocal(const ThreadLocal&) = delete;
96 ThreadLocal& operator=(const ThreadLocal&) = delete;
97
98 FOLLY_NOINLINE T* makeTlp() const {
99 auto const ptr = constructor_();
100 tlp_.reset(ptr);
101 return ptr;
102 }
103
104 mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_;
105 std::function<T*()> constructor_;
106};
107
108/*
109 * The idea here is that __thread is faster than pthread_getspecific, so we
110 * keep a __thread array of pointers to objects (ThreadEntry::elements) where
111 * each array has an index for each unique instance of the ThreadLocalPtr
112 * object. Each ThreadLocalPtr object has a unique id that is an index into
113 * these arrays so we can fetch the correct object from thread local storage
114 * very efficiently.
115 *
 * In order to prevent unbounded growth of the id space and thus huge
 * ThreadEntry::elements arrays, for example due to continuous creation and
118 * destruction of ThreadLocalPtr objects, we keep a set of all active
119 * instances. When an instance is destroyed we remove it from the active
120 * set and insert the id into freeIds_ for reuse. These operations require a
121 * global mutex, but only happen at construction and destruction time.
122 *
123 * We use a single global pthread_key_t per Tag to manage object destruction and
124 * memory cleanup upon thread exit because there is a finite number of
125 * pthread_key_t's available per machine.
126 *
127 * NOTE: Apple platforms don't support the same semantics for __thread that
128 * Linux does (and it's only supported at all on i386). For these, use
129 * pthread_setspecific()/pthread_getspecific() for the per-thread
130 * storage. Windows (MSVC and GCC) does support the same semantics
131 * with __declspec(thread)
132 */
133
134template <class T, class Tag = void, class AccessMode = void>
135class ThreadLocalPtr {
136 private:
137 typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta;
138
139 public:
140 constexpr ThreadLocalPtr() : id_() {}
141
142 ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(std::move(other.id_)) {}
143
144 ThreadLocalPtr& operator=(ThreadLocalPtr&& other) {
145 assert(this != &other);
146 destroy();
147 id_ = std::move(other.id_);
148 return *this;
149 }
150
151 ~ThreadLocalPtr() {
152 destroy();
153 }
154
155 T* get() const {
156 threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
157 return static_cast<T*>(w.ptr);
158 }
159
160 T* operator->() const {
161 return get();
162 }
163
164 T& operator*() const {
165 return *get();
166 }
167
168 T* release() {
169 threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
170
171 return static_cast<T*>(w.release());
172 }
173
174 void reset(T* newPtr = nullptr) {
175 auto guard = makeGuard([&] { delete newPtr; });
176 threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
177
178 w->dispose(TLPDestructionMode::THIS_THREAD);
179 // need to get a new ptr since the
180 // ThreadEntry::elements array can be reallocated
181 w = &StaticMeta::get(&id_);
182 w->cleanup();
183 guard.dismiss();
184 w->set(newPtr);
185 }
186
187 explicit operator bool() const {
188 return get() != nullptr;
189 }
190
191 /**
192 * reset() that transfers ownership from a smart pointer
193 */
194 template <
195 typename SourceT,
196 typename Deleter,
197 typename = typename std::enable_if<
198 std::is_convertible<SourceT*, T*>::value>::type>
199 void reset(std::unique_ptr<SourceT, Deleter> source) {
200 auto deleter = [delegate = source.get_deleter()](
201 T* ptr, TLPDestructionMode) { delegate(ptr); };
202 reset(source.release(), deleter);
203 }
204
205 /**
206 * reset() that transfers ownership from a smart pointer with the default
207 * deleter
208 */
209 template <
210 typename SourceT,
211 typename = typename std::enable_if<
212 std::is_convertible<SourceT*, T*>::value>::type>
213 void reset(std::unique_ptr<SourceT> source) {
214 reset(source.release());
215 }
216
217 /**
218 * reset() with a custom deleter:
219 * deleter(T* ptr, TLPDestructionMode mode)
220 * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus
221 * deleting pointers for all threads), and THIS_THREAD if we're only deleting
222 * the member for one thread (because of thread exit or reset()).
223 * Invoking the deleter must not throw.
224 */
225 template <class Deleter>
226 void reset(T* newPtr, const Deleter& deleter) {
227 auto guard = makeGuard([&] {
228 if (newPtr) {
229 deleter(newPtr, TLPDestructionMode::THIS_THREAD);
230 }
231 });
232 threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
233 w->dispose(TLPDestructionMode::THIS_THREAD);
234 // need to get a new ptr since the
235 // ThreadEntry::elements array can be reallocated
236 w = &StaticMeta::get(&id_);
237 w->cleanup();
238 guard.dismiss();
239 w->set(newPtr, deleter);
240 }
241
242 // Holds a global lock for iteration through all thread local child objects.
243 // Can be used as an iterable container.
244 // Use accessAllThreads() to obtain one.
245 class Accessor {
246 friend class ThreadLocalPtr<T, Tag, AccessMode>;
247
248 threadlocal_detail::StaticMetaBase& meta_;
249 SharedMutex* accessAllThreadsLock_;
250 std::mutex* lock_;
251 uint32_t id_;
252
253 public:
254 class Iterator;
255 friend class Iterator;
256
257 // The iterators obtained from Accessor are bidirectional iterators.
258 class Iterator {
259 friend class Accessor;
260 const Accessor* accessor_;
261 threadlocal_detail::ThreadEntryNode* e_;
262
263 void increment() {
264 e_ = e_->getNext();
265 incrementToValid();
266 }
267
268 void decrement() {
269 e_ = e_->getPrev();
270 decrementToValid();
271 }
272
273 const T& dereference() const {
274 return *static_cast<T*>(
275 e_->getThreadEntry()->elements[accessor_->id_].ptr);
276 }
277
278 T& dereference() {
279 return *static_cast<T*>(
280 e_->getThreadEntry()->elements[accessor_->id_].ptr);
281 }
282
283 bool equal(const Iterator& other) const {
284 return (accessor_->id_ == other.accessor_->id_ && e_ == other.e_);
285 }
286
287 explicit Iterator(const Accessor* accessor)
288 : accessor_(accessor),
289 e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {}
290
291 // we just need to check the ptr since it can be set to nullptr
292 // even if the entry is part of the list
293 bool valid() const {
294 return (e_->getThreadEntry()->elements[accessor_->id_].ptr);
295 }
296
297 void incrementToValid() {
298 for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
299 !valid();
300 e_ = e_->getNext()) {
301 }
302 }
303
304 void decrementToValid() {
305 for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
306 !valid();
307 e_ = e_->getPrev()) {
308 }
309 }
310
311 public:
312 using difference_type = ssize_t;
313 using value_type = T;
314 using reference = T const&;
315 using pointer = T const*;
316 using iterator_category = std::bidirectional_iterator_tag;
317
318 Iterator& operator++() {
319 increment();
320 return *this;
321 }
322
323 Iterator& operator++(int) {
324 Iterator copy(*this);
325 increment();
326 return copy;
327 }
328
329 Iterator& operator--() {
330 decrement();
331 return *this;
332 }
333
334 Iterator& operator--(int) {
335 Iterator copy(*this);
336 decrement();
337 return copy;
338 }
339
340 T& operator*() {
341 return dereference();
342 }
343
344 T const& operator*() const {
345 return dereference();
346 }
347
348 T* operator->() {
349 return &dereference();
350 }
351
352 T const* operator->() const {
353 return &dereference();
354 }
355
356 bool operator==(Iterator const& rhs) const {
357 return equal(rhs);
358 }
359
360 bool operator!=(Iterator const& rhs) const {
361 return !equal(rhs);
362 }
363 };
364
365 ~Accessor() {
366 release();
367 }
368
369 Iterator begin() const {
370 return ++Iterator(this);
371 }
372
373 Iterator end() const {
374 return Iterator(this);
375 }
376
377 Accessor(const Accessor&) = delete;
378 Accessor& operator=(const Accessor&) = delete;
379
380 Accessor(Accessor&& other) noexcept
381 : meta_(other.meta_),
382 accessAllThreadsLock_(other.accessAllThreadsLock_),
383 lock_(other.lock_),
384 id_(other.id_) {
385 other.id_ = 0;
386 other.accessAllThreadsLock_ = nullptr;
387 other.lock_ = nullptr;
388 }
389
390 Accessor& operator=(Accessor&& other) noexcept {
391 // Each Tag has its own unique meta, and accessors with different Tags
392 // have different types. So either *this is empty, or this and other
393 // have the same tag. But if they have the same tag, they have the same
394 // meta (and lock), so they'd both hold the lock at the same time,
395 // which is impossible, which leaves only one possible scenario --
396 // *this is empty. Assert it.
397 assert(&meta_ == &other.meta_);
398 assert(lock_ == nullptr);
399 using std::swap;
400 swap(accessAllThreadsLock_, other.accessAllThreadsLock_);
401 swap(lock_, other.lock_);
402 swap(id_, other.id_);
403 }
404
405 Accessor()
406 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
407 accessAllThreadsLock_(nullptr),
408 lock_(nullptr),
409 id_(0) {}
410
411 private:
412 explicit Accessor(uint32_t id)
413 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
414 accessAllThreadsLock_(&meta_.accessAllThreadsLock_),
415 lock_(&meta_.lock_) {
416 accessAllThreadsLock_->lock();
417 lock_->lock();
418 id_ = id;
419 }
420
421 void release() {
422 if (lock_) {
423 lock_->unlock();
424 DCHECK(accessAllThreadsLock_ != nullptr);
425 accessAllThreadsLock_->unlock();
426 id_ = 0;
427 lock_ = nullptr;
428 accessAllThreadsLock_ = nullptr;
429 }
430 }
431 };
432
433 // accessor allows a client to iterate through all thread local child
434 // elements of this ThreadLocal instance. Holds a global lock for each <Tag>
435 Accessor accessAllThreads() const {
436 static_assert(
437 !std::is_same<Tag, void>::value,
438 "Must use a unique Tag to use the accessAllThreads feature");
439 return Accessor(id_.getOrAllocate(StaticMeta::instance()));
440 }
441
442 private:
443 void destroy() {
444 StaticMeta::instance().destroy(&id_);
445 }
446
447 // non-copyable
448 ThreadLocalPtr(const ThreadLocalPtr&) = delete;
449 ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
450
451 mutable typename StaticMeta::EntryID id_;
452};
453
454namespace threadlocal_detail {
455template <typename>
456struct static_meta_of;
457
458template <typename T, typename Tag, typename AccessMode>
459struct static_meta_of<ThreadLocalPtr<T, Tag, AccessMode>> {
460 using type = StaticMeta<Tag, AccessMode>;
461};
462
463template <typename T, typename Tag, typename AccessMode>
464struct static_meta_of<ThreadLocal<T, Tag, AccessMode>> {
465 using type = StaticMeta<Tag, AccessMode>;
466};
467
468} // namespace threadlocal_detail
469} // namespace folly
470