1 | /* |
2 | * Copyright 2011-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | /** |
18 | * Higher performance (up to 10x) atomic increment using thread caching. |
19 | * |
20 | * @author Spencer Ahrens (sahrens) |
21 | */ |
22 | |
23 | #pragma once |
24 | |
25 | #include <atomic> |
26 | |
27 | #include <boost/noncopyable.hpp> |
28 | |
29 | #include <folly/Likely.h> |
30 | #include <folly/ThreadLocal.h> |
31 | |
32 | namespace folly { |
33 | |
// Note that readFull requires holding a lock and iterating through all of the
// thread local objects with the same Tag, so if you have a lot of
// ThreadCachedInt's you should consider breaking up the Tag space even
// further.
/**
 * A counter that reduces cross-thread contention: each thread buffers its
 * increments in a thread-local IntCache and only folds them into the shared
 * atomic target_ after more than cacheSize_ local updates (or at thread
 * exit, via the IntCache destructor).  Readers choose between the cheap but
 * possibly-stale readFast() and the exact-but-locking readFull().
 */
template <class IntT, class Tag = IntT>
class ThreadCachedInt : boost::noncopyable {
  // Per-thread cache type; defined at the bottom of the class.
  struct IntCache;

 public:
  // initialVal seeds the shared counter; cacheSize is how many local updates
  // a thread may buffer before flushing them into the shared counter.
  explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
      : target_(initialVal), cacheSize_(cacheSize) {}

  // Adds inc to this thread's local cache, creating the cache lazily on the
  // first increment from this thread.  The shared target_ is only touched
  // when the cache decides to flush.
  void increment(IntT inc) {
    auto cache = cache_.get();
    if (UNLIKELY(cache == nullptr)) {
      // First increment on this thread: allocate a cache.  cache_ owns it
      // and destroys it on thread exit, which flushes any remaining count.
      cache = new IntCache(*this);
      cache_.reset(cache);
    }
    cache->increment(inc);
  }

  // Quickly grabs the current value which may not include some cached
  // increments (those still buffered in other threads' IntCaches).
  IntT readFast() const {
    return target_.load(std::memory_order_relaxed);
  }

  // Reads the current value plus all the cached increments. Requires grabbing
  // a lock, so this is significantly slower than readFast().
  IntT readFull() const {
    // This could race with thread destruction and so the access lock should be
    // acquired before reading the current value
    const auto accessor = cache_.accessAllThreads();
    IntT ret = readFast();
    for (const auto& cache : accessor) {
      // Caches marked reset_ were already consumed by a previous
      // readFullAndReset()/set(); their val_ must not be counted again.
      if (!cache.reset_.load(std::memory_order_acquire)) {
        ret += cache.val_.load(std::memory_order_relaxed);
      }
    }
    return ret;
  }

  // Quickly reads and resets current value (doesn't reset cached increments).
  IntT readFastAndReset() {
    return target_.exchange(0, std::memory_order_release);
  }

  // This function is designed for accumulating into another counter, where you
  // only want to count each increment once. It can still get the count a
  // little off, however, but it should be much better than calling readFull()
  // and set(0) sequentially.
  IntT readFullAndReset() {
    // This could race with thread destruction and so the access lock should be
    // acquired before reading the current value
    auto accessor = cache_.accessAllThreads();
    IntT ret = readFastAndReset();
    for (auto& cache : accessor) {
      if (!cache.reset_.load(std::memory_order_acquire)) {
        ret += cache.val_.load(std::memory_order_relaxed);
        // Setting reset_ tells the owning thread to overwrite val_ with the
        // next increment instead of adding to it (see IntCache::increment).
        cache.reset_.store(true, std::memory_order_release);
      }
    }
    return ret;
  }

  // Sets the flush threshold for all threads' caches; takes effect on each
  // thread's next increment.
  void setCacheSize(uint32_t newSize) {
    cacheSize_.store(newSize, std::memory_order_release);
  }

  uint32_t getCacheSize() const {
    return cacheSize_.load();
  }

  ThreadCachedInt& operator+=(IntT inc) {
    increment(inc);
    return *this;
  }
  // For unsigned IntT, negation wraps modulo 2^N, so decrementing still
  // produces the expected subtraction.
  ThreadCachedInt& operator-=(IntT inc) {
    increment(-inc);
    return *this;
  }
  // pre-increment (we don't support post-increment)
  ThreadCachedInt& operator++() {
    increment(1);
    return *this;
  }
  ThreadCachedInt& operator--() {
    increment(IntT(-1));
    return *this;
  }

  // Thread-safe set function.
  // This is a best effort implementation. In some edge cases, there could be
  // data loss (missing counts)
  void set(IntT newVal) {
    // Invalidate every thread's pending cache, then install the new value.
    for (auto& cache : cache_.accessAllThreads()) {
      cache.reset_.store(true, std::memory_order_release);
    }
    target_.store(newVal, std::memory_order_release);
  }

 private:
  // The shared counter that caches flush into and readers read from.
  std::atomic<IntT> target_;
  // Flush threshold; read by every thread's IntCache::increment.
  std::atomic<uint32_t> cacheSize_;
  // Must be last for dtor ordering: members destruct in reverse declaration
  // order, so cache_ (and every thread's IntCache, which flushes into
  // target_ via parent_) is torn down while target_ is still alive.
  ThreadLocalPtr<IntCache, Tag, AccessModeStrict>
      cache_; // Must be last for dtor ordering

  // This should only ever be modified by one thread (its owner); other
  // threads only read val_ and read/write reset_ via accessAllThreads().
  struct IntCache {
    ThreadCachedInt* parent_;
    // Pending increments not yet flushed to parent_->target_.  mutable so
    // the const flush() can modify them.
    mutable std::atomic<IntT> val_;
    // Local updates since the last flush; compared against cacheSize_.
    mutable uint32_t numUpdates_;
    // Set by readers (readFullAndReset/set) to mark val_ as already consumed.
    std::atomic<bool> reset_;

    explicit IntCache(ThreadCachedInt& parent)
        : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}

    void increment(IntT inc) {
      if (LIKELY(!reset_.load(std::memory_order_acquire))) {
        // This thread is the only writer to val_, so it's fine to do
        // a relaxed load and do the addition non-atomically.
        val_.store(
            val_.load(std::memory_order_relaxed) + inc,
            std::memory_order_release);
      } else {
        // A reader already consumed the old val_: start a fresh tally with
        // just this increment and clear the reset marker.
        val_.store(inc, std::memory_order_relaxed);
        reset_.store(false, std::memory_order_release);
      }
      ++numUpdates_;
      if (UNLIKELY(
              numUpdates_ >
              parent_->cacheSize_.load(std::memory_order_acquire))) {
        flush();
      }
    }

    // Folds the cached value into the shared counter and restarts the local
    // tally.
    void flush() const {
      parent_->target_.fetch_add(val_, std::memory_order_release);
      val_.store(0, std::memory_order_release);
      numUpdates_ = 0;
    }

    // NOTE(review): flushes unconditionally, so if reset_ is still set (a
    // reader already consumed val_ and the owner never incremented again)
    // the value appears to be counted twice -- presumably accepted as part
    // of the documented "best effort" accuracy; confirm against callers.
    ~IntCache() {
      flush();
    }
  };
};
181 | |
182 | } // namespace folly |
183 | |