1/*
2 * Copyright 2011-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * Higher performance (up to 10x) atomic increment using thread caching.
19 *
20 * @author Spencer Ahrens (sahrens)
21 */
22
23#pragma once
24
25#include <atomic>
26
27#include <boost/noncopyable.hpp>
28
29#include <folly/Likely.h>
30#include <folly/ThreadLocal.h>
31
32namespace folly {
33
// Note that readFull requires holding a lock and iterating through all of the
// thread local objects with the same Tag, so if you have a lot of
// ThreadCachedInt's you should consider breaking up the Tag space even
// further.
template <class IntT, class Tag = IntT>
class ThreadCachedInt : boost::noncopyable {
  struct IntCache;

 public:
  // initialVal: starting value of the counter.
  // cacheSize: number of per-thread increments to accumulate before a thread
  // flushes its cache into the shared target_.
  explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
      : target_(initialVal), cacheSize_(cacheSize) {}

  // Adds inc to this thread's private cache, lazily allocating the cache on
  // the first increment from each thread. The cache flushes itself into
  // target_ once it has seen more than cacheSize_ updates (see
  // IntCache::increment), and again when the thread exits (IntCache dtor).
  void increment(IntT inc) {
    auto cache = cache_.get();
    if (UNLIKELY(cache == nullptr)) {
      // First increment on this thread: allocate its cache. The
      // ThreadLocalPtr takes ownership and destroys it on thread exit.
      cache = new IntCache(*this);
      cache_.reset(cache);
    }
    cache->increment(inc);
  }

  // Quickly grabs the current value which may not include some cached
  // increments (only what has already been flushed into target_).
  IntT readFast() const {
    return target_.load(std::memory_order_relaxed);
  }

  // Reads the current value plus all the cached increments. Requires grabbing
  // a lock, so this is significantly slower than readFast().
  IntT readFull() const {
    // This could race with thread destruction and so the access lock should be
    // acquired before reading the current value
    const auto accessor = cache_.accessAllThreads();
    IntT ret = readFast();
    for (const auto& cache : accessor) {
      // A cache whose reset_ flag is set has already been consumed by set()
      // or readFullAndReset(); skip it to avoid double counting.
      if (!cache.reset_.load(std::memory_order_acquire)) {
        ret += cache.val_.load(std::memory_order_relaxed);
      }
    }
    return ret;
  }

  // Quickly reads and resets current value (doesn't reset cached increments).
  IntT readFastAndReset() {
    return target_.exchange(0, std::memory_order_release);
  }

  // This function is designed for accumulating into another counter, where you
  // only want to count each increment once. It can still get the count a
  // little off, however, but it should be much better than calling readFull()
  // and set(0) sequentially.
  IntT readFullAndReset() {
    // This could race with thread destruction and so the access lock should be
    // acquired before reading the current value
    auto accessor = cache_.accessAllThreads();
    IntT ret = readFastAndReset();
    for (auto& cache : accessor) {
      if (!cache.reset_.load(std::memory_order_acquire)) {
        ret += cache.val_.load(std::memory_order_relaxed);
        // Mark the cache as consumed; the owning thread's next increment()
        // will overwrite val_ and clear the flag (see IntCache::increment).
        cache.reset_.store(true, std::memory_order_release);
      }
    }
    return ret;
  }

  // Changes how many increments a thread accumulates before flushing to
  // target_. Takes effect the next time each thread checks the threshold.
  void setCacheSize(uint32_t newSize) {
    cacheSize_.store(newSize, std::memory_order_release);
  }

  uint32_t getCacheSize() const {
    return cacheSize_.load();
  }

  ThreadCachedInt& operator+=(IntT inc) {
    increment(inc);
    return *this;
  }
  ThreadCachedInt& operator-=(IntT inc) {
    increment(-inc);
    return *this;
  }
  // pre-increment (we don't support post-increment)
  ThreadCachedInt& operator++() {
    increment(1);
    return *this;
  }
  ThreadCachedInt& operator--() {
    increment(IntT(-1));
    return *this;
  }

  // Thread-safe set function.
  // This is a best effort implementation. In some edge cases, there could be
  // data loss (missing counts): increments cached between the flag stores
  // below and the target_ store may be dropped.
  void set(IntT newVal) {
    // Invalidate every thread's cached increments, then install the new value.
    for (auto& cache : cache_.accessAllThreads()) {
      cache.reset_.store(true, std::memory_order_release);
    }
    target_.store(newVal, std::memory_order_release);
  }

 private:
  std::atomic<IntT> target_;      // the shared, flushed-to counter value
  std::atomic<uint32_t> cacheSize_;  // per-thread flush threshold
  ThreadLocalPtr<IntCache, Tag, AccessModeStrict>
      cache_; // Must be last for dtor ordering

  // Per-thread increment cache. This should only ever be modified by one
  // thread (its owner); other threads read val_/reset_ via accessAllThreads().
  struct IntCache {
    ThreadCachedInt* parent_;
    mutable std::atomic<IntT> val_;       // increments not yet flushed
    mutable uint32_t numUpdates_;         // updates since last flush
    std::atomic<bool> reset_;             // true => val_ already consumed

    explicit IntCache(ThreadCachedInt& parent)
        : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}

    void increment(IntT inc) {
      if (LIKELY(!reset_.load(std::memory_order_acquire))) {
        // This thread is the only writer to val_, so it's fine to do
        // a relaxed load and do the addition non-atomically.
        val_.store(
            val_.load(std::memory_order_relaxed) + inc,
            std::memory_order_release);
      } else {
        // The cached value was consumed by set()/readFullAndReset();
        // start over from inc, then clear the flag (in that order, so
        // readers never see the flag cleared with a stale val_).
        val_.store(inc, std::memory_order_relaxed);
        reset_.store(false, std::memory_order_release);
      }
      ++numUpdates_;
      if (UNLIKELY(
              numUpdates_ >
              parent_->cacheSize_.load(std::memory_order_acquire))) {
        flush();
      }
    }

    // Pushes the cached increments into the parent's shared counter and
    // empties the cache.
    void flush() const {
      parent_->target_.fetch_add(val_, std::memory_order_release);
      val_.store(0, std::memory_order_release);
      numUpdates_ = 0;
    }

    // Thread exit: flush whatever increments remain so they aren't lost.
    ~IntCache() {
      flush();
    }
  };
};
181
182} // namespace folly
183