1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <folly/memory/ThreadCachedArena.h>
18
19#include <algorithm>
20#include <iterator>
21#include <map>
22#include <mutex>
23#include <random>
24#include <thread>
25#include <unordered_map>
26
27#include <glog/logging.h>
28
29#include <folly/Benchmark.h>
30#include <folly/Memory.h>
31#include <folly/Range.h>
32#include <folly/lang/Align.h>
33#include <folly/portability/GTest.h>
34
35using namespace folly;
36
37namespace {
38
39class ArenaTester {
40 public:
41 explicit ArenaTester(ThreadCachedArena& arena) : arena_(&arena) {}
42
43 void allocate(size_t count, size_t maxSize);
44 void verify();
45 void merge(ArenaTester&& other);
46
47 private:
48 std::mutex mergeMutex_;
49 std::vector<std::pair<uint8_t, Range<uint8_t*>>> areas_;
50 ThreadCachedArena* arena_;
51};
52
53void ArenaTester::allocate(size_t count, size_t maxSize) {
54 // Allocate chunks of memory of random sizes
55 std::random_device rd{};
56 std::mt19937 rnd{rd()};
57 std::uniform_int_distribution<uint32_t> sizeDist(1, maxSize - 1);
58 areas_.clear();
59 areas_.reserve(count);
60 for (size_t i = 0; i < count; i++) {
61 size_t size = sizeDist(rnd);
62 uint8_t* p = static_cast<uint8_t*>(arena_->allocate(size));
63 areas_.emplace_back(uint8_t(rnd() & 0xff), Range<uint8_t*>(p, size));
64 }
65
66 // Fill each area with a different value, to prove that they don't overlap
67 // Fill in random order.
68 std::shuffle(areas_.begin(), areas_.end(), rnd);
69
70 for (auto& p : areas_) {
71 std::fill(p.second.begin(), p.second.end(), p.first);
72 }
73}
74
75void ArenaTester::verify() {
76 for (auto& p : areas_) {
77 for (auto v : p.second) {
78 EXPECT_EQ(p.first, v);
79 }
80 }
81}
82
83void ArenaTester::merge(ArenaTester&& other) {
84 {
85 std::lock_guard<std::mutex> lock(mergeMutex_);
86 std::move(
87 other.areas_.begin(), other.areas_.end(), std::back_inserter(areas_));
88 }
89 other.areas_.clear();
90}
91
92} // namespace
93
94TEST(ThreadCachedArena, BlockSize) {
95 static const size_t alignment = max_align_v;
96 static const size_t requestedBlockSize = 64;
97
98 ThreadCachedArena arena(requestedBlockSize);
99 size_t blockSize = alignment;
100 uint8_t* prev = static_cast<uint8_t*>(arena.allocate(1));
101
102 // Keep allocating until we're no longer one single alignment away from the
103 // previous allocation -- that's when we've gotten to the next block.
104 uint8_t* p;
105 while ((p = static_cast<uint8_t*>(arena.allocate(1))) == prev + alignment) {
106 prev = p;
107 blockSize += alignment;
108 }
109
110 VLOG(1) << "Requested block size: " << requestedBlockSize
111 << ", actual: " << blockSize;
112 EXPECT_LE(requestedBlockSize, blockSize);
113}
114
115TEST(ThreadCachedArena, SingleThreaded) {
116 static const size_t requestedBlockSize = 64;
117 ThreadCachedArena arena(requestedBlockSize);
118 EXPECT_EQ(arena.totalSize(), sizeof(ThreadCachedArena));
119
120 ArenaTester tester(arena);
121 tester.allocate(100, 100 << 10);
122 tester.verify();
123
124 EXPECT_GT(arena.totalSize(), sizeof(ThreadCachedArena));
125}
126
127TEST(ThreadCachedArena, MultiThreaded) {
128 static const size_t requestedBlockSize = 64;
129 ThreadCachedArena arena(requestedBlockSize);
130 ArenaTester mainTester(arena);
131
132 // Do this twice, to catch the possibility that memory from the first
133 // round gets freed
134 static const size_t numThreads = 20;
135 for (size_t i = 0; i < 2; i++) {
136 std::vector<std::thread> threads;
137 threads.reserve(numThreads);
138 for (size_t j = 0; j < numThreads; j++) {
139 threads.emplace_back([&arena, &mainTester]() {
140 ArenaTester tester(arena);
141 tester.allocate(500, 1 << 10);
142 tester.verify();
143 mainTester.merge(std::move(tester));
144 });
145 }
146 for (auto& t : threads) {
147 t.join();
148 }
149 }
150
151 mainTester.verify();
152}
153
154TEST(ThreadCachedArena, ThreadCachedArenaAllocator) {
155 using Map = std::unordered_map<
156 int,
157 int,
158 std::hash<int>,
159 std::equal_to<int>,
160 ThreadCachedArenaAllocator<std::pair<const int, int>>>;
161
162 static const size_t requestedBlockSize = 64;
163 ThreadCachedArena arena(requestedBlockSize);
164
165 Map map{0,
166 std::hash<int>(),
167 std::equal_to<int>(),
168 ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)};
169
170 for (int i = 0; i < 1000; i++) {
171 map[i] = i;
172 }
173
174 for (int i = 0; i < 1000; i++) {
175 EXPECT_EQ(i, map[i]);
176 }
177}
178
179namespace {
180
181static const int kNumValues = 10000;
182
183BENCHMARK(bmUMStandard, iters) {
184 using Map = std::unordered_map<int, int>;
185
186 while (iters--) {
187 Map map{0};
188 for (int i = 0; i < kNumValues; i++) {
189 map[i] = i;
190 }
191 }
192}
193
194BENCHMARK(bmUMArena, iters) {
195 using Map = std::unordered_map<
196 int,
197 int,
198 std::hash<int>,
199 std::equal_to<int>,
200 ThreadCachedArenaAllocator<std::pair<const int, int>>>;
201
202 while (iters--) {
203 ThreadCachedArena arena;
204
205 Map map{0,
206 std::hash<int>(),
207 std::equal_to<int>(),
208 ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)};
209
210 for (int i = 0; i < kNumValues; i++) {
211 map[i] = i;
212 }
213 }
214}
215
216BENCHMARK_DRAW_LINE();
217
218BENCHMARK(bmMStandard, iters) {
219 using Map = std::map<int, int>;
220
221 while (iters--) {
222 Map map;
223 for (int i = 0; i < kNumValues; i++) {
224 map[i] = i;
225 }
226 }
227}
228
229BENCHMARK_DRAW_LINE();
230
231BENCHMARK(bmMArena, iters) {
232 using Map = std::map<
233 int,
234 int,
235 std::less<int>,
236 ThreadCachedArenaAllocator<std::pair<const int, int>>>;
237
238 while (iters--) {
239 ThreadCachedArena arena;
240
241 Map map{std::less<int>(),
242 ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)};
243
244 for (int i = 0; i < kNumValues; i++) {
245 map[i] = i;
246 }
247 }
248}
249
250BENCHMARK_DRAW_LINE();
251
252} // namespace
253
254// Benchmark Iters Total t t/iter iter/sec
255// ----------------------------------------------------------------------------
256// Comparing benchmarks: bmUMStandard,bmUMArena
257// + 143% bmUMStandard 1570 2.005 s 1.277 ms 782.9
258// * bmUMArena 3817 2.003 s 524.7 us 1.861 k
259// ----------------------------------------------------------------------------
260// Comparing benchmarks: bmMStandard,bmMArena
261// +79.0% bmMStandard 1197 2.009 s 1.678 ms 595.8
262// * bmMArena 2135 2.002 s 937.6 us 1.042 k
263// ----------------------------------------------------------------------------
264
265int main(int argc, char* argv[]) {
266 testing::InitGoogleTest(&argc, argv);
267 gflags::ParseCommandLineFlags(&argc, &argv, true);
268 auto ret = RUN_ALL_TESTS();
269 if (!ret && FLAGS_benchmark) {
270 folly::runBenchmarks();
271 }
272 return ret;
273}
274