1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/memory/ThreadCachedArena.h> |
18 | |
19 | #include <algorithm> |
20 | #include <iterator> |
21 | #include <map> |
22 | #include <mutex> |
23 | #include <random> |
24 | #include <thread> |
25 | #include <unordered_map> |
26 | |
27 | #include <glog/logging.h> |
28 | |
29 | #include <folly/Benchmark.h> |
30 | #include <folly/Memory.h> |
31 | #include <folly/Range.h> |
32 | #include <folly/lang/Align.h> |
33 | #include <folly/portability/GTest.h> |
34 | |
35 | using namespace folly; |
36 | |
37 | namespace { |
38 | |
39 | class ArenaTester { |
40 | public: |
41 | explicit ArenaTester(ThreadCachedArena& arena) : arena_(&arena) {} |
42 | |
43 | void allocate(size_t count, size_t maxSize); |
44 | void verify(); |
45 | void merge(ArenaTester&& other); |
46 | |
47 | private: |
48 | std::mutex mergeMutex_; |
49 | std::vector<std::pair<uint8_t, Range<uint8_t*>>> areas_; |
50 | ThreadCachedArena* arena_; |
51 | }; |
52 | |
53 | void ArenaTester::allocate(size_t count, size_t maxSize) { |
54 | // Allocate chunks of memory of random sizes |
55 | std::random_device rd{}; |
56 | std::mt19937 rnd{rd()}; |
57 | std::uniform_int_distribution<uint32_t> sizeDist(1, maxSize - 1); |
58 | areas_.clear(); |
59 | areas_.reserve(count); |
60 | for (size_t i = 0; i < count; i++) { |
61 | size_t size = sizeDist(rnd); |
62 | uint8_t* p = static_cast<uint8_t*>(arena_->allocate(size)); |
63 | areas_.emplace_back(uint8_t(rnd() & 0xff), Range<uint8_t*>(p, size)); |
64 | } |
65 | |
66 | // Fill each area with a different value, to prove that they don't overlap |
67 | // Fill in random order. |
68 | std::shuffle(areas_.begin(), areas_.end(), rnd); |
69 | |
70 | for (auto& p : areas_) { |
71 | std::fill(p.second.begin(), p.second.end(), p.first); |
72 | } |
73 | } |
74 | |
75 | void ArenaTester::verify() { |
76 | for (auto& p : areas_) { |
77 | for (auto v : p.second) { |
78 | EXPECT_EQ(p.first, v); |
79 | } |
80 | } |
81 | } |
82 | |
83 | void ArenaTester::merge(ArenaTester&& other) { |
84 | { |
85 | std::lock_guard<std::mutex> lock(mergeMutex_); |
86 | std::move( |
87 | other.areas_.begin(), other.areas_.end(), std::back_inserter(areas_)); |
88 | } |
89 | other.areas_.clear(); |
90 | } |
91 | |
92 | } // namespace |
93 | |
94 | TEST(ThreadCachedArena, BlockSize) { |
95 | static const size_t alignment = max_align_v; |
96 | static const size_t requestedBlockSize = 64; |
97 | |
98 | ThreadCachedArena arena(requestedBlockSize); |
99 | size_t blockSize = alignment; |
100 | uint8_t* prev = static_cast<uint8_t*>(arena.allocate(1)); |
101 | |
102 | // Keep allocating until we're no longer one single alignment away from the |
103 | // previous allocation -- that's when we've gotten to the next block. |
104 | uint8_t* p; |
105 | while ((p = static_cast<uint8_t*>(arena.allocate(1))) == prev + alignment) { |
106 | prev = p; |
107 | blockSize += alignment; |
108 | } |
109 | |
110 | VLOG(1) << "Requested block size: " << requestedBlockSize |
111 | << ", actual: " << blockSize; |
112 | EXPECT_LE(requestedBlockSize, blockSize); |
113 | } |
114 | |
115 | TEST(ThreadCachedArena, SingleThreaded) { |
116 | static const size_t requestedBlockSize = 64; |
117 | ThreadCachedArena arena(requestedBlockSize); |
118 | EXPECT_EQ(arena.totalSize(), sizeof(ThreadCachedArena)); |
119 | |
120 | ArenaTester tester(arena); |
121 | tester.allocate(100, 100 << 10); |
122 | tester.verify(); |
123 | |
124 | EXPECT_GT(arena.totalSize(), sizeof(ThreadCachedArena)); |
125 | } |
126 | |
127 | TEST(ThreadCachedArena, MultiThreaded) { |
128 | static const size_t requestedBlockSize = 64; |
129 | ThreadCachedArena arena(requestedBlockSize); |
130 | ArenaTester mainTester(arena); |
131 | |
132 | // Do this twice, to catch the possibility that memory from the first |
133 | // round gets freed |
134 | static const size_t numThreads = 20; |
135 | for (size_t i = 0; i < 2; i++) { |
136 | std::vector<std::thread> threads; |
137 | threads.reserve(numThreads); |
138 | for (size_t j = 0; j < numThreads; j++) { |
139 | threads.emplace_back([&arena, &mainTester]() { |
140 | ArenaTester tester(arena); |
141 | tester.allocate(500, 1 << 10); |
142 | tester.verify(); |
143 | mainTester.merge(std::move(tester)); |
144 | }); |
145 | } |
146 | for (auto& t : threads) { |
147 | t.join(); |
148 | } |
149 | } |
150 | |
151 | mainTester.verify(); |
152 | } |
153 | |
154 | TEST(ThreadCachedArena, ThreadCachedArenaAllocator) { |
155 | using Map = std::unordered_map< |
156 | int, |
157 | int, |
158 | std::hash<int>, |
159 | std::equal_to<int>, |
160 | ThreadCachedArenaAllocator<std::pair<const int, int>>>; |
161 | |
162 | static const size_t requestedBlockSize = 64; |
163 | ThreadCachedArena arena(requestedBlockSize); |
164 | |
165 | Map map{0, |
166 | std::hash<int>(), |
167 | std::equal_to<int>(), |
168 | ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)}; |
169 | |
170 | for (int i = 0; i < 1000; i++) { |
171 | map[i] = i; |
172 | } |
173 | |
174 | for (int i = 0; i < 1000; i++) { |
175 | EXPECT_EQ(i, map[i]); |
176 | } |
177 | } |
178 | |
179 | namespace { |
180 | |
181 | static const int kNumValues = 10000; |
182 | |
183 | BENCHMARK(bmUMStandard, iters) { |
184 | using Map = std::unordered_map<int, int>; |
185 | |
186 | while (iters--) { |
187 | Map map{0}; |
188 | for (int i = 0; i < kNumValues; i++) { |
189 | map[i] = i; |
190 | } |
191 | } |
192 | } |
193 | |
194 | BENCHMARK(bmUMArena, iters) { |
195 | using Map = std::unordered_map< |
196 | int, |
197 | int, |
198 | std::hash<int>, |
199 | std::equal_to<int>, |
200 | ThreadCachedArenaAllocator<std::pair<const int, int>>>; |
201 | |
202 | while (iters--) { |
203 | ThreadCachedArena arena; |
204 | |
205 | Map map{0, |
206 | std::hash<int>(), |
207 | std::equal_to<int>(), |
208 | ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)}; |
209 | |
210 | for (int i = 0; i < kNumValues; i++) { |
211 | map[i] = i; |
212 | } |
213 | } |
214 | } |
215 | |
216 | BENCHMARK_DRAW_LINE(); |
217 | |
218 | BENCHMARK(bmMStandard, iters) { |
219 | using Map = std::map<int, int>; |
220 | |
221 | while (iters--) { |
222 | Map map; |
223 | for (int i = 0; i < kNumValues; i++) { |
224 | map[i] = i; |
225 | } |
226 | } |
227 | } |
228 | |
229 | BENCHMARK_DRAW_LINE(); |
230 | |
231 | BENCHMARK(bmMArena, iters) { |
232 | using Map = std::map< |
233 | int, |
234 | int, |
235 | std::less<int>, |
236 | ThreadCachedArenaAllocator<std::pair<const int, int>>>; |
237 | |
238 | while (iters--) { |
239 | ThreadCachedArena arena; |
240 | |
241 | Map map{std::less<int>(), |
242 | ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)}; |
243 | |
244 | for (int i = 0; i < kNumValues; i++) { |
245 | map[i] = i; |
246 | } |
247 | } |
248 | } |
249 | |
250 | BENCHMARK_DRAW_LINE(); |
251 | |
252 | } // namespace |
253 | |
254 | // Benchmark Iters Total t t/iter iter/sec |
255 | // ---------------------------------------------------------------------------- |
256 | // Comparing benchmarks: bmUMStandard,bmUMArena |
257 | // + 143% bmUMStandard 1570 2.005 s 1.277 ms 782.9 |
258 | // * bmUMArena 3817 2.003 s 524.7 us 1.861 k |
259 | // ---------------------------------------------------------------------------- |
260 | // Comparing benchmarks: bmMStandard,bmMArena |
261 | // +79.0% bmMStandard 1197 2.009 s 1.678 ms 595.8 |
262 | // * bmMArena 2135 2.002 s 937.6 us 1.042 k |
263 | // ---------------------------------------------------------------------------- |
264 | |
265 | int main(int argc, char* argv[]) { |
266 | testing::InitGoogleTest(&argc, argv); |
267 | gflags::ParseCommandLineFlags(&argc, &argv, true); |
268 | auto ret = RUN_ALL_TESTS(); |
269 | if (!ret && FLAGS_benchmark) { |
270 | folly::runBenchmarks(); |
271 | } |
272 | return ret; |
273 | } |
274 | |