1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <algorithm> |
18 | #include <limits> |
19 | #include <numeric> |
20 | #include <random> |
21 | #include <vector> |
22 | |
23 | #include <folly/Benchmark.h> |
24 | #include <folly/experimental/EliasFanoCoding.h> |
25 | #include <folly/experimental/Select64.h> |
26 | #include <folly/experimental/test/CodingTestUtils.h> |
27 | #include <folly/init/Init.h> |
28 | |
29 | using namespace folly::compression; |
30 | |
31 | namespace { |
32 | |
33 | uint8_t slowDefaultNumLowerBits(size_t upperBound, size_t size) { |
34 | if (size == 0 || upperBound < size) { |
35 | return 0; |
36 | } |
37 | // floor(log(upperBound / size)); |
38 | return uint8_t(folly::findLastSet(upperBound / size) - 1); |
39 | } |
40 | |
41 | } // namespace |
42 | |
43 | TEST(EliasFanoCoding, defaultNumLowerBits) { |
44 | // Verify that slowDefaultNumLowerBits and optimized |
45 | // Encoder::defaultNumLowerBits agree. |
46 | static constexpr size_t kNumIterations = 2500; |
47 | auto compare = [](size_t upperBound, size_t size) { |
48 | using Encoder = EliasFanoEncoderV2<size_t>; |
49 | EXPECT_EQ( |
50 | int(slowDefaultNumLowerBits(upperBound, size)), |
51 | int(Encoder::defaultNumLowerBits(upperBound, size))) |
52 | << upperBound << " " << size; |
53 | }; |
54 | auto batch = [&compare](size_t initialUpperBound) { |
55 | for (size_t upperBound = initialUpperBound, i = 0; i < kNumIterations; |
56 | ++i, --upperBound) { |
57 | // Test "size" values close to "upperBound". |
58 | for (size_t size = upperBound, j = 0; j < kNumIterations; ++j, --size) { |
59 | compare(upperBound, size); |
60 | } |
61 | // Sample "size" values between [0, upperBound]. |
62 | for (size_t size = upperBound; size > 1 + upperBound / kNumIterations; |
63 | size -= 1 + upperBound / kNumIterations) { |
64 | compare(upperBound, size); |
65 | } |
66 | // Test "size" values close to 0. |
67 | for (size_t size = 0; size < kNumIterations; ++size) { |
68 | compare(upperBound, size); |
69 | } |
70 | } |
71 | }; |
72 | batch(std::numeric_limits<size_t>::max()); |
73 | batch(kNumIterations + 1312213123); |
74 | batch(kNumIterations); |
75 | |
76 | std::mt19937 gen; |
77 | std::uniform_int_distribution<size_t> distribution; |
78 | for (size_t i = 0; i < kNumIterations; ++i) { |
79 | const auto a = distribution(gen); |
80 | const auto b = distribution(gen); |
81 | compare(std::max(a, b), std::min(a, b)); |
82 | } |
83 | } |
84 | |
85 | class EliasFanoCodingTest : public ::testing::Test { |
86 | public: |
87 | void doTestEmpty() { |
88 | typedef EliasFanoEncoderV2<uint32_t, size_t> Encoder; |
89 | typedef EliasFanoReader<Encoder> Reader; |
90 | testEmpty<Reader, Encoder>(); |
91 | } |
92 | |
93 | template <size_t kSkipQuantum, size_t kForwardQuantum, class SizeType> |
94 | void doTestAll() { |
95 | typedef EliasFanoEncoderV2< |
96 | uint32_t, |
97 | uint32_t, |
98 | kSkipQuantum, |
99 | kForwardQuantum> |
100 | Encoder; |
101 | using Reader = |
102 | EliasFanoReader<Encoder, instructions::Default, false, SizeType>; |
103 | testAll<Reader, Encoder>({0}); |
104 | testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000)); |
105 | testAll<Reader, Encoder>(generateSeqList(1, 100000, 100)); |
106 | } |
107 | }; |
108 | |
109 | TEST_F(EliasFanoCodingTest, Empty) { |
110 | doTestEmpty(); |
111 | } |
112 | |
113 | TEST_F(EliasFanoCodingTest, Simple) { |
114 | doTestAll<0, 0, uint32_t>(); |
115 | doTestAll<0, 0, size_t>(); |
116 | } |
117 | |
118 | TEST_F(EliasFanoCodingTest, SkipPointers) { |
119 | doTestAll<128, 0, uint32_t>(); |
120 | doTestAll<128, 0, size_t>(); |
121 | } |
122 | |
123 | TEST_F(EliasFanoCodingTest, ForwardPointers) { |
124 | doTestAll<0, 128, uint32_t>(); |
125 | doTestAll<0, 128, size_t>(); |
126 | } |
127 | |
128 | TEST_F(EliasFanoCodingTest, SkipForwardPointers) { |
129 | doTestAll<128, 128, uint32_t>(); |
130 | doTestAll<128, 128, size_t>(); |
131 | } |
132 | |
133 | TEST_F(EliasFanoCodingTest, BugLargeGapInUpperBits) { // t16274876 |
134 | typedef EliasFanoEncoderV2<uint32_t, uint32_t, 2, 2> Encoder; |
135 | typedef EliasFanoReader<Encoder, instructions::Default> Reader; |
136 | constexpr uint32_t kLargeValue = 127; |
137 | |
138 | // Build a list where the upper bits have a large gap after the |
139 | // first element, so that we need to reposition in the upper bits |
140 | // using skips to position the iterator on the second element. |
141 | std::vector<uint32_t> data = {0, kLargeValue}; |
142 | for (uint32_t i = 0; i < kLargeValue; ++i) { |
143 | data.push_back(data.back() + 1); |
144 | } |
145 | auto list = Encoder::encode(data.begin(), data.end()); |
146 | |
147 | { |
148 | Reader reader(list); |
149 | ASSERT_TRUE(reader.skipTo(kLargeValue - 1)); |
150 | ASSERT_EQ(kLargeValue, reader.value()); |
151 | ASSERT_EQ(0, reader.previousValue()); |
152 | } |
153 | |
154 | list.free(); |
155 | } |
156 | |
157 | namespace bm { |
158 | |
159 | typedef EliasFanoEncoderV2<uint32_t, uint32_t, 128, 128> Encoder; |
160 | |
161 | std::vector<uint32_t> data; |
162 | std::vector<size_t> order; |
163 | |
164 | std::vector<uint32_t> encodeSmallData; |
165 | std::vector<uint32_t> encodeLargeData; |
166 | |
167 | std::vector<std::pair<size_t, size_t>> numLowerBitsInput; |
168 | |
169 | typename Encoder::MutableCompressedList list; |
170 | |
171 | void init() { |
172 | std::mt19937 gen; |
173 | |
174 | data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen); |
175 | list = Encoder::encode(data.begin(), data.end()); |
176 | |
177 | order.resize(data.size()); |
178 | std::iota(order.begin(), order.end(), size_t()); |
179 | std::shuffle(order.begin(), order.end(), gen); |
180 | |
181 | encodeSmallData = generateRandomList(10, 100 * 1000, gen); |
182 | encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen); |
183 | |
184 | std::uniform_int_distribution<size_t> distribution; |
185 | for (size_t i = 0; i < 10000; ++i) { |
186 | const auto a = distribution(gen); |
187 | const auto b = distribution(gen); |
188 | numLowerBitsInput.emplace_back(std::max(a, b), std::min(a, b)); |
189 | } |
190 | } |
191 | |
192 | void free() { |
193 | list.free(); |
194 | } |
195 | |
196 | } // namespace bm |
197 | |
198 | BENCHMARK(Next, iters) { |
199 | dispatchInstructions([&](auto instructions) { |
200 | bmNext<EliasFanoReader<bm::Encoder, decltype(instructions)>>( |
201 | bm::list, bm::data, iters); |
202 | }); |
203 | } |
204 | |
205 | size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) { |
206 | dispatchInstructions([&](auto instructions) { |
207 | bmSkip<EliasFanoReader<bm::Encoder, decltype(instructions)>>( |
208 | bm::list, bm::data, logAvgSkip, iters); |
209 | }); |
210 | return iters; |
211 | } |
212 | |
213 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0) |
214 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1) |
215 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2) |
216 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4) |
217 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6) |
218 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8) |
219 | BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10) |
220 | |
221 | BENCHMARK(Jump_ForwardQ128, iters) { |
222 | dispatchInstructions([&](auto instructions) { |
223 | bmJump<EliasFanoReader<bm::Encoder, decltype(instructions)>>( |
224 | bm::list, bm::data, bm::order, iters); |
225 | }); |
226 | } |
227 | |
228 | BENCHMARK_DRAW_LINE(); |
229 | |
230 | size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) { |
231 | dispatchInstructions([&](auto instructions) { |
232 | bmSkipTo<EliasFanoReader<bm::Encoder, decltype(instructions)>>( |
233 | bm::list, bm::data, logAvgSkip, iters); |
234 | }); |
235 | return iters; |
236 | } |
237 | |
238 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0) |
239 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1) |
240 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2) |
241 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4) |
242 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6) |
243 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8) |
244 | BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10) |
245 | |
246 | BENCHMARK(JumpTo_SkipQ128, iters) { |
247 | dispatchInstructions([&](auto instructions) { |
248 | bmJumpTo<EliasFanoReader<bm::Encoder, decltype(instructions)>>( |
249 | bm::list, bm::data, bm::order, iters); |
250 | }); |
251 | } |
252 | |
253 | BENCHMARK_DRAW_LINE(); |
254 | |
255 | BENCHMARK(Encode_10) { |
256 | auto list = bm::Encoder::encode( |
257 | bm::encodeSmallData.begin(), bm::encodeSmallData.end()); |
258 | list.free(); |
259 | } |
260 | |
261 | BENCHMARK(Encode) { |
262 | auto list = bm::Encoder::encode( |
263 | bm::encodeLargeData.begin(), bm::encodeLargeData.end()); |
264 | list.free(); |
265 | } |
266 | |
267 | BENCHMARK_DRAW_LINE(); |
268 | |
269 | BENCHMARK(defaultNumLowerBits, iters) { |
270 | using Encoder = EliasFanoEncoderV2<size_t>; |
271 | |
272 | size_t i = 0; |
273 | while (iters--) { |
274 | const auto& p = bm::numLowerBitsInput[i]; |
275 | folly::doNotOptimizeAway(Encoder::defaultNumLowerBits(p.first, p.second)); |
276 | if (++i == bm::numLowerBitsInput.size()) { |
277 | i = 0; |
278 | } |
279 | } |
280 | } |
281 | |
282 | BENCHMARK(slowDefaultNumLowerBits, iters) { |
283 | size_t i = 0; |
284 | while (iters--) { |
285 | const auto& p = bm::numLowerBitsInput[i]; |
286 | folly::doNotOptimizeAway(slowDefaultNumLowerBits(p.first, p.second)); |
287 | if (++i == bm::numLowerBitsInput.size()) { |
288 | i = 0; |
289 | } |
290 | } |
291 | } |
292 | |
293 | #if 0 |
294 | // Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz (turbo on), |
295 | // Using GCC 5 with --bm_min_usec 100000. |
296 | V1008 12:29:33.646595 87744 Instructions.h:161] Will use folly::compression::instructions::Haswell |
297 | ============================================================================ |
298 | folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s |
299 | ============================================================================ |
300 | Next 2.47ns 405.58M |
301 | Skip_ForwardQ128(1) 6.68ns 149.67M |
302 | Skip_ForwardQ128(2) 7.67ns 130.30M |
303 | Skip_ForwardQ128(4_pm_1) 9.12ns 109.65M |
304 | Skip_ForwardQ128(16_pm_4) 9.95ns 100.53M |
305 | Skip_ForwardQ128(64_pm_16) 12.76ns 78.40M |
306 | Skip_ForwardQ128(256_pm_64) 18.09ns 55.27M |
307 | Skip_ForwardQ128(1024_pm_256) 19.13ns 52.28M |
308 | Jump_ForwardQ128 20.27ns 49.33M |
309 | ---------------------------------------------------------------------------- |
310 | SkipTo_SkipQ128(1) 8.35ns 119.76M |
311 | SkipTo_SkipQ128(2) 12.37ns 80.85M |
312 | SkipTo_SkipQ128(4_pm_1) 15.05ns 66.44M |
313 | SkipTo_SkipQ128(16_pm_4) 22.90ns 43.66M |
314 | SkipTo_SkipQ128(64_pm_16) 34.11ns 29.31M |
315 | SkipTo_SkipQ128(256_pm_64) 38.68ns 25.85M |
316 | SkipTo_SkipQ128(1024_pm_256) 41.75ns 23.95M |
317 | JumpTo_SkipQ128 44.79ns 22.33M |
318 | ---------------------------------------------------------------------------- |
319 | Encode_10 120.33ns 8.31M |
320 | Encode 7.61ms 131.32 |
321 | ---------------------------------------------------------------------------- |
322 | defaultNumLowerBits 3.69ns 270.74M |
323 | slowDefaultNumLowerBits 10.90ns 91.73M |
324 | ============================================================================ |
325 | #endif |
326 | |
327 | int main(int argc, char** argv) { |
328 | testing::InitGoogleTest(&argc, argv); |
329 | folly::init(&argc, &argv); |
330 | gflags::ParseCommandLineFlags(&argc, &argv, true); |
331 | |
332 | auto ret = RUN_ALL_TESTS(); |
333 | if (ret == 0 && FLAGS_benchmark) { |
334 | bm::init(); |
335 | folly::runBenchmarks(); |
336 | bm::free(); |
337 | } |
338 | |
339 | return ret; |
340 | } |
341 | |