1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <algorithm>
18#include <limits>
19#include <numeric>
20#include <random>
21#include <vector>
22
23#include <folly/Benchmark.h>
24#include <folly/experimental/EliasFanoCoding.h>
25#include <folly/experimental/Select64.h>
26#include <folly/experimental/test/CodingTestUtils.h>
27#include <folly/init/Init.h>
28
29using namespace folly::compression;
30
31namespace {
32
33uint8_t slowDefaultNumLowerBits(size_t upperBound, size_t size) {
34 if (size == 0 || upperBound < size) {
35 return 0;
36 }
37 // floor(log(upperBound / size));
38 return uint8_t(folly::findLastSet(upperBound / size) - 1);
39}
40
41} // namespace
42
43TEST(EliasFanoCoding, defaultNumLowerBits) {
44 // Verify that slowDefaultNumLowerBits and optimized
45 // Encoder::defaultNumLowerBits agree.
46 static constexpr size_t kNumIterations = 2500;
47 auto compare = [](size_t upperBound, size_t size) {
48 using Encoder = EliasFanoEncoderV2<size_t>;
49 EXPECT_EQ(
50 int(slowDefaultNumLowerBits(upperBound, size)),
51 int(Encoder::defaultNumLowerBits(upperBound, size)))
52 << upperBound << " " << size;
53 };
54 auto batch = [&compare](size_t initialUpperBound) {
55 for (size_t upperBound = initialUpperBound, i = 0; i < kNumIterations;
56 ++i, --upperBound) {
57 // Test "size" values close to "upperBound".
58 for (size_t size = upperBound, j = 0; j < kNumIterations; ++j, --size) {
59 compare(upperBound, size);
60 }
61 // Sample "size" values between [0, upperBound].
62 for (size_t size = upperBound; size > 1 + upperBound / kNumIterations;
63 size -= 1 + upperBound / kNumIterations) {
64 compare(upperBound, size);
65 }
66 // Test "size" values close to 0.
67 for (size_t size = 0; size < kNumIterations; ++size) {
68 compare(upperBound, size);
69 }
70 }
71 };
72 batch(std::numeric_limits<size_t>::max());
73 batch(kNumIterations + 1312213123);
74 batch(kNumIterations);
75
76 std::mt19937 gen;
77 std::uniform_int_distribution<size_t> distribution;
78 for (size_t i = 0; i < kNumIterations; ++i) {
79 const auto a = distribution(gen);
80 const auto b = distribution(gen);
81 compare(std::max(a, b), std::min(a, b));
82 }
83}
84
85class EliasFanoCodingTest : public ::testing::Test {
86 public:
87 void doTestEmpty() {
88 typedef EliasFanoEncoderV2<uint32_t, size_t> Encoder;
89 typedef EliasFanoReader<Encoder> Reader;
90 testEmpty<Reader, Encoder>();
91 }
92
93 template <size_t kSkipQuantum, size_t kForwardQuantum, class SizeType>
94 void doTestAll() {
95 typedef EliasFanoEncoderV2<
96 uint32_t,
97 uint32_t,
98 kSkipQuantum,
99 kForwardQuantum>
100 Encoder;
101 using Reader =
102 EliasFanoReader<Encoder, instructions::Default, false, SizeType>;
103 testAll<Reader, Encoder>({0});
104 testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
105 testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
106 }
107};
108
109TEST_F(EliasFanoCodingTest, Empty) {
110 doTestEmpty();
111}
112
113TEST_F(EliasFanoCodingTest, Simple) {
114 doTestAll<0, 0, uint32_t>();
115 doTestAll<0, 0, size_t>();
116}
117
118TEST_F(EliasFanoCodingTest, SkipPointers) {
119 doTestAll<128, 0, uint32_t>();
120 doTestAll<128, 0, size_t>();
121}
122
123TEST_F(EliasFanoCodingTest, ForwardPointers) {
124 doTestAll<0, 128, uint32_t>();
125 doTestAll<0, 128, size_t>();
126}
127
128TEST_F(EliasFanoCodingTest, SkipForwardPointers) {
129 doTestAll<128, 128, uint32_t>();
130 doTestAll<128, 128, size_t>();
131}
132
133TEST_F(EliasFanoCodingTest, BugLargeGapInUpperBits) { // t16274876
134 typedef EliasFanoEncoderV2<uint32_t, uint32_t, 2, 2> Encoder;
135 typedef EliasFanoReader<Encoder, instructions::Default> Reader;
136 constexpr uint32_t kLargeValue = 127;
137
138 // Build a list where the upper bits have a large gap after the
139 // first element, so that we need to reposition in the upper bits
140 // using skips to position the iterator on the second element.
141 std::vector<uint32_t> data = {0, kLargeValue};
142 for (uint32_t i = 0; i < kLargeValue; ++i) {
143 data.push_back(data.back() + 1);
144 }
145 auto list = Encoder::encode(data.begin(), data.end());
146
147 {
148 Reader reader(list);
149 ASSERT_TRUE(reader.skipTo(kLargeValue - 1));
150 ASSERT_EQ(kLargeValue, reader.value());
151 ASSERT_EQ(0, reader.previousValue());
152 }
153
154 list.free();
155}
156
157namespace bm {
158
159typedef EliasFanoEncoderV2<uint32_t, uint32_t, 128, 128> Encoder;
160
161std::vector<uint32_t> data;
162std::vector<size_t> order;
163
164std::vector<uint32_t> encodeSmallData;
165std::vector<uint32_t> encodeLargeData;
166
167std::vector<std::pair<size_t, size_t>> numLowerBitsInput;
168
169typename Encoder::MutableCompressedList list;
170
171void init() {
172 std::mt19937 gen;
173
174 data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
175 list = Encoder::encode(data.begin(), data.end());
176
177 order.resize(data.size());
178 std::iota(order.begin(), order.end(), size_t());
179 std::shuffle(order.begin(), order.end(), gen);
180
181 encodeSmallData = generateRandomList(10, 100 * 1000, gen);
182 encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen);
183
184 std::uniform_int_distribution<size_t> distribution;
185 for (size_t i = 0; i < 10000; ++i) {
186 const auto a = distribution(gen);
187 const auto b = distribution(gen);
188 numLowerBitsInput.emplace_back(std::max(a, b), std::min(a, b));
189 }
190}
191
192void free() {
193 list.free();
194}
195
196} // namespace bm
197
198BENCHMARK(Next, iters) {
199 dispatchInstructions([&](auto instructions) {
200 bmNext<EliasFanoReader<bm::Encoder, decltype(instructions)>>(
201 bm::list, bm::data, iters);
202 });
203}
204
205size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) {
206 dispatchInstructions([&](auto instructions) {
207 bmSkip<EliasFanoReader<bm::Encoder, decltype(instructions)>>(
208 bm::list, bm::data, logAvgSkip, iters);
209 });
210 return iters;
211}
212
213BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0)
214BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1)
215BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2)
216BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4)
217BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6)
218BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8)
219BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10)
220
221BENCHMARK(Jump_ForwardQ128, iters) {
222 dispatchInstructions([&](auto instructions) {
223 bmJump<EliasFanoReader<bm::Encoder, decltype(instructions)>>(
224 bm::list, bm::data, bm::order, iters);
225 });
226}
227
228BENCHMARK_DRAW_LINE();
229
230size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) {
231 dispatchInstructions([&](auto instructions) {
232 bmSkipTo<EliasFanoReader<bm::Encoder, decltype(instructions)>>(
233 bm::list, bm::data, logAvgSkip, iters);
234 });
235 return iters;
236}
237
238BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0)
239BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1)
240BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2)
241BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4)
242BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6)
243BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8)
244BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10)
245
246BENCHMARK(JumpTo_SkipQ128, iters) {
247 dispatchInstructions([&](auto instructions) {
248 bmJumpTo<EliasFanoReader<bm::Encoder, decltype(instructions)>>(
249 bm::list, bm::data, bm::order, iters);
250 });
251}
252
253BENCHMARK_DRAW_LINE();
254
255BENCHMARK(Encode_10) {
256 auto list = bm::Encoder::encode(
257 bm::encodeSmallData.begin(), bm::encodeSmallData.end());
258 list.free();
259}
260
261BENCHMARK(Encode) {
262 auto list = bm::Encoder::encode(
263 bm::encodeLargeData.begin(), bm::encodeLargeData.end());
264 list.free();
265}
266
267BENCHMARK_DRAW_LINE();
268
269BENCHMARK(defaultNumLowerBits, iters) {
270 using Encoder = EliasFanoEncoderV2<size_t>;
271
272 size_t i = 0;
273 while (iters--) {
274 const auto& p = bm::numLowerBitsInput[i];
275 folly::doNotOptimizeAway(Encoder::defaultNumLowerBits(p.first, p.second));
276 if (++i == bm::numLowerBitsInput.size()) {
277 i = 0;
278 }
279 }
280}
281
282BENCHMARK(slowDefaultNumLowerBits, iters) {
283 size_t i = 0;
284 while (iters--) {
285 const auto& p = bm::numLowerBitsInput[i];
286 folly::doNotOptimizeAway(slowDefaultNumLowerBits(p.first, p.second));
287 if (++i == bm::numLowerBitsInput.size()) {
288 i = 0;
289 }
290 }
291}
292
293#if 0
294// Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz (turbo on),
295// Using GCC 5 with --bm_min_usec 100000.
296V1008 12:29:33.646595 87744 Instructions.h:161] Will use folly::compression::instructions::Haswell
297============================================================================
298folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s
299============================================================================
300Next 2.47ns 405.58M
301Skip_ForwardQ128(1) 6.68ns 149.67M
302Skip_ForwardQ128(2) 7.67ns 130.30M
303Skip_ForwardQ128(4_pm_1) 9.12ns 109.65M
304Skip_ForwardQ128(16_pm_4) 9.95ns 100.53M
305Skip_ForwardQ128(64_pm_16) 12.76ns 78.40M
306Skip_ForwardQ128(256_pm_64) 18.09ns 55.27M
307Skip_ForwardQ128(1024_pm_256) 19.13ns 52.28M
308Jump_ForwardQ128 20.27ns 49.33M
309----------------------------------------------------------------------------
310SkipTo_SkipQ128(1) 8.35ns 119.76M
311SkipTo_SkipQ128(2) 12.37ns 80.85M
312SkipTo_SkipQ128(4_pm_1) 15.05ns 66.44M
313SkipTo_SkipQ128(16_pm_4) 22.90ns 43.66M
314SkipTo_SkipQ128(64_pm_16) 34.11ns 29.31M
315SkipTo_SkipQ128(256_pm_64) 38.68ns 25.85M
316SkipTo_SkipQ128(1024_pm_256) 41.75ns 23.95M
317JumpTo_SkipQ128 44.79ns 22.33M
318----------------------------------------------------------------------------
319Encode_10 120.33ns 8.31M
320Encode 7.61ms 131.32
321----------------------------------------------------------------------------
322defaultNumLowerBits 3.69ns 270.74M
323slowDefaultNumLowerBits 10.90ns 91.73M
324============================================================================
325#endif
326
327int main(int argc, char** argv) {
328 testing::InitGoogleTest(&argc, argv);
329 folly::init(&argc, &argv);
330 gflags::ParseCommandLineFlags(&argc, &argv, true);
331
332 auto ret = RUN_ALL_TESTS();
333 if (ret == 0 && FLAGS_benchmark) {
334 bm::init();
335 folly::runBenchmarks();
336 bm::free();
337 }
338
339 return ret;
340}
341