1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <algorithm> |
20 | #include <fstream> |
21 | #include <limits> |
22 | #include <random> |
23 | #include <string> |
24 | #include <unordered_set> |
25 | #include <vector> |
26 | |
27 | #include <glog/logging.h> |
28 | |
29 | #include <folly/Benchmark.h> |
30 | #include <folly/Likely.h> |
31 | #include <folly/Optional.h> |
32 | #include <folly/experimental/Instructions.h> |
33 | #include <folly/portability/GTest.h> |
34 | |
35 | namespace folly { |
36 | namespace compression { |
37 | |
38 | template <class URNG> |
39 | std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) { |
40 | CHECK_LT(n, 2 * maxId); |
41 | std::uniform_int_distribution<> uid(1, maxId); |
42 | std::unordered_set<uint32_t> dataset; |
43 | while (dataset.size() < n) { |
44 | uint32_t value = uid(g); |
45 | if (dataset.count(value) == 0) { |
46 | dataset.insert(value); |
47 | } |
48 | } |
49 | |
50 | std::vector<uint32_t> ids(dataset.begin(), dataset.end()); |
51 | std::sort(ids.begin(), ids.end()); |
52 | return ids; |
53 | } |
54 | |
55 | inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) { |
56 | std::mt19937 gen; |
57 | return generateRandomList(n, maxId, gen); |
58 | } |
59 | |
60 | inline std::vector<uint32_t> |
61 | generateSeqList(uint32_t minId, uint32_t maxId, uint32_t step = 1) { |
62 | CHECK_LE(minId, maxId); |
63 | CHECK_GT(step, 0); |
64 | std::vector<uint32_t> ids; |
65 | ids.reserve((maxId - minId) / step + 1); |
66 | for (uint32_t i = minId; i <= maxId; i += step) { |
67 | ids.push_back(i); |
68 | } |
69 | return ids; |
70 | } |
71 | |
72 | inline std::vector<uint32_t> loadList(const std::string& filename) { |
73 | std::ifstream fin(filename); |
74 | std::vector<uint32_t> result; |
75 | uint32_t id; |
76 | while (fin >> id) { |
77 | result.push_back(id); |
78 | } |
79 | return result; |
80 | } |
81 | |
82 | // Test previousValue only if Reader has it. |
83 | template <class... Args> |
84 | void maybeTestPreviousValue(Args&&...) {} |
85 | |
86 | // Make all the arguments template because if the types are not exact, |
87 | // the above overload will be picked (for example i could be size_t or |
88 | // ssize_t). |
89 | template <class Vector, class Reader, class Index> |
90 | auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i) |
91 | -> decltype(reader.previousValue(), void()) { |
92 | if (i != 0) { |
93 | EXPECT_EQ(reader.previousValue(), data[i - 1]); |
94 | } |
95 | } |
96 | |
97 | // Test previous only if Reader has it. |
98 | template <class... Args> |
99 | void maybeTestPrevious(Args&&...) {} |
100 | |
101 | // Make all the arguments template because if the types are not exact, |
102 | // the above overload will be picked (for example i could be size_t or |
103 | // ssize_t). |
104 | template <class Vector, class Reader, class Index> |
105 | auto maybeTestPrevious(const Vector& data, Reader& reader, Index i) |
106 | -> decltype(reader.previous(), void()) { |
107 | auto r = reader.previous(); |
108 | if (i != 0) { |
109 | EXPECT_TRUE(r); |
110 | EXPECT_EQ(reader.value(), data[i - 1]); |
111 | } else { |
112 | EXPECT_FALSE(r); |
113 | } |
114 | reader.next(); |
115 | EXPECT_EQ(reader.value(), data[i]); |
116 | } |
117 | |
118 | template <class Reader, class List> |
119 | void testNext(const std::vector<uint32_t>& data, const List& list) { |
120 | Reader reader(list); |
121 | EXPECT_FALSE(reader.valid()); |
122 | |
123 | for (size_t i = 0; i < data.size(); ++i) { |
124 | EXPECT_TRUE(reader.next()); |
125 | EXPECT_TRUE(reader.valid()); |
126 | EXPECT_EQ(reader.value(), data[i]); |
127 | EXPECT_EQ(reader.position(), i); |
128 | maybeTestPreviousValue(data, reader, i); |
129 | maybeTestPrevious(data, reader, i); |
130 | } |
131 | EXPECT_FALSE(reader.next()); |
132 | EXPECT_FALSE(reader.valid()); |
133 | EXPECT_EQ(reader.position(), reader.size()); |
134 | } |
135 | |
136 | template <class Reader, class List> |
137 | void testSkip( |
138 | const std::vector<uint32_t>& data, |
139 | const List& list, |
140 | size_t skipStep) { |
141 | CHECK_GT(skipStep, 0); |
142 | Reader reader(list); |
143 | |
144 | for (size_t i = skipStep - 1; i < data.size(); i += skipStep) { |
145 | EXPECT_TRUE(reader.skip(skipStep)); |
146 | EXPECT_TRUE(reader.valid()); |
147 | EXPECT_EQ(reader.value(), data[i]); |
148 | EXPECT_EQ(reader.position(), i); |
149 | maybeTestPreviousValue(data, reader, i); |
150 | maybeTestPrevious(data, reader, i); |
151 | } |
152 | EXPECT_FALSE(reader.skip(skipStep)); |
153 | EXPECT_FALSE(reader.valid()); |
154 | EXPECT_EQ(reader.position(), reader.size()); |
155 | EXPECT_FALSE(reader.next()); |
156 | } |
157 | |
158 | template <class Reader, class List> |
159 | void testSkip(const std::vector<uint32_t>& data, const List& list) { |
160 | for (size_t skipStep = 1; skipStep < 25; ++skipStep) { |
161 | testSkip<Reader, List>(data, list, skipStep); |
162 | } |
163 | for (size_t skipStep = 25; skipStep <= 500; skipStep += 25) { |
164 | testSkip<Reader, List>(data, list, skipStep); |
165 | } |
166 | } |
167 | |
168 | template <class Reader, class List> |
169 | void testSkipTo( |
170 | const std::vector<uint32_t>& data, |
171 | const List& list, |
172 | size_t skipToStep) { |
173 | CHECK_GT(skipToStep, 0); |
174 | Reader reader(list); |
175 | |
176 | const uint32_t delta = std::max<uint32_t>(1, data.back() / skipToStep); |
177 | uint32_t value = delta; |
178 | auto it = data.begin(); |
179 | while (true) { |
180 | it = std::lower_bound(it, data.end(), value); |
181 | if (it == data.end()) { |
182 | EXPECT_FALSE(reader.skipTo(value)); |
183 | break; |
184 | } |
185 | EXPECT_TRUE(reader.skipTo(value)); |
186 | EXPECT_TRUE(reader.valid()); |
187 | EXPECT_EQ(reader.value(), *it); |
188 | EXPECT_EQ(reader.position(), std::distance(data.begin(), it)); |
189 | value = reader.value() + delta; |
190 | maybeTestPreviousValue(data, reader, std::distance(data.begin(), it)); |
191 | maybeTestPrevious(data, reader, std::distance(data.begin(), it)); |
192 | } |
193 | EXPECT_FALSE(reader.valid()); |
194 | EXPECT_EQ(reader.position(), reader.size()); |
195 | EXPECT_FALSE(reader.next()); |
196 | } |
197 | |
198 | template <class Reader, class List> |
199 | void testSkipTo(const std::vector<uint32_t>& data, const List& list) { |
200 | for (size_t steps = 10; steps < 100; steps += 10) { |
201 | testSkipTo<Reader, List>(data, list, steps); |
202 | } |
203 | for (size_t steps = 100; steps <= 1000; steps += 100) { |
204 | testSkipTo<Reader, List>(data, list, steps); |
205 | } |
206 | testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max()); |
207 | { |
208 | // Skip to the first element. |
209 | Reader reader(list); |
210 | EXPECT_TRUE(reader.skipTo(data[0])); |
211 | EXPECT_EQ(reader.value(), data[0]); |
212 | EXPECT_EQ(reader.position(), 0); |
213 | } |
214 | { |
215 | // Skip past the last element. |
216 | Reader reader(list); |
217 | EXPECT_FALSE(reader.skipTo(data.back() + 1)); |
218 | EXPECT_FALSE(reader.valid()); |
219 | EXPECT_EQ(reader.position(), reader.size()); |
220 | EXPECT_FALSE(reader.next()); |
221 | } |
222 | { |
223 | // Skip to maximum integer. |
224 | Reader reader(list); |
225 | using ValueType = typename Reader::ValueType; |
226 | EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max())); |
227 | EXPECT_FALSE(reader.valid()); |
228 | EXPECT_EQ(reader.position(), reader.size()); |
229 | EXPECT_FALSE(reader.next()); |
230 | } |
231 | } |
232 | |
233 | template <class Reader, class List> |
234 | void testJump(const std::vector<uint32_t>& data, const List& list) { |
235 | std::mt19937 gen; |
236 | std::vector<size_t> is(data.size()); |
237 | for (size_t i = 0; i < data.size(); ++i) { |
238 | is[i] = i; |
239 | } |
240 | std::shuffle(is.begin(), is.end(), gen); |
241 | if (Reader::EncoderType::forwardQuantum == 0) { |
242 | is.resize(std::min<size_t>(is.size(), 100)); |
243 | } |
244 | |
245 | Reader reader(list); |
246 | for (auto i : is) { |
247 | EXPECT_TRUE(reader.jump(i)); |
248 | EXPECT_EQ(reader.value(), data[i]); |
249 | EXPECT_EQ(reader.position(), i); |
250 | maybeTestPreviousValue(data, reader, i); |
251 | maybeTestPrevious(data, reader, i); |
252 | } |
253 | EXPECT_FALSE(reader.jump(data.size())); |
254 | EXPECT_FALSE(reader.valid()); |
255 | EXPECT_EQ(reader.position(), reader.size()); |
256 | } |
257 | |
258 | template <class Reader, class List> |
259 | void testJumpTo(const std::vector<uint32_t>& data, const List& list) { |
260 | CHECK(!data.empty()); |
261 | |
262 | Reader reader(list); |
263 | |
264 | std::mt19937 gen; |
265 | std::uniform_int_distribution<> values(0, data.back()); |
266 | const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000; |
267 | for (size_t i = 0; i < iters; ++i) { |
268 | const uint32_t value = values(gen); |
269 | auto it = std::lower_bound(data.begin(), data.end(), value); |
270 | CHECK(it != data.end()); |
271 | EXPECT_TRUE(reader.jumpTo(value)); |
272 | EXPECT_EQ(reader.value(), *it); |
273 | EXPECT_EQ(reader.position(), std::distance(data.begin(), it)); |
274 | } |
275 | |
276 | EXPECT_TRUE(reader.jumpTo(0)); |
277 | EXPECT_EQ(reader.value(), data[0]); |
278 | EXPECT_EQ(reader.position(), 0); |
279 | |
280 | EXPECT_TRUE(reader.jumpTo(data.back())); |
281 | EXPECT_EQ(reader.value(), data.back()); |
282 | EXPECT_EQ(reader.position(), reader.size() - 1); |
283 | |
284 | EXPECT_FALSE(reader.jumpTo(data.back() + 1)); |
285 | EXPECT_FALSE(reader.valid()); |
286 | EXPECT_EQ(reader.position(), reader.size()); |
287 | } |
288 | |
289 | template <class Reader, class Encoder> |
290 | void testEmpty() { |
291 | const typename Encoder::ValueType* const data = nullptr; |
292 | auto list = Encoder::encode(data, data); |
293 | { |
294 | Reader reader(list); |
295 | EXPECT_FALSE(reader.next()); |
296 | EXPECT_EQ(reader.size(), 0); |
297 | } |
298 | { |
299 | Reader reader(list); |
300 | EXPECT_FALSE(reader.skip(1)); |
301 | EXPECT_FALSE(reader.skip(10)); |
302 | EXPECT_FALSE(reader.jump(0)); |
303 | EXPECT_FALSE(reader.jump(10)); |
304 | } |
305 | { |
306 | Reader reader(list); |
307 | EXPECT_FALSE(reader.skipTo(1)); |
308 | EXPECT_FALSE(reader.jumpTo(1)); |
309 | } |
310 | } |
311 | |
312 | template <class Reader, class Encoder> |
313 | void testAll(const std::vector<uint32_t>& data) { |
314 | auto list = Encoder::encode(data.begin(), data.end()); |
315 | testNext<Reader>(data, list); |
316 | testSkip<Reader>(data, list); |
317 | testSkipTo<Reader>(data, list); |
318 | testJump<Reader>(data, list); |
319 | testJumpTo<Reader>(data, list); |
320 | list.free(); |
321 | } |
322 | |
323 | template <class Reader, class List> |
324 | void bmNext(const List& list, const std::vector<uint32_t>& data, size_t iters) { |
325 | if (data.empty()) { |
326 | return; |
327 | } |
328 | |
329 | Reader reader(list); |
330 | for (size_t i = 0; i < iters; ++i) { |
331 | if (LIKELY(reader.next())) { |
332 | folly::doNotOptimizeAway(reader.value()); |
333 | } else { |
334 | reader.reset(); |
335 | } |
336 | } |
337 | } |
338 | |
339 | template <class Reader, class List> |
340 | void bmSkip( |
341 | const List& list, |
342 | const std::vector<uint32_t>& /* data */, |
343 | size_t logAvgSkip, |
344 | size_t iters) { |
345 | size_t avg = (size_t(1) << logAvgSkip); |
346 | size_t base = avg - (avg >> 2); |
347 | size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0; |
348 | |
349 | Reader reader(list); |
350 | for (size_t i = 0; i < iters; ++i) { |
351 | size_t skip = base + (i & mask); |
352 | if (LIKELY(reader.skip(skip))) { |
353 | folly::doNotOptimizeAway(reader.value()); |
354 | } else { |
355 | reader.reset(); |
356 | } |
357 | } |
358 | } |
359 | |
360 | template <class Reader, class List> |
361 | void bmSkipTo( |
362 | const List& list, |
363 | const std::vector<uint32_t>& data, |
364 | size_t logAvgSkip, |
365 | size_t iters) { |
366 | size_t avg = (size_t(1) << logAvgSkip); |
367 | size_t base = avg - (avg >> 2); |
368 | size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0; |
369 | |
370 | Reader reader(list); |
371 | for (size_t i = 0, j = -1; i < iters; ++i) { |
372 | size_t skip = base + (i & mask); |
373 | j += skip; |
374 | if (j >= data.size()) { |
375 | reader.reset(); |
376 | j = -1; |
377 | } |
378 | |
379 | reader.skipTo(data[j]); |
380 | folly::doNotOptimizeAway(reader.value()); |
381 | } |
382 | } |
383 | |
384 | template <class Reader, class List> |
385 | void bmJump( |
386 | const List& list, |
387 | const std::vector<uint32_t>& data, |
388 | const std::vector<size_t>& order, |
389 | size_t iters) { |
390 | CHECK(!data.empty()); |
391 | CHECK_EQ(data.size(), order.size()); |
392 | |
393 | Reader reader(list); |
394 | for (size_t i = 0, j = 0; i < iters; ++i, ++j) { |
395 | if (j == order.size()) { |
396 | j = 0; |
397 | } |
398 | reader.jump(order[j]); |
399 | folly::doNotOptimizeAway(reader.value()); |
400 | } |
401 | } |
402 | |
403 | template <class Reader, class List> |
404 | void bmJumpTo( |
405 | const List& list, |
406 | const std::vector<uint32_t>& data, |
407 | const std::vector<size_t>& order, |
408 | size_t iters) { |
409 | CHECK(!data.empty()); |
410 | CHECK_EQ(data.size(), order.size()); |
411 | |
412 | Reader reader(list); |
413 | for (size_t i = 0, j = 0; i < iters; ++i, ++j) { |
414 | if (j == order.size()) { |
415 | j = 0; |
416 | } |
417 | reader.jumpTo(data[order[j]]); |
418 | folly::doNotOptimizeAway(reader.value()); |
419 | } |
420 | } |
421 | |
422 | folly::Optional<instructions::Type> instructionsOverride(); |
423 | |
424 | template <class F> |
425 | auto dispatchInstructions(F&& f) |
426 | -> decltype(f(std::declval<instructions::Default>())) { |
427 | if (auto type = instructionsOverride()) { |
428 | return instructions::dispatch(*type, std::forward<F>(f)); |
429 | } else { |
430 | return instructions::dispatch(std::forward<F>(f)); |
431 | } |
432 | } |
433 | |
434 | } // namespace compression |
435 | } // namespace folly |
436 | |