1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <algorithm>
20#include <fstream>
21#include <limits>
22#include <random>
23#include <string>
24#include <unordered_set>
25#include <vector>
26
27#include <glog/logging.h>
28
29#include <folly/Benchmark.h>
30#include <folly/Likely.h>
31#include <folly/Optional.h>
32#include <folly/experimental/Instructions.h>
33#include <folly/portability/GTest.h>
34
35namespace folly {
36namespace compression {
37
38template <class URNG>
39std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) {
40 CHECK_LT(n, 2 * maxId);
41 std::uniform_int_distribution<> uid(1, maxId);
42 std::unordered_set<uint32_t> dataset;
43 while (dataset.size() < n) {
44 uint32_t value = uid(g);
45 if (dataset.count(value) == 0) {
46 dataset.insert(value);
47 }
48 }
49
50 std::vector<uint32_t> ids(dataset.begin(), dataset.end());
51 std::sort(ids.begin(), ids.end());
52 return ids;
53}
54
55inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
56 std::mt19937 gen;
57 return generateRandomList(n, maxId, gen);
58}
59
60inline std::vector<uint32_t>
61generateSeqList(uint32_t minId, uint32_t maxId, uint32_t step = 1) {
62 CHECK_LE(minId, maxId);
63 CHECK_GT(step, 0);
64 std::vector<uint32_t> ids;
65 ids.reserve((maxId - minId) / step + 1);
66 for (uint32_t i = minId; i <= maxId; i += step) {
67 ids.push_back(i);
68 }
69 return ids;
70}
71
72inline std::vector<uint32_t> loadList(const std::string& filename) {
73 std::ifstream fin(filename);
74 std::vector<uint32_t> result;
75 uint32_t id;
76 while (fin >> id) {
77 result.push_back(id);
78 }
79 return result;
80}
81
82// Test previousValue only if Reader has it.
83template <class... Args>
84void maybeTestPreviousValue(Args&&...) {}
85
86// Make all the arguments template because if the types are not exact,
87// the above overload will be picked (for example i could be size_t or
88// ssize_t).
89template <class Vector, class Reader, class Index>
90auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i)
91 -> decltype(reader.previousValue(), void()) {
92 if (i != 0) {
93 EXPECT_EQ(reader.previousValue(), data[i - 1]);
94 }
95}
96
97// Test previous only if Reader has it.
98template <class... Args>
99void maybeTestPrevious(Args&&...) {}
100
101// Make all the arguments template because if the types are not exact,
102// the above overload will be picked (for example i could be size_t or
103// ssize_t).
104template <class Vector, class Reader, class Index>
105auto maybeTestPrevious(const Vector& data, Reader& reader, Index i)
106 -> decltype(reader.previous(), void()) {
107 auto r = reader.previous();
108 if (i != 0) {
109 EXPECT_TRUE(r);
110 EXPECT_EQ(reader.value(), data[i - 1]);
111 } else {
112 EXPECT_FALSE(r);
113 }
114 reader.next();
115 EXPECT_EQ(reader.value(), data[i]);
116}
117
118template <class Reader, class List>
119void testNext(const std::vector<uint32_t>& data, const List& list) {
120 Reader reader(list);
121 EXPECT_FALSE(reader.valid());
122
123 for (size_t i = 0; i < data.size(); ++i) {
124 EXPECT_TRUE(reader.next());
125 EXPECT_TRUE(reader.valid());
126 EXPECT_EQ(reader.value(), data[i]);
127 EXPECT_EQ(reader.position(), i);
128 maybeTestPreviousValue(data, reader, i);
129 maybeTestPrevious(data, reader, i);
130 }
131 EXPECT_FALSE(reader.next());
132 EXPECT_FALSE(reader.valid());
133 EXPECT_EQ(reader.position(), reader.size());
134}
135
136template <class Reader, class List>
137void testSkip(
138 const std::vector<uint32_t>& data,
139 const List& list,
140 size_t skipStep) {
141 CHECK_GT(skipStep, 0);
142 Reader reader(list);
143
144 for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
145 EXPECT_TRUE(reader.skip(skipStep));
146 EXPECT_TRUE(reader.valid());
147 EXPECT_EQ(reader.value(), data[i]);
148 EXPECT_EQ(reader.position(), i);
149 maybeTestPreviousValue(data, reader, i);
150 maybeTestPrevious(data, reader, i);
151 }
152 EXPECT_FALSE(reader.skip(skipStep));
153 EXPECT_FALSE(reader.valid());
154 EXPECT_EQ(reader.position(), reader.size());
155 EXPECT_FALSE(reader.next());
156}
157
158template <class Reader, class List>
159void testSkip(const std::vector<uint32_t>& data, const List& list) {
160 for (size_t skipStep = 1; skipStep < 25; ++skipStep) {
161 testSkip<Reader, List>(data, list, skipStep);
162 }
163 for (size_t skipStep = 25; skipStep <= 500; skipStep += 25) {
164 testSkip<Reader, List>(data, list, skipStep);
165 }
166}
167
168template <class Reader, class List>
169void testSkipTo(
170 const std::vector<uint32_t>& data,
171 const List& list,
172 size_t skipToStep) {
173 CHECK_GT(skipToStep, 0);
174 Reader reader(list);
175
176 const uint32_t delta = std::max<uint32_t>(1, data.back() / skipToStep);
177 uint32_t value = delta;
178 auto it = data.begin();
179 while (true) {
180 it = std::lower_bound(it, data.end(), value);
181 if (it == data.end()) {
182 EXPECT_FALSE(reader.skipTo(value));
183 break;
184 }
185 EXPECT_TRUE(reader.skipTo(value));
186 EXPECT_TRUE(reader.valid());
187 EXPECT_EQ(reader.value(), *it);
188 EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
189 value = reader.value() + delta;
190 maybeTestPreviousValue(data, reader, std::distance(data.begin(), it));
191 maybeTestPrevious(data, reader, std::distance(data.begin(), it));
192 }
193 EXPECT_FALSE(reader.valid());
194 EXPECT_EQ(reader.position(), reader.size());
195 EXPECT_FALSE(reader.next());
196}
197
198template <class Reader, class List>
199void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
200 for (size_t steps = 10; steps < 100; steps += 10) {
201 testSkipTo<Reader, List>(data, list, steps);
202 }
203 for (size_t steps = 100; steps <= 1000; steps += 100) {
204 testSkipTo<Reader, List>(data, list, steps);
205 }
206 testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max());
207 {
208 // Skip to the first element.
209 Reader reader(list);
210 EXPECT_TRUE(reader.skipTo(data[0]));
211 EXPECT_EQ(reader.value(), data[0]);
212 EXPECT_EQ(reader.position(), 0);
213 }
214 {
215 // Skip past the last element.
216 Reader reader(list);
217 EXPECT_FALSE(reader.skipTo(data.back() + 1));
218 EXPECT_FALSE(reader.valid());
219 EXPECT_EQ(reader.position(), reader.size());
220 EXPECT_FALSE(reader.next());
221 }
222 {
223 // Skip to maximum integer.
224 Reader reader(list);
225 using ValueType = typename Reader::ValueType;
226 EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
227 EXPECT_FALSE(reader.valid());
228 EXPECT_EQ(reader.position(), reader.size());
229 EXPECT_FALSE(reader.next());
230 }
231}
232
233template <class Reader, class List>
234void testJump(const std::vector<uint32_t>& data, const List& list) {
235 std::mt19937 gen;
236 std::vector<size_t> is(data.size());
237 for (size_t i = 0; i < data.size(); ++i) {
238 is[i] = i;
239 }
240 std::shuffle(is.begin(), is.end(), gen);
241 if (Reader::EncoderType::forwardQuantum == 0) {
242 is.resize(std::min<size_t>(is.size(), 100));
243 }
244
245 Reader reader(list);
246 for (auto i : is) {
247 EXPECT_TRUE(reader.jump(i));
248 EXPECT_EQ(reader.value(), data[i]);
249 EXPECT_EQ(reader.position(), i);
250 maybeTestPreviousValue(data, reader, i);
251 maybeTestPrevious(data, reader, i);
252 }
253 EXPECT_FALSE(reader.jump(data.size()));
254 EXPECT_FALSE(reader.valid());
255 EXPECT_EQ(reader.position(), reader.size());
256}
257
258template <class Reader, class List>
259void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
260 CHECK(!data.empty());
261
262 Reader reader(list);
263
264 std::mt19937 gen;
265 std::uniform_int_distribution<> values(0, data.back());
266 const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000;
267 for (size_t i = 0; i < iters; ++i) {
268 const uint32_t value = values(gen);
269 auto it = std::lower_bound(data.begin(), data.end(), value);
270 CHECK(it != data.end());
271 EXPECT_TRUE(reader.jumpTo(value));
272 EXPECT_EQ(reader.value(), *it);
273 EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
274 }
275
276 EXPECT_TRUE(reader.jumpTo(0));
277 EXPECT_EQ(reader.value(), data[0]);
278 EXPECT_EQ(reader.position(), 0);
279
280 EXPECT_TRUE(reader.jumpTo(data.back()));
281 EXPECT_EQ(reader.value(), data.back());
282 EXPECT_EQ(reader.position(), reader.size() - 1);
283
284 EXPECT_FALSE(reader.jumpTo(data.back() + 1));
285 EXPECT_FALSE(reader.valid());
286 EXPECT_EQ(reader.position(), reader.size());
287}
288
289template <class Reader, class Encoder>
290void testEmpty() {
291 const typename Encoder::ValueType* const data = nullptr;
292 auto list = Encoder::encode(data, data);
293 {
294 Reader reader(list);
295 EXPECT_FALSE(reader.next());
296 EXPECT_EQ(reader.size(), 0);
297 }
298 {
299 Reader reader(list);
300 EXPECT_FALSE(reader.skip(1));
301 EXPECT_FALSE(reader.skip(10));
302 EXPECT_FALSE(reader.jump(0));
303 EXPECT_FALSE(reader.jump(10));
304 }
305 {
306 Reader reader(list);
307 EXPECT_FALSE(reader.skipTo(1));
308 EXPECT_FALSE(reader.jumpTo(1));
309 }
310}
311
312template <class Reader, class Encoder>
313void testAll(const std::vector<uint32_t>& data) {
314 auto list = Encoder::encode(data.begin(), data.end());
315 testNext<Reader>(data, list);
316 testSkip<Reader>(data, list);
317 testSkipTo<Reader>(data, list);
318 testJump<Reader>(data, list);
319 testJumpTo<Reader>(data, list);
320 list.free();
321}
322
323template <class Reader, class List>
324void bmNext(const List& list, const std::vector<uint32_t>& data, size_t iters) {
325 if (data.empty()) {
326 return;
327 }
328
329 Reader reader(list);
330 for (size_t i = 0; i < iters; ++i) {
331 if (LIKELY(reader.next())) {
332 folly::doNotOptimizeAway(reader.value());
333 } else {
334 reader.reset();
335 }
336 }
337}
338
339template <class Reader, class List>
340void bmSkip(
341 const List& list,
342 const std::vector<uint32_t>& /* data */,
343 size_t logAvgSkip,
344 size_t iters) {
345 size_t avg = (size_t(1) << logAvgSkip);
346 size_t base = avg - (avg >> 2);
347 size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
348
349 Reader reader(list);
350 for (size_t i = 0; i < iters; ++i) {
351 size_t skip = base + (i & mask);
352 if (LIKELY(reader.skip(skip))) {
353 folly::doNotOptimizeAway(reader.value());
354 } else {
355 reader.reset();
356 }
357 }
358}
359
360template <class Reader, class List>
361void bmSkipTo(
362 const List& list,
363 const std::vector<uint32_t>& data,
364 size_t logAvgSkip,
365 size_t iters) {
366 size_t avg = (size_t(1) << logAvgSkip);
367 size_t base = avg - (avg >> 2);
368 size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
369
370 Reader reader(list);
371 for (size_t i = 0, j = -1; i < iters; ++i) {
372 size_t skip = base + (i & mask);
373 j += skip;
374 if (j >= data.size()) {
375 reader.reset();
376 j = -1;
377 }
378
379 reader.skipTo(data[j]);
380 folly::doNotOptimizeAway(reader.value());
381 }
382}
383
384template <class Reader, class List>
385void bmJump(
386 const List& list,
387 const std::vector<uint32_t>& data,
388 const std::vector<size_t>& order,
389 size_t iters) {
390 CHECK(!data.empty());
391 CHECK_EQ(data.size(), order.size());
392
393 Reader reader(list);
394 for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
395 if (j == order.size()) {
396 j = 0;
397 }
398 reader.jump(order[j]);
399 folly::doNotOptimizeAway(reader.value());
400 }
401}
402
403template <class Reader, class List>
404void bmJumpTo(
405 const List& list,
406 const std::vector<uint32_t>& data,
407 const std::vector<size_t>& order,
408 size_t iters) {
409 CHECK(!data.empty());
410 CHECK_EQ(data.size(), order.size());
411
412 Reader reader(list);
413 for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
414 if (j == order.size()) {
415 j = 0;
416 }
417 reader.jumpTo(data[order[j]]);
418 folly::doNotOptimizeAway(reader.value());
419 }
420}
421
422folly::Optional<instructions::Type> instructionsOverride();
423
424template <class F>
425auto dispatchInstructions(F&& f)
426 -> decltype(f(std::declval<instructions::Default>())) {
427 if (auto type = instructionsOverride()) {
428 return instructions::dispatch(*type, std::forward<F>(f));
429 } else {
430 return instructions::dispatch(std::forward<F>(f));
431 }
432}
433
434} // namespace compression
435} // namespace folly
436