/*
 * Copyright 2015-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <cstdlib>
#include <cstring>
#include <limits>
#include <type_traits>

#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/experimental/Bits.h>
#include <folly/experimental/CodingDetail.h>
#include <folly/experimental/Instructions.h>
#include <folly/experimental/Select64.h>
#include <folly/lang/Bits.h>
#include <glog/logging.h>

#if !FOLLY_X64
#error BitVectorCoding.h requires x86_64
#endif

namespace folly {
namespace compression {

static_assert(kIsLittleEndian, "BitVectorCoding.h requires little endianness");

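// A view over an encoded list: the element count, the upper bound, and
// pointers into one contiguous buffer holding the bit vector itself plus the
// optional skip- and forward-pointer arrays.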
template <class Pointer>
struct BitVectorCompressedListBase {
  BitVectorCompressedListBase() = default;

  template <class OtherPointer>
  BitVectorCompressedListBase(
      const BitVectorCompressedListBase<OtherPointer>& other)
      : size(other.size),
        upperBound(other.upperBound),
        data(other.data),
        bits(reinterpret_cast<Pointer>(other.bits)),
        skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
        forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}

  template <class T = Pointer>
  auto free() -> decltype(::free(T(nullptr))) {
    return ::free(data.data());
  }

  size_t size = 0;
  size_t upperBound = 0;

  folly::Range<Pointer> data;

  Pointer bits = nullptr;
  Pointer skipPointers = nullptr;
  Pointer forwardPointers = nullptr;
};

typedef BitVectorCompressedListBase<const uint8_t*> BitVectorCompressedList;
typedef BitVectorCompressedListBase<uint8_t*> MutableBitVectorCompressedList;

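// Encodes a strictly increasing sequence of unsigned integers as a plain bit
// vector: bit v is set iff value v occurs in the list. Optionally it also
// stores skip pointers (for every kSkipQuantum-th position of the universe,
// the number of values below it) and forward pointers (for every
// kForwardQuantum-th element, its value), which the reader uses to speed up
// skipTo() and skip() respectively.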
template <
    class Value,
    class SkipValue,
    size_t kSkipQuantum = 0,
    size_t kForwardQuantum = 0>
struct BitVectorEncoder {
  static_assert(
      std::is_integral<Value>::value && std::is_unsigned<Value>::value,
      "Value should be unsigned integral");

  typedef BitVectorCompressedList CompressedList;
  typedef MutableBitVectorCompressedList MutableCompressedList;

  typedef Value ValueType;
  typedef SkipValue SkipValueType;
  struct Layout;

  static constexpr size_t skipQuantum = kSkipQuantum;
  static constexpr size_t forwardQuantum = kForwardQuantum;

  template <class RandomAccessIterator>
  static MutableCompressedList encode(
      RandomAccessIterator begin,
      RandomAccessIterator end) {
    if (begin == end) {
      return MutableCompressedList();
    }
    BitVectorEncoder encoder(size_t(end - begin), *(end - 1));
    for (; begin != end; ++begin) {
      encoder.add(*begin);
    }
    return encoder.finish();
  }

  explicit BitVectorEncoder(const MutableCompressedList& result)
      : bits_(result.bits),
        skipPointers_(result.skipPointers),
        forwardPointers_(result.forwardPointers),
        result_(result) {
    memset(result.data.data(), 0, result.data.size());
  }

  BitVectorEncoder(size_t size, ValueType upperBound)
      : BitVectorEncoder(
            Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}

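  // Appends the next value. Values must be added in strictly increasing
  // order: the corresponding bit is set, and any skip/forward pointers that
  // become complete are written out.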
  void add(ValueType value) {
    CHECK_LT(value, std::numeric_limits<ValueType>::max());
    // Also works when lastValue_ == -1.
    CHECK_GT(value + 1, lastValue_ + 1)
        << "BitVectorCoding only supports strictly monotone lists";

    auto block = bits_ + (value / 64) * sizeof(uint64_t);
    size_t inner = value % 64;
    folly::Bits<folly::Unaligned<uint64_t>>::set(
        reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);

    if (skipQuantum != 0) {
      size_t nextSkipPointerSize = value / skipQuantum;
      while (skipPointersSize_ < nextSkipPointerSize) {
        auto pos = skipPointersSize_++;
        folly::storeUnaligned<SkipValueType>(
            skipPointers_ + pos * sizeof(SkipValueType), size_);
      }
    }

    if (forwardQuantum != 0) {
      if (size_ != 0 && (size_ % forwardQuantum == 0)) {
        const auto pos = size_ / forwardQuantum - 1;
        folly::storeUnaligned<SkipValueType>(
            forwardPointers_ + pos * sizeof(SkipValueType), value);
      }
    }

    lastValue_ = value;
    ++size_;
  }

  const MutableCompressedList& finish() const {
    CHECK_EQ(size_, result_.size);
    // TODO(ott): Relax this assumption.
    CHECK_EQ(result_.upperBound, lastValue_);
    return result_;
  }

 private:
  uint8_t* const bits_ = nullptr;
  uint8_t* const skipPointers_ = nullptr;
  uint8_t* const forwardPointers_ = nullptr;

  ValueType lastValue_ = -1;
  size_t size_ = 0;
  size_t skipPointersSize_ = 0;

  MutableCompressedList result_;
};

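// Layout of an encoded list: the byte sizes of the bit vector and of the two
// pointer arrays, computed from the upper bound and the number of elements.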
template <
    class Value,
    class SkipValue,
    size_t kSkipQuantum,
    size_t kForwardQuantum>
struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
    Layout {
  static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
    Layout layout;
    layout.size = size;
    layout.upperBound = upperBound;

    size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
    layout.bits = bitVectorSizeInBytes;

    if (skipQuantum != 0) {
      size_t numSkipPointers = upperBound / skipQuantum;
      layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
    }
    if (forwardQuantum != 0) {
      size_t numForwardPointers = size / forwardQuantum;
      layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
    }

    CHECK_LT(size, std::numeric_limits<SkipValueType>::max());

    return layout;
  }

  size_t bytes() const {
    return bits + skipPointers + forwardPointers;
  }

  template <class Range>
  BitVectorCompressedListBase<typename Range::iterator> openList(
      Range& buf) const {
    BitVectorCompressedListBase<typename Range::iterator> result;
    result.size = size;
    result.upperBound = upperBound;
    result.data = buf.subpiece(0, bytes());
    auto advance = [&](size_t n) {
      auto begin = buf.data();
      buf.advance(n);
      return begin;
    };

    result.bits = advance(bits);
    result.skipPointers = advance(skipPointers);
    result.forwardPointers = advance(forwardPointers);
    CHECK_EQ(buf.data() - result.data.data(), bytes());

    return result;
  }

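  // Allocates the backing buffer and opens a list over it. The extra 7 bytes
  // of slack keep the unaligned 64-bit loads used by the reader within the
  // allocation even at the very end of the bit vector.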
  MutableCompressedList allocList() const {
    uint8_t* buf = nullptr;
    if (size > 0) {
      buf = static_cast<uint8_t*>(malloc(bytes() + 7));
    }
    folly::MutableByteRange bufRange(buf, bytes());
    return openList(bufRange);
  }

  size_t size = 0;
  size_t upperBound = 0;

  // Sizes in bytes.
  size_t bits = 0;
  size_t skipPointers = 0;
  size_t forwardPointers = 0;
};

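// Reader for lists produced by BitVectorEncoder. Supports forward iteration
// (next), skipping by a number of elements (skip), skipping to the first
// value >= v (skipTo), and their restarting counterparts jump/jumpTo.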
template <
    class Encoder,
    class Instructions = instructions::Default,
    bool kUnchecked = false>
class BitVectorReader : detail::ForwardPointers<Encoder::forwardQuantum>,
                        detail::SkipPointers<Encoder::skipQuantum> {
 public:
  typedef Encoder EncoderType;
  typedef typename Encoder::ValueType ValueType;
  // A bitvector can only be as large as its largest value.
  typedef typename Encoder::ValueType SizeType;
  typedef typename Encoder::SkipValueType SkipValueType;

  explicit BitVectorReader(const typename Encoder::CompressedList& list)
      : detail::ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers),
        detail::SkipPointers<Encoder::skipQuantum>(list.skipPointers),
        bits_(list.bits),
        size_(list.size),
        upperBound_(
            (kUnchecked || UNLIKELY(list.size == 0)) ? 0 : list.upperBound) {
    reset();
  }

  void reset() {
    block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
    outer_ = 0;
    position_ = -1;
    value_ = kInvalidValue;
  }

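  // Advances to the next value. Returns false and invalidates the reader once
  // the end of the list is reached (the check is elided if kUnchecked).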
  bool next() {
    if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
      return setDone();
    }

    while (block_ == 0) {
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    ++position_;
    auto inner = Instructions::ctz(block_);
    block_ = Instructions::blsr(block_);

    return setValue(inner);
  }

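  // Advances by n positions (n > 0). Small skips fall back to a linear scan;
  // larger ones use the forward pointers (if enabled) and then count set bits
  // over whole 64-bit blocks.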
  bool skip(SizeType n) {
    CHECK_GT(n, 0);

    if (!kUnchecked && position() + n >= size_) {
      return setDone();
    }
    // Small skip optimization.
    if (LIKELY(n < kLinearScanThreshold)) {
      for (size_t i = 0; i < n; ++i) {
        next();
      }
      return true;
    }

    position_ += n;

    // Use forward pointer.
    if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
      const size_t steps = position_ / Encoder::forwardQuantum;
      const size_t dest = folly::loadUnaligned<SkipValueType>(
          this->forwardPointers_ + (steps - 1) * sizeof(SkipValueType));

      reposition(dest);
      n = position_ + 1 - steps * Encoder::forwardQuantum;
    }

    size_t cnt;
    // Find necessary block.
    while ((cnt = Instructions::popcount(block_)) < n) {
      n -= cnt;
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    // Skip to the n-th one in the block.
    DCHECK_GT(n, 0);
    auto inner = select64<Instructions>(block_, n - 1);
    block_ &= (uint64_t(-1) << inner) << 1;

    return setValue(inner);
  }

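  // Advances to the first value >= v; v must not be smaller than the current
  // value. Small distances are scanned linearly; larger ones use the skip
  // pointers (if enabled) and then count set bits over whole 64-bit blocks.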
  bool skipTo(ValueType v) {
    // Also works when value_ == kInvalidValue.
    if (v != kInvalidValue) {
      DCHECK_GE(v + 1, value_ + 1);
    }

    if (!kUnchecked && v > upperBound_) {
      return setDone();
    } else if (v == value_) {
      return true;
    }

    // Small skip optimization.
    if (v - value_ < kLinearScanThreshold) {
      do {
        next();
      } while (value() < v);

      return true;
    }

    if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
      size_t q = v / Encoder::skipQuantum;
      auto skipPointer = folly::loadUnaligned<SkipValueType>(
          this->skipPointers_ + (q - 1) * sizeof(SkipValueType));
      position_ = static_cast<SizeType>(skipPointer) - 1;

      reposition(q * Encoder::skipQuantum);
    }

    // Find the value.
    size_t outer = v / 64 * 8;

    while (outer_ < outer) {
      position_ += Instructions::popcount(block_);
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    DCHECK_EQ(outer_, outer);
    uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
    position_ += Instructions::popcount(block_ & ~mask) + 1;
    block_ &= mask;

    while (block_ == 0) {
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    auto inner = Instructions::ctz(block_);
    block_ = Instructions::blsr(block_);

    setValue(inner);
    return true;
  }

  SizeType size() const {
    return size_;
  }

  bool valid() const {
    return position() < size(); // Also checks that position() != -1.
  }

  SizeType position() const {
    return position_;
  }
  ValueType value() const {
    DCHECK(valid());
    return value_;
  }

  bool jump(SizeType n) {
    reset();
    return skip(n + 1);
  }

  bool jumpTo(ValueType v) {
    reset();
    return skipTo(v);
  }

  bool setDone() {
    value_ = kInvalidValue;
    position_ = size_;
    return false;
  }

 private:
  // Must hold kInvalidValue + 1 == 0.
  constexpr static ValueType kInvalidValue =
      std::numeric_limits<ValueType>::max();

  bool setValue(size_t inner) {
    value_ = static_cast<ValueType>(8 * outer_ + inner);
    return true;
  }

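  // Points the reader at universe position dest: loads the containing 64-bit
  // block and clears the bits below dest % 64.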
  void reposition(size_t dest) {
    outer_ = dest / 64 * 8;
    // We maintain the invariant that outer_ is divisible by 8.
    block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
  }

  constexpr static size_t kLinearScanThreshold = 4;

  const uint8_t* const bits_;
  uint64_t block_;
  SizeType outer_;
  SizeType position_;
  ValueType value_;

  const SizeType size_;
  const ValueType upperBound_;
};

} // namespace compression
} // namespace folly