1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | /** |
18 | * @author Philip Pronin (philipp@fb.com) |
19 | * |
20 | * Based on the paper by Sebastiano Vigna, |
21 | * "Quasi-succinct indices" (arxiv:1206.4300). |
22 | */ |
23 | |
24 | #pragma once |
25 | |
26 | #include <algorithm> |
27 | #include <cstdlib> |
28 | #include <limits> |
29 | #include <type_traits> |
30 | |
31 | #include <folly/Likely.h> |
32 | #include <folly/Portability.h> |
33 | #include <folly/Range.h> |
34 | #include <folly/experimental/CodingDetail.h> |
35 | #include <folly/experimental/Instructions.h> |
36 | #include <folly/experimental/Select64.h> |
37 | #include <folly/lang/Assume.h> |
38 | #include <folly/lang/Bits.h> |
39 | #include <glog/logging.h> |
40 | |
41 | #if !FOLLY_X64 |
42 | #error EliasFanoCoding.h requires x86_64 |
43 | #endif |
44 | |
45 | namespace folly { |
46 | namespace compression { |
47 | |
48 | static_assert(kIsLittleEndian, "EliasFanoCoding.h requires little endianness" ); |
49 | |
50 | constexpr size_t kCacheLineSize = 64; |
51 | |
// Non-owning view of an Elias-Fano-encoded list. The same layout is
// used for both mutable (uint8_t*) and immutable (const uint8_t*)
// variants via the Pointer template parameter.
template <class Pointer>
struct EliasFanoCompressedListBase {
  EliasFanoCompressedListBase() = default;

  // Converting constructor: lets a mutable list be viewed as an
  // immutable one (Pointer = const uint8_t*, OtherPointer = uint8_t*).
  template <class OtherPointer>
  EliasFanoCompressedListBase(
      const EliasFanoCompressedListBase<OtherPointer>& other)
      : size(other.size),
        numLowerBits(other.numLowerBits),
        data(other.data),
        skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
        forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)),
        lower(reinterpret_cast<Pointer>(other.lower)),
        upper(reinterpret_cast<Pointer>(other.upper)) {}

  // Frees the underlying buffer. SFINAE on the trailing return type
  // (::free takes void*) makes this instantiable only when Pointer is
  // non-const, i.e. only for MutableEliasFanoCompressedList.
  template <class T = Pointer>
  auto free() -> decltype(::free(T(nullptr))) {
    return ::free(data.data());
  }

  // Bytes from the start of the upper-bits sequence to the end of data.
  size_t upperSize() const {
    return size_t(data.end() - upper);
  }

  size_t size = 0; // Number of encoded values.
  uint8_t numLowerBits = 0; // Bits per value stored in the lower sequence.

  // WARNING: EliasFanoCompressedList has no ownership of data. The 7
  // bytes following the last byte should be readable.
  folly::Range<Pointer> data;

  // Section pointers into data (see Layout::openList for the order).
  Pointer skipPointers = nullptr;
  Pointer forwardPointers = nullptr;
  Pointer lower = nullptr;
  Pointer upper = nullptr;
};
88 | |
89 | typedef EliasFanoCompressedListBase<const uint8_t*> EliasFanoCompressedList; |
90 | typedef EliasFanoCompressedListBase<uint8_t*> MutableEliasFanoCompressedList; |
91 | |
92 | template < |
93 | class Value, |
94 | class SkipValue = size_t, |
95 | size_t kSkipQuantum = 0, // 0 = disabled |
96 | size_t kForwardQuantum = 0, // 0 = disabled |
97 | bool kUpperFirst = false> |
98 | struct EliasFanoEncoderV2 { |
99 | static_assert( |
100 | std::is_integral<Value>::value && std::is_unsigned<Value>::value, |
101 | "Value should be unsigned integral" ); |
102 | |
103 | typedef EliasFanoCompressedList CompressedList; |
104 | typedef MutableEliasFanoCompressedList MutableCompressedList; |
105 | |
106 | typedef Value ValueType; |
107 | typedef SkipValue SkipValueType; |
108 | struct Layout; |
109 | |
110 | static constexpr size_t skipQuantum = kSkipQuantum; |
111 | static constexpr size_t forwardQuantum = kForwardQuantum; |
112 | |
113 | static uint8_t defaultNumLowerBits(size_t upperBound, size_t size) { |
114 | if (UNLIKELY(size == 0 || upperBound < size)) { |
115 | return 0; |
116 | } |
117 | // Result that should be returned is "floor(log(upperBound / size))". |
118 | // In order to avoid expensive division, we rely on |
119 | // "floor(a) - floor(b) - 1 <= floor(a - b) <= floor(a) - floor(b)". |
120 | // Assuming "candidate = floor(log(upperBound)) - floor(log(upperBound))", |
121 | // then result is either "candidate - 1" or "candidate". |
122 | auto candidate = folly::findLastSet(upperBound) - folly::findLastSet(size); |
123 | // NOTE: As size != 0, "candidate" is always < 64. |
124 | return (size > (upperBound >> candidate)) ? candidate - 1 : candidate; |
125 | } |
126 | |
127 | // Requires: input range (begin, end) is sorted (encoding |
128 | // crashes if it's not). |
129 | // WARNING: encode() mallocates EliasFanoCompressedList::data. As |
130 | // EliasFanoCompressedList has no ownership of it, you need to call |
131 | // free() explicitly. |
132 | template <class RandomAccessIterator> |
133 | static MutableCompressedList encode( |
134 | RandomAccessIterator begin, |
135 | RandomAccessIterator end) { |
136 | if (begin == end) { |
137 | return MutableCompressedList(); |
138 | } |
139 | EliasFanoEncoderV2 encoder(size_t(end - begin), *(end - 1)); |
140 | for (; begin != end; ++begin) { |
141 | encoder.add(*begin); |
142 | } |
143 | return encoder.finish(); |
144 | } |
145 | |
146 | explicit EliasFanoEncoderV2(const MutableCompressedList& result) |
147 | : lower_(result.lower), |
148 | upper_(result.upper), |
149 | skipPointers_(reinterpret_cast<SkipValueType*>(result.skipPointers)), |
150 | forwardPointers_( |
151 | reinterpret_cast<SkipValueType*>(result.forwardPointers)), |
152 | result_(result) { |
153 | std::fill(result.data.begin(), result.data.end(), '\0'); |
154 | } |
155 | |
156 | EliasFanoEncoderV2(size_t size, ValueType upperBound) |
157 | : EliasFanoEncoderV2( |
158 | Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {} |
159 | |
160 | void add(ValueType value) { |
161 | CHECK_LT(value, std::numeric_limits<ValueType>::max()); |
162 | CHECK_GE(value, lastValue_); |
163 | |
164 | const auto numLowerBits = result_.numLowerBits; |
165 | const ValueType upperBits = value >> numLowerBits; |
166 | |
167 | // Upper sequence consists of upperBits 0-bits and (size_ + 1) 1-bits. |
168 | const size_t pos = upperBits + size_; |
169 | upper_[pos / 8] |= 1U << (pos % 8); |
170 | // Append numLowerBits bits to lower sequence. |
171 | if (numLowerBits != 0) { |
172 | const ValueType lowerBits = value & ((ValueType(1) << numLowerBits) - 1); |
173 | writeBits56(lower_, size_ * numLowerBits, numLowerBits, lowerBits); |
174 | } |
175 | |
176 | if /* constexpr */ (skipQuantum != 0) { |
177 | while ((skipPointersSize_ + 1) * skipQuantum <= upperBits) { |
178 | // Store the number of preceding 1-bits. |
179 | skipPointers_[skipPointersSize_++] = SkipValue(size_); |
180 | } |
181 | } |
182 | |
183 | if /* constexpr */ (forwardQuantum != 0) { |
184 | if ((size_ + 1) % forwardQuantum == 0) { |
185 | const auto k = size_ / forwardQuantum; |
186 | // Store the number of preceding 0-bits. |
187 | forwardPointers_[k] = upperBits; |
188 | } |
189 | } |
190 | |
191 | lastValue_ = value; |
192 | ++size_; |
193 | } |
194 | |
195 | const MutableCompressedList& finish() const { |
196 | CHECK_EQ(size_, result_.size); |
197 | return result_; |
198 | } |
199 | |
200 | private: |
201 | // Writes value (with len up to 56 bits) to data starting at pos-th bit. |
202 | static void |
203 | writeBits56(unsigned char* data, size_t pos, uint8_t len, uint64_t value) { |
204 | DCHECK_LE(uint32_t(len), 56); |
205 | DCHECK_EQ(0, value & ~((uint64_t(1) << len) - 1)); |
206 | unsigned char* const ptr = data + (pos / 8); |
207 | uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr); |
208 | ptrv |= value << (pos % 8); |
209 | folly::storeUnaligned<uint64_t>(ptr, ptrv); |
210 | } |
211 | |
212 | unsigned char* lower_ = nullptr; |
213 | unsigned char* upper_ = nullptr; |
214 | SkipValueType* skipPointers_ = nullptr; |
215 | SkipValueType* forwardPointers_ = nullptr; |
216 | |
217 | ValueType lastValue_ = 0; |
218 | size_t size_ = 0; |
219 | size_t = 0; |
220 | |
221 | MutableCompressedList result_; |
222 | }; |
223 | |
// Byte layout of an encoded list: sizes (in bytes) of each section and
// helpers to allocate / open a buffer with that layout.
template <
    class Value,
    class SkipValue,
    size_t kSkipQuantum,
    size_t kForwardQuantum,
    bool kUpperFirst>
struct EliasFanoEncoderV2<
    Value,
    SkipValue,
    kSkipQuantum,
    kForwardQuantum,
    kUpperFirst>::Layout {
  // Computes section sizes for a list of `size` values bounded by
  // `upperBound`.
  static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
    // numLowerBits can be at most 56 because of detail::writeBits56.
    const uint8_t numLowerBits =
        std::min(defaultNumLowerBits(upperBound, size), uint8_t(56));
    // *** Upper bits.
    // Upper bits are stored using unary delta encoding.
    // For example, (3 5 5 9) will be encoded as 1000011001000_2.
    const size_t upperSizeBits =
        (upperBound >> numLowerBits) + // Number of 0-bits to be stored.
        size; // 1-bits.
    const size_t upper = (upperSizeBits + 7) / 8;

    // *** Validity checks.
    // Shift by numLowerBits must be valid.
    CHECK_LT(numLowerBits, 8 * sizeof(Value));
    CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
    CHECK_LT(
        upperBound >> numLowerBits, std::numeric_limits<SkipValueType>::max());

    return fromInternalSizes(numLowerBits, upper, size);
  }

  // Builds a Layout from already-computed internal sizes (used both
  // above and when deserializing a stored layout).
  static Layout
  fromInternalSizes(uint8_t numLowerBits, size_t upper, size_t size) {
    Layout layout;
    layout.size = size;
    layout.numLowerBits = numLowerBits;

    // numLowerBits bits per value, rounded up to whole bytes.
    layout.lower = (numLowerBits * size + 7) / 8;
    layout.upper = upper;

    // *** Skip pointers.
    // Store (1-indexed) position of every skipQuantum-th
    // 0-bit in upper bits sequence.
    if /* constexpr */ (skipQuantum != 0) {
      // 8 * upper is used here instead of upperSizeBits, as that is
      // more serialization-friendly way (upperSizeBits doesn't need
      // to be known by this function, unlike upper).

      size_t numSkipPointers = (8 * upper - size) / skipQuantum;
      layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
    }

    // *** Forward pointers.
    // Store (1-indexed) position of every forwardQuantum-th
    // 1-bit in upper bits sequence.
    if /* constexpr */ (forwardQuantum != 0) {
      size_t numForwardPointers = size / forwardQuantum;
      layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
    }

    return layout;
  }

  // Total payload size in bytes (excludes the 7-byte read padding).
  size_t bytes() const {
    return lower + upper + skipPointers + forwardPointers;
  }

  // Carves buf into sections in serialization order and returns a list
  // view over them. Advances buf past the consumed bytes.
  template <class Range>
  EliasFanoCompressedListBase<typename Range::iterator> openList(
      Range& buf) const {
    EliasFanoCompressedListBase<typename Range::iterator> result;
    result.size = size;
    result.numLowerBits = numLowerBits;
    result.data = buf.subpiece(0, bytes());

    // Consumes n bytes from buf, returning the section start.
    auto advance = [&](size_t n) {
      auto begin = buf.data();
      buf.advance(n);
      return begin;
    };

    result.skipPointers = advance(skipPointers);
    result.forwardPointers = advance(forwardPointers);
    if /* constexpr */ (kUpperFirst) {
      result.upper = advance(upper);
      result.lower = advance(lower);
    } else {
      result.lower = advance(lower);
      result.upper = advance(upper);
    }

    return result;
  }

  // Mallocates a buffer for this layout and opens a mutable list over
  // it. Caller owns the buffer (see CompressedList::free()).
  MutableCompressedList allocList() const {
    uint8_t* buf = nullptr;
    // WARNING: Current read/write logic assumes that the 7 bytes
    // following the last byte of lower and upper sequences are
    // readable (stored value doesn't matter and won't be changed), so
    // we allocate additional 7 bytes, but do not include them in size
    // of returned value.
    if (size > 0) {
      buf = static_cast<uint8_t*>(malloc(bytes() + 7));
    }
    folly::MutableByteRange bufRange(buf, bytes());
    return openList(bufRange);
  }

  size_t size = 0; // Number of values.
  uint8_t numLowerBits = 0; // Bits per value in lower sequence.

  // Sizes in bytes.
  size_t lower = 0;
  size_t upper = 0;
  size_t skipPointers = 0;
  size_t forwardPointers = 0;
};
344 | |
345 | namespace detail { |
346 | |
// Reader for the unary-encoded upper-bits sequence. Maintains the
// invariant that value_ = 8 * outer_ + inner - position_, i.e. the
// number of 0-bits preceding the current 1-bit, which equals the
// upper part of the current element.
template <class Encoder, class Instructions, class SizeType>
class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
                        SkipPointers<Encoder::skipQuantum> {
  typedef typename Encoder::SkipValueType SkipValueType;

 public:
  typedef typename Encoder::ValueType ValueType;

  explicit UpperBitsReader(const typename Encoder::CompressedList& list)
      : ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers),
        SkipPointers<Encoder::skipQuantum>(list.skipPointers),
        start_(list.upper) {
    reset();
  }

  // Rewinds to before the first element; position_ wraps to -1 so that
  // the first next() lands on position 0.
  void reset() {
    block_ = start_ != nullptr ? folly::loadUnaligned<block_t>(start_) : 0;
    position_ = std::numeric_limits<SizeType>::max();
    outer_ = 0;
    value_ = 0;
  }

  SizeType position() const {
    return position_;
  }
  ValueType value() const {
    return value_;
  }

  // Moves one element back; requires position() > 0.
  ValueType previous() {
    size_t inner;
    block_t block;
    getPreviousInfo(block, inner, outer_);
    block_ = folly::loadUnaligned<block_t>(start_ + outer_);
    // XOR clears all bits up to and including the previous 1-bit, so
    // the invariant "block_ holds unconsumed bits" is restored.
    block_ ^= block;
    --position_;
    return setValue(inner);
  }

  // Advances to the next 1-bit and returns the decoded upper value.
  ValueType next() {
    // Skip to the first non-zero block.
    while (block_ == 0) {
      outer_ += sizeof(block_t);
      block_ = folly::loadUnaligned<block_t>(start_ + outer_);
    }

    ++position_;
    size_t inner = Instructions::ctz(block_);
    block_ = Instructions::blsr(block_); // Clear lowest set bit.

    return setValue(inner);
  }

  // Advances by n 1-bits (n > 0), using forward pointers when the jump
  // is large enough.
  ValueType skip(SizeType n) {
    DCHECK_GT(n, 0);

    position_ += n; // n 1-bits will be read.

    // Use forward pointer.
    if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
      const size_t steps = position_ / Encoder::forwardQuantum;
      const size_t dest = folly::loadUnaligned<SkipValueType>(
          this->forwardPointers_ + (steps - 1) * sizeof(SkipValueType));

      reposition(dest + steps * Encoder::forwardQuantum);
      n = position_ + 1 - steps * Encoder::forwardQuantum; // n is > 0.
    }

    size_t cnt;
    // Find necessary block.
    while ((cnt = Instructions::popcount(block_)) < n) {
      n -= cnt;
      outer_ += sizeof(block_t);
      block_ = folly::loadUnaligned<block_t>(start_ + outer_);
    }

    // Skip to the n-th one in the block.
    DCHECK_GT(n, 0);
    size_t inner = select64<Instructions>(block_, n - 1);
    block_ &= (block_t(-1) << inner) << 1;

    return setValue(inner);
  }

  // Skip to the first element that is >= v and located *after* the current
  // one (so even if current value equals v, position will be increased by 1).
  ValueType skipToNext(ValueType v) {
    DCHECK_GE(v, value_);

    // Use skip pointer.
    if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) {
      const size_t steps = v / Encoder::skipQuantum;
      const size_t dest = folly::loadUnaligned<SkipValueType>(
          this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));

      reposition(dest + Encoder::skipQuantum * steps);
      position_ = dest - 1;

      // Correct value_ will be set during the next() call at the end.

      // NOTE: Corresponding block of lower bits sequence may be
      // prefetched here (via __builtin_prefetch), but experiments
      // didn't show any significant improvements.
    }

    // Skip by blocks.
    size_t cnt;
    // Number of 0-bits still to be consumed before reaching v.
    size_t skip = v - (8 * outer_ - position_ - 1);

    constexpr size_t kBitsPerBlock = 8 * sizeof(block_t);
    while ((cnt = Instructions::popcount(~block_)) < skip) {
      skip -= cnt;
      position_ += kBitsPerBlock - cnt;
      outer_ += sizeof(block_t);
      block_ = folly::loadUnaligned<block_t>(start_ + outer_);
    }

    if (LIKELY(skip)) {
      // Select the skip-th 0-bit and drop everything below it.
      auto inner = select64<Instructions>(~block_, skip - 1);
      position_ += inner - skip + 1;
      block_ &= block_t(-1) << inner;
    }

    next();
    return value_;
  }

  /**
   * Prepare to skip to `value`. This is a constant-time operation that will
   * prefetch memory required for a `skipTo(value)` call.
   *
   * @return position of reader
   */
  SizeType prepareSkipTo(ValueType v) const {
    auto position = position_;

    if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) {
      auto outer = outer_;
      const size_t steps = v / Encoder::skipQuantum;
      const size_t dest = folly::loadUnaligned<SkipValueType>(
          this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));

      position = dest - 1;
      outer = (dest + Encoder::skipQuantum * steps) / 8;

      // Prefetch up to the beginning of where we linear search. After that,
      // hardware prefetching will outperform our own. In addition, this
      // simplifies calculating what to prefetch as we don't have to calculate
      // the entire destination address. Two cache lines are prefetched because
      // this results in fewer cycles used (based on practical results) than
      // one. However, three cache lines does not have any additional effect.
      const auto addr = start_ + outer;
      __builtin_prefetch(addr);
      __builtin_prefetch(addr + kCacheLineSize);
    }

    return position;
  }

  // Absolute positioning: move to the n-th 1-bit from the start.
  ValueType jump(size_t n) {
    if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) {
      reset();
    } else {
      // Avoid reading the head, skip() will reposition.
      position_ = std::numeric_limits<SizeType>::max();
    }
    return skip(n);
  }

  // Absolute positioning by value: first element strictly after reset
  // point that is >= v.
  ValueType jumpToNext(ValueType v) {
    if (Encoder::skipQuantum == 0 || v < Encoder::skipQuantum) {
      reset();
    } else {
      value_ = 0; // Avoid reading the head, skipToNext() will reposition.
    }
    return skipToNext(v);
  }

  // Value of the previous element, without moving the reader.
  ValueType previousValue() const {
    block_t block;
    size_t inner;
    OuterType outer;
    getPreviousInfo(block, inner, outer);
    return static_cast<ValueType>(8 * outer + inner - (position_ - 1));
  }

  // Marks the reader as exhausted at endPos.
  void setDone(SizeType endPos) {
    position_ = endPos;
  }

 private:
  // Derives value_ from the core invariant (see class comment).
  ValueType setValue(size_t inner) {
    value_ = static_cast<ValueType>(8 * outer_ + inner - position_);
    return value_;
  }

  // Repositions to absolute bit offset dest, discarding bits below it.
  void reposition(SizeType dest) {
    outer_ = dest / 8;
    block_ = folly::loadUnaligned<block_t>(start_ + outer_);
    block_ &= ~((block_t(1) << (dest % 8)) - 1);
  }

  using block_t = uint64_t;
  // The size in bytes of the upper bits is limited by n + universe / 8,
  // so a type that can hold either sizes or values is sufficient.
  using OuterType = typename std::common_type<ValueType, SizeType>::type;

  // Locates the 1-bit of the previous element: its block, bit index
  // within the block, and block byte offset.
  void getPreviousInfo(block_t& block, size_t& inner, OuterType& outer) const {
    DCHECK_NE(position(), std::numeric_limits<SizeType>::max());
    DCHECK_GT(position(), 0);

    outer = outer_;
    block = folly::loadUnaligned<block_t>(start_ + outer);
    // Bit index of the current element's 1-bit within its block.
    inner = size_t(value_) - 8 * outer_ + position_;
    block &= (block_t(1) << inner) - 1;
    while (UNLIKELY(block == 0)) {
      DCHECK_GT(outer, 0);
      outer -= std::min<OuterType>(sizeof(block_t), outer);
      block = folly::loadUnaligned<block_t>(start_ + outer);
    }
    inner = 8 * sizeof(block_t) - 1 - Instructions::clz(block);
  }

  const unsigned char* const start_;
  block_t block_;
  SizeType position_; // Index of current value (= #reads - 1).
  OuterType outer_; // Outer offset: number of consumed bytes in upper.
  ValueType value_;
};
576 | |
577 | } // namespace detail |
578 | |
579 | // If kUnchecked = true the caller must guarantee that all the |
580 | // operations return valid elements, i.e., they would never return |
581 | // false if checked. |
// Reader over an Elias-Fano-encoded list: combines the upper-bits
// reader with direct indexing into the packed lower-bits sequence.
// Each value is reconstructed as
//   readLowerPart(position) | (upper value << numLowerBits_).
template <
    class Encoder,
    class Instructions = instructions::Default,
    bool kUnchecked = false,
    class SizeType = size_t>
class EliasFanoReader {
 public:
  typedef Encoder EncoderType;
  typedef typename Encoder::ValueType ValueType;

  explicit EliasFanoReader(const typename Encoder::CompressedList& list)
      : upper_(list),
        lower_(list.lower),
        size_(list.size),
        numLowerBits_(list.numLowerBits) {
    DCHECK(Instructions::supported());
    // To avoid extra branching during skipTo() while reading
    // upper sequence we need to know the last element.
    // If kUnchecked == true, we do not check that skipTo() is called
    // within the bounds, so we can avoid initializing lastValue_.
    if (kUnchecked || UNLIKELY(list.size == 0)) {
      lastValue_ = 0;
      return;
    }
    // Recover the last element's upper part from the position of the
    // highest 1-bit in the upper sequence.
    ValueType lastUpperValue = ValueType(8 * list.upperSize() - size_);
    auto it = list.upper + list.upperSize() - 1;
    DCHECK_NE(*it, 0);
    lastUpperValue -= 8 - folly::findLastSet(*it);
    lastValue_ = readLowerPart(size_ - 1) | (lastUpperValue << numLowerBits_);
  }

  // Rewinds to before the first element.
  void reset() {
    upper_.reset();
    value_ = kInvalidValue;
  }

  // Moves one element back; returns false (and resets) at the front.
  bool previous() {
    if (!kUnchecked && UNLIKELY(position() == 0)) {
      reset();
      return false;
    }
    upper_.previous();
    value_ =
        readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
    return true;
  }

  // Advances one element; returns false when past the end.
  bool next() {
    if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
      return setDone();
    }
    upper_.next();
    value_ =
        readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
    return true;
  }

  // Advances by n elements (n > 0); linear scan for small n.
  bool skip(SizeType n) {
    CHECK_GT(n, 0);

    if (kUnchecked || LIKELY(position() + n < size_)) {
      if (LIKELY(n < kLinearScanThreshold)) {
        for (SizeType i = 0; i < n; ++i) {
          upper_.next();
        }
      } else {
        upper_.skip(n);
      }
      value_ =
          readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
      return true;
    }

    return setDone();
  }

  // Skips forward to the first element >= value. Requires value to be
  // non-decreasing across calls.
  bool skipTo(ValueType value) {
    // Also works when value_ == kInvalidValue.
    if (value != kInvalidValue) {
      DCHECK_GE(value + 1, value_ + 1);
    }

    if (!kUnchecked && value > lastValue_) {
      return setDone();
    } else if (value == value_) {
      return true;
    }

    ValueType upperValue = (value >> numLowerBits_);
    ValueType upperSkip = upperValue - upper_.value();
    // The average density of ones in upper bits is 1/2.
    // LIKELY here seems to make things worse, even for small skips.
    if (upperSkip < 2 * kLinearScanThreshold) {
      do {
        upper_.next();
      } while (UNLIKELY(upper_.value() < upperValue));
    } else {
      upper_.skipToNext(upperValue);
    }

    iterateTo(value);
    return true;
  }

  /**
   * Prepare to skip to `value` by prefetching appropriate memory in both the
   * upper and lower bits.
   */
  void prepareSkipTo(ValueType value) const {
    // Also works when value_ == kInvalidValue.
    if (value != kInvalidValue) {
      DCHECK_GE(value + 1, value_ + 1);
    }

    if ((!kUnchecked && value > lastValue_) || (value == value_)) {
      return;
    }

    // Do minimal computation required to prefetch address used in
    // `readLowerPart()`.
    ValueType upperValue = (value >> numLowerBits_);
    const auto upperPosition = upper_.prepareSkipTo(upperValue);
    const auto addr = lower_ + (upperPosition * numLowerBits_ / 8);
    __builtin_prefetch(addr);
    __builtin_prefetch(addr + kCacheLineSize);
  }

  // Jumps to the n-th (0-based) element.
  bool jump(SizeType n) {
    if (LIKELY(n < size_)) { // Also checks that n != -1.
      value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_);
      return true;
    }
    return setDone();
  }

  // Jumps to the first element >= value (no monotonicity requirement
  // across calls, unlike skipTo).
  bool jumpTo(ValueType value) {
    if (!kUnchecked && value > lastValue_) {
      return setDone();
    }

    upper_.jumpToNext(value >> numLowerBits_);
    iterateTo(value);
    return true;
  }

  ValueType lastValue() const {
    CHECK(!kUnchecked);
    return lastValue_;
  }

  // Value of the element before the current one, without moving.
  ValueType previousValue() const {
    DCHECK_GT(position(), 0);
    DCHECK_LT(position(), size());
    return readLowerPart(upper_.position() - 1) |
        (upper_.previousValue() << numLowerBits_);
  }

  SizeType size() const {
    return size_;
  }

  bool valid() const {
    return position() < size(); // Also checks that position() != -1.
  }

  SizeType position() const {
    return upper_.position();
  }
  ValueType value() const {
    DCHECK(valid());
    return value_;
  }

 private:
  // Must hold kInvalidValue + 1 == 0.
  constexpr static ValueType kInvalidValue =
      std::numeric_limits<ValueType>::max();

  // Marks the reader exhausted; always returns false.
  bool setDone() {
    value_ = kInvalidValue;
    upper_.setDone(size_);
    return false;
  }

  // Extracts the i-th element's numLowerBits_ lower bits from the
  // packed lower sequence (relies on the 7-byte read padding).
  ValueType readLowerPart(SizeType i) const {
    DCHECK_LT(i, size_);
    const size_t pos = i * numLowerBits_;
    const unsigned char* ptr = lower_ + (pos / 8);
    const uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
    // This removes the branch in the fallback implementation of
    // bzhi. The condition is verified at encoding time.
    assume(numLowerBits_ < sizeof(ValueType) * 8);
    return Instructions::bzhi(ptrv >> (pos % 8), numLowerBits_);
  }

  // Linear scan over full values until value_ >= value. The upper
  // reader is already positioned at or before the target.
  void iterateTo(ValueType value) {
    while (true) {
      value_ =
          readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
      if (LIKELY(value_ >= value)) {
        break;
      }
      upper_.next();
    }
  }

  constexpr static size_t kLinearScanThreshold = 8;

  detail::UpperBitsReader<Encoder, Instructions, SizeType> upper_;
  const uint8_t* lower_;
  SizeType size_;
  ValueType value_ = kInvalidValue;
  ValueType lastValue_;
  uint8_t numLowerBits_;
};
797 | |
798 | } // namespace compression |
799 | } // namespace folly |
800 | |