1 | /* |
2 | * Copyright 2015-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <array> |
20 | |
21 | #include <glog/logging.h> |
22 | |
23 | #include <folly/Portability.h> |
24 | #include <folly/experimental/Instructions.h> |
25 | |
26 | namespace folly { |
27 | |
28 | namespace detail { |
29 | |
30 | // kSelectInByte |
31 | // |
32 | // Described in: |
33 | // http://dsiutils.di.unimi.it/docs/it/unimi/dsi/bits/Fast.html#selectInByte |
34 | // |
35 | // A precomputed tabled containing the positions of the set bits in the binary |
36 | // representations of all 8-bit unsigned integers. |
37 | // |
38 | // For i: [0, 256) ranging over all 8-bit unsigned integers and for j: [0, 8) |
39 | // ranging over all 0-based bit positions in an 8-bit unsigned integer, the |
40 | // table entry kSelectInByte[i][j] is the 0-based bit position of the j-th set |
41 | // bit in the binary representation of i, or 8 if it has fewer than j set bits. |
42 | // |
43 | // Example: i: 17 (b00010001), j: [0, 8) |
44 | // kSelectInByte[b00010001][0] = 0 |
45 | // kSelectInByte[b00010001][1] = 4 |
46 | // kSelectInByte[b00010001][2] = 8 |
47 | // ... |
48 | // kSelectInByte[b00010001][7] = 8 |
49 | extern std::array<std::array<std::uint8_t, 256>, 8> const kSelectInByte; |
50 | |
51 | } // namespace detail |
52 | |
53 | /** |
54 | * Returns the position of the k-th 1 in the 64-bit word x. |
55 | * k is 0-based, so k=0 returns the position of the first 1. |
56 | * |
57 | * Uses the broadword selection algorithm by Vigna [1], improved by Gog |
58 | * and Petri [2] and Vigna [3]. |
59 | * |
60 | * [1] Sebastiano Vigna. Broadword Implementation of Rank/Select |
61 | * Queries. WEA, 2008 |
62 | * |
63 | * [2] Simon Gog, Matthias Petri. Optimized succinct data structures |
64 | * for massive data. Softw. Pract. Exper., 2014 |
65 | * |
66 | * [3] Sebastiano Vigna. MG4J 5.2.1. http://mg4j.di.unimi.it/ |
67 | */ |
68 | template <class Instructions> |
69 | inline uint64_t select64(uint64_t x, uint64_t k) { |
70 | DCHECK_LT(k, Instructions::popcount(x)); |
71 | |
72 | constexpr uint64_t kOnesStep4 = 0x1111111111111111ULL; |
73 | constexpr uint64_t kOnesStep8 = 0x0101010101010101ULL; |
74 | constexpr uint64_t kMSBsStep8 = 0x80ULL * kOnesStep8; |
75 | |
76 | auto s = x; |
77 | s = s - ((s & 0xA * kOnesStep4) >> 1); |
78 | s = (s & 0x3 * kOnesStep4) + ((s >> 2) & 0x3 * kOnesStep4); |
79 | s = (s + (s >> 4)) & 0xF * kOnesStep8; |
80 | uint64_t byteSums = s * kOnesStep8; |
81 | |
82 | uint64_t kStep8 = k * kOnesStep8; |
83 | uint64_t geqKStep8 = (((kStep8 | kMSBsStep8) - byteSums) & kMSBsStep8); |
84 | uint64_t place = Instructions::popcount(geqKStep8) * 8; |
85 | uint64_t byteRank = k - (((byteSums << 8) >> place) & uint64_t(0xFF)); |
86 | return place + detail::kSelectInByte[byteRank][((x >> place) & 0xFF)]; |
87 | } |
88 | |
89 | template <> |
90 | FOLLY_ALWAYS_INLINE uint64_t |
91 | select64<compression::instructions::Haswell>(uint64_t x, uint64_t k) { |
92 | #if defined(__GNUC__) || defined(__clang__) |
93 | // GCC and Clang won't inline the intrinsics. |
94 | uint64_t result = uint64_t(1) << k; |
95 | |
96 | asm("pdep %1, %0, %0\n\t" |
97 | "tzcnt %0, %0" |
98 | : "+r" (result) |
99 | : "r" (x)); |
100 | |
101 | return result; |
102 | #else |
103 | return _tzcnt_u64(_pdep_u64(1ULL << k, x)); |
104 | #endif |
105 | } |
106 | |
107 | } // namespace folly |
108 | |