1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/GroupVarint.h> |
18 | |
19 | #include <folly/container/Array.h> |
20 | |
21 | #if HAVE_GROUP_VARINT |
22 | namespace folly { |
23 | |
24 | const uint32_t GroupVarint32::kMask[] = { |
25 | 0xff, |
26 | 0xffff, |
27 | 0xffffff, |
28 | 0xffffffff, |
29 | }; |
30 | |
31 | const uint64_t GroupVarint64::kMask[] = { |
32 | 0xff, |
33 | 0xffff, |
34 | 0xffffff, |
35 | 0xffffffff, |
36 | 0xffffffffffULL, |
37 | 0xffffffffffffULL, |
38 | 0xffffffffffffffULL, |
39 | 0xffffffffffffffffULL, |
40 | }; |
41 | |
42 | namespace detail { |
43 | |
44 | struct group_varint_table_base_make_item { |
45 | constexpr std::size_t get_d(std::size_t index, std::size_t j) const { |
46 | return 1u + ((index >> (2 * j)) & 3u); |
47 | } |
48 | constexpr std::size_t get_offset(std::size_t index, std::size_t j) const { |
49 | // clang-format off |
50 | return |
51 | (j > 0 ? get_d(index, 0) : 0) + |
52 | (j > 1 ? get_d(index, 1) : 0) + |
53 | (j > 2 ? get_d(index, 2) : 0) + |
54 | (j > 3 ? get_d(index, 3) : 0) + |
55 | 0; |
56 | // clang-format on |
57 | } |
58 | }; |
59 | |
60 | struct group_varint_table_length_make_item : group_varint_table_base_make_item { |
61 | constexpr std::uint8_t operator()(std::size_t index) const { |
62 | return 1u + get_offset(index, 4); |
63 | } |
64 | }; |
65 | |
66 | // Reference: http://www.stepanovpapers.com/CIKM_2011.pdf |
67 | // |
68 | // From 17 encoded bytes, we may use between 5 and 17 bytes to encode 4 |
69 | // integers. The first byte is a key that indicates how many bytes each of |
70 | // the 4 integers takes: |
71 | // |
72 | // bit 0..1: length-1 of first integer |
73 | // bit 2..3: length-1 of second integer |
74 | // bit 4..5: length-1 of third integer |
75 | // bit 6..7: length-1 of fourth integer |
76 | // |
77 | // The value of the first byte is used as the index in a table which returns |
78 | // a mask value for the SSSE3 PSHUFB instruction, which takes an XMM register |
79 | // (16 bytes) and shuffles bytes from it into a destination XMM register |
80 | // (optionally setting some of them to 0) |
81 | // |
82 | // For example, if the key has value 4, that means that the first integer |
83 | // uses 1 byte, the second uses 2 bytes, the third and fourth use 1 byte each, |
84 | // so we set the mask value so that |
85 | // |
86 | // r[0] = a[0] |
87 | // r[1] = 0 |
88 | // r[2] = 0 |
89 | // r[3] = 0 |
90 | // |
91 | // r[4] = a[1] |
92 | // r[5] = a[2] |
93 | // r[6] = 0 |
94 | // r[7] = 0 |
95 | // |
96 | // r[8] = a[3] |
97 | // r[9] = 0 |
98 | // r[10] = 0 |
99 | // r[11] = 0 |
100 | // |
101 | // r[12] = a[4] |
102 | // r[13] = 0 |
103 | // r[14] = 0 |
104 | // r[15] = 0 |
105 | |
106 | struct group_varint_table_sse_mask_make_item |
107 | : group_varint_table_base_make_item { |
108 | constexpr auto partial_item(std::size_t d, std::size_t offset, std::size_t k) |
109 | const { |
110 | // if k < d, the j'th integer uses d bytes, consume them |
111 | // set remaining bytes in result to 0 |
112 | // 0xff: set corresponding byte in result to 0 |
113 | return std::uint32_t((k < d ? offset + k : std::size_t(0xff)) << (8 * k)); |
114 | } |
115 | |
116 | constexpr auto item_impl(std::size_t d, std::size_t offset) const { |
117 | // clang-format off |
118 | return |
119 | partial_item(d, offset, 0) | |
120 | partial_item(d, offset, 1) | |
121 | partial_item(d, offset, 2) | |
122 | partial_item(d, offset, 3) | |
123 | 0; |
124 | // clang-format on |
125 | } |
126 | |
127 | constexpr auto item(std::size_t index, std::size_t j) const { |
128 | return item_impl(get_d(index, j), get_offset(index, j)); |
129 | } |
130 | |
131 | constexpr auto operator()(std::size_t index) const { |
132 | return std::array<std::uint32_t, 4>{{ |
133 | item(index, 0), |
134 | item(index, 1), |
135 | item(index, 2), |
136 | item(index, 3), |
137 | }}; |
138 | } |
139 | }; |
140 | |
141 | #if FOLLY_SSE >= 3 |
142 | alignas(16) FOLLY_STORAGE_CONSTEXPR |
143 | decltype(groupVarintSSEMasks) groupVarintSSEMasks = |
144 | make_array_with<256>(group_varint_table_sse_mask_make_item{}); |
145 | #endif |
146 | |
147 | FOLLY_STORAGE_CONSTEXPR decltype(groupVarintLengths) groupVarintLengths = |
148 | make_array_with<256>(group_varint_table_length_make_item{}); |
149 | |
150 | } // namespace detail |
151 | |
152 | } // namespace folly |
153 | #endif |
154 | |