1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | /** |
18 | * Compute 64-, 96-, and 128-bit Rabin fingerprints, as described in |
19 | * Michael O. Rabin (1981) |
20 | * Fingerprinting by Random Polynomials |
21 | * Center for Research in Computing Technology, Harvard University |
22 | * Tech Report TR-CSE-03-01 |
23 | * |
24 | * The implementation follows the optimization described in |
25 | * Andrei Z. Broder (1993) |
26 | * Some applications of Rabin's fingerprinting method |
27 | * |
28 | * extended for fingerprints larger than 64 bits, and modified to use |
29 | * 64-bit instead of 32-bit integers for computation. |
30 | * |
31 | * The precomputed tables are in Fingerprint.cpp. |
32 | * |
33 | * Benchmarked on 10/13/2009 on a 2.5GHz quad-core Xeon L5420, |
34 | * - Fingerprint<64>::update64() takes about 12ns |
35 | * - Fingerprint<96>::update64() takes about 30ns |
36 | * - Fingerprint<128>::update128() takes about 30ns |
37 | * (unsurprisingly, Fingerprint<96> and Fingerprint<128> take the |
38 | * same amount of time, as they both use 128-bit operations; the least |
39 | * significant 32 bits of Fingerprint<96> will always be 0) |
40 | * |
41 | * @author Tudor Bosman (tudorb@facebook.com) |
42 | */ |
43 | |
44 | #pragma once |
45 | |
46 | #include <array> |
47 | #include <cstdint> |
48 | |
49 | #include <folly/Range.h> |
50 | |
51 | namespace folly { |
52 | |
53 | namespace detail { |
54 | |
55 | constexpr size_t poly_size(size_t bits) { |
56 | return 1 + (bits - 1) / 64; |
57 | } |
58 | |
59 | template <size_t Deg> |
60 | using poly_table = |
61 | std::array<std::array<std::array<uint64_t, poly_size(Deg)>, 256>, 8>; |
62 | |
63 | template <int BITS> |
64 | struct FingerprintTable { |
65 | static const uint64_t poly[poly_size(BITS)]; |
66 | static const poly_table<BITS> table; |
67 | }; |
68 | |
69 | template <int BITS> |
70 | const uint64_t FingerprintTable<BITS>::poly[poly_size(BITS)] = {}; |
71 | template <int BITS> |
72 | const poly_table<BITS> FingerprintTable<BITS>::table = {}; |
73 | |
74 | #ifndef _MSC_VER |
75 | // MSVC 2015 can't handle these extern specialization declarations, |
76 | // but they aren't needed for things to work right, so we just don't |
77 | // declare them in the header for MSVC. |
78 | |
79 | #define FOLLY_DECLARE_FINGERPRINT_TABLES(BITS) \ |
80 | template <> \ |
81 | const uint64_t FingerprintTable<BITS>::poly[poly_size(BITS)]; \ |
82 | template <> \ |
83 | const poly_table<BITS> FingerprintTable<BITS>::table |
84 | |
85 | FOLLY_DECLARE_FINGERPRINT_TABLES(64); |
86 | FOLLY_DECLARE_FINGERPRINT_TABLES(96); |
87 | FOLLY_DECLARE_FINGERPRINT_TABLES(128); |
88 | |
89 | #undef FOLLY_DECLARE_FINGERPRINT_TABLES |
90 | #endif |
91 | |
92 | } // namespace detail |
93 | |
94 | /** |
95 | * Compute the Rabin fingerprint. |
96 | * |
97 | * TODO(tudorb): Extend this to allow removing values from the computed |
98 | * fingerprint (so we can fingerprint a sliding window, as in the Rabin-Karp |
99 | * string matching algorithm) |
100 | * |
101 | * update* methods return *this, so you can chain them together: |
102 | * Fingerprint<96>().update8(x).update(str).update64(val).write(output); |
103 | */ |
104 | template <int BITS> |
105 | class Fingerprint { |
106 | public: |
107 | Fingerprint() { |
108 | // Use a non-zero starting value. We'll use (1 << (BITS-1)) |
109 | fp_[0] = 1ULL << 63; |
110 | for (int i = 1; i < size(); i++) { |
111 | fp_[i] = 0; |
112 | } |
113 | } |
114 | |
115 | Fingerprint& update8(uint8_t v) { |
116 | uint8_t out = shlor8(v); |
117 | xortab(detail::FingerprintTable<BITS>::table[0][out]); |
118 | return *this; |
119 | } |
120 | |
121 | // update32 and update64 are convenience functions to update the fingerprint |
122 | // with 4 and 8 bytes at a time. They are faster than calling update8 |
123 | // in a loop. They process the bytes in big-endian order. |
124 | Fingerprint& update32(uint32_t v) { |
125 | uint32_t out = shlor32(v); |
126 | for (int i = 0; i < 4; i++) { |
127 | xortab(detail::FingerprintTable<BITS>::table[i][out & 0xff]); |
128 | out >>= 8; |
129 | } |
130 | return *this; |
131 | } |
132 | |
133 | Fingerprint& update64(uint64_t v) { |
134 | uint64_t out = shlor64(v); |
135 | for (int i = 0; i < 8; i++) { |
136 | xortab(detail::FingerprintTable<BITS>::table[i][out & 0xff]); |
137 | out >>= 8; |
138 | } |
139 | return *this; |
140 | } |
141 | |
142 | Fingerprint& update(StringPiece str) { |
143 | // TODO(tudorb): We could be smart and do update64 or update32 if aligned |
144 | for (auto c : str) { |
145 | update8(uint8_t(c)); |
146 | } |
147 | return *this; |
148 | } |
149 | |
150 | /** |
151 | * Return the number of uint64s needed to hold the fingerprint value. |
152 | */ |
153 | constexpr static int size() { |
154 | return detail::poly_size(BITS); |
155 | } |
156 | |
157 | /** |
158 | * Write the computed fingeprint to an array of size() uint64_t's. |
159 | * For Fingerprint<64>, size()==1; we write 64 bits in out[0] |
160 | * For Fingerprint<96>, size()==2; we write 64 bits in out[0] and |
161 | * the most significant 32 bits of out[1] |
162 | * For Fingerprint<128>, size()==2; we write 64 bits in out[0] and |
163 | * 64 bits in out[1]. |
164 | */ |
165 | void write(uint64_t* out) const { |
166 | for (int i = 0; i < size(); i++) { |
167 | out[i] = fp_[i]; |
168 | } |
169 | } |
170 | |
171 | private: |
172 | // XOR the fingerprint with a value from one of the tables. |
173 | void xortab(std::array<uint64_t, detail::poly_size(BITS)> const& tab) { |
174 | for (int i = 0; i < size(); i++) { |
175 | fp_[i] ^= tab[i]; |
176 | } |
177 | } |
178 | |
179 | // Helper functions: shift the fingerprint value left by 8/32/64 bits, |
180 | // return the "out" value (the bits that were shifted out), and add "v" |
181 | // in the bits on the right. |
182 | uint8_t shlor8(uint8_t v); |
183 | uint32_t shlor32(uint32_t v); |
184 | uint64_t shlor64(uint64_t v); |
185 | |
186 | uint64_t fp_[detail::poly_size(BITS)]; |
187 | }; |
188 | |
189 | // Convenience functions |
190 | |
191 | /** |
192 | * Return the 64-bit Rabin fingerprint of a string. |
193 | */ |
194 | inline uint64_t fingerprint64(StringPiece str) { |
195 | uint64_t fp; |
196 | Fingerprint<64>().update(str).write(&fp); |
197 | return fp; |
198 | } |
199 | |
200 | /** |
201 | * Compute the 96-bit Rabin fingerprint of a string. |
202 | * Return the 64 most significant bits in *msb, and the 32 least significant |
203 | * bits in *lsb. |
204 | */ |
205 | inline void fingerprint96(StringPiece str, uint64_t* msb, uint32_t* lsb) { |
206 | uint64_t fp[2]; |
207 | Fingerprint<96>().update(str).write(fp); |
208 | *msb = fp[0]; |
209 | *lsb = (uint32_t)(fp[1] >> 32); |
210 | } |
211 | |
212 | /** |
213 | * Compute the 128-bit Rabin fingerprint of a string. |
214 | * Return the 64 most significant bits in *msb, and the 64 least significant |
215 | * bits in *lsb. |
216 | */ |
217 | inline void fingerprint128(StringPiece str, uint64_t* msb, uint64_t* lsb) { |
218 | uint64_t fp[2]; |
219 | Fingerprint<128>().update(str).write(fp); |
220 | *msb = fp[0]; |
221 | *lsb = fp[1]; |
222 | } |
223 | |
224 | template <> |
225 | inline uint8_t Fingerprint<64>::shlor8(uint8_t v) { |
226 | uint8_t out = (uint8_t)(fp_[0] >> 56); |
227 | fp_[0] = (fp_[0] << 8) | ((uint64_t)v); |
228 | return out; |
229 | } |
230 | |
231 | template <> |
232 | inline uint32_t Fingerprint<64>::shlor32(uint32_t v) { |
233 | uint32_t out = (uint32_t)(fp_[0] >> 32); |
234 | fp_[0] = (fp_[0] << 32) | ((uint64_t)v); |
235 | return out; |
236 | } |
237 | |
238 | template <> |
239 | inline uint64_t Fingerprint<64>::shlor64(uint64_t v) { |
240 | uint64_t out = fp_[0]; |
241 | fp_[0] = v; |
242 | return out; |
243 | } |
244 | |
245 | template <> |
246 | inline uint8_t Fingerprint<96>::shlor8(uint8_t v) { |
247 | uint8_t out = (uint8_t)(fp_[0] >> 56); |
248 | fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56); |
249 | fp_[1] = (fp_[1] << 8) | ((uint64_t)v << 32); |
250 | return out; |
251 | } |
252 | |
253 | template <> |
254 | inline uint32_t Fingerprint<96>::shlor32(uint32_t v) { |
255 | uint32_t out = (uint32_t)(fp_[0] >> 32); |
256 | fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32); |
257 | fp_[1] = ((uint64_t)v << 32); |
258 | return out; |
259 | } |
260 | |
261 | template <> |
262 | inline uint64_t Fingerprint<96>::shlor64(uint64_t v) { |
263 | uint64_t out = fp_[0]; |
264 | fp_[0] = fp_[1] | (v >> 32); |
265 | fp_[1] = v << 32; |
266 | return out; |
267 | } |
268 | |
269 | template <> |
270 | inline uint8_t Fingerprint<128>::shlor8(uint8_t v) { |
271 | uint8_t out = (uint8_t)(fp_[0] >> 56); |
272 | fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56); |
273 | fp_[1] = (fp_[1] << 8) | ((uint64_t)v); |
274 | return out; |
275 | } |
276 | |
277 | template <> |
278 | inline uint32_t Fingerprint<128>::shlor32(uint32_t v) { |
279 | uint32_t out = (uint32_t)(fp_[0] >> 32); |
280 | fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32); |
281 | fp_[1] = (fp_[1] << 32) | ((uint64_t)v); |
282 | return out; |
283 | } |
284 | |
285 | template <> |
286 | inline uint64_t Fingerprint<128>::shlor64(uint64_t v) { |
287 | uint64_t out = fp_[0]; |
288 | fp_[0] = fp_[1]; |
289 | fp_[1] = v; |
290 | return out; |
291 | } |
292 | |
293 | } // namespace folly |
294 | |