1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * Compute 64-, 96-, and 128-bit Rabin fingerprints, as described in
19 * Michael O. Rabin (1981)
20 * Fingerprinting by Random Polynomials
21 * Center for Research in Computing Technology, Harvard University
22 * Tech Report TR-CSE-03-01
23 *
24 * The implementation follows the optimization described in
25 * Andrei Z. Broder (1993)
26 * Some applications of Rabin's fingerprinting method
27 *
28 * extended for fingerprints larger than 64 bits, and modified to use
29 * 64-bit instead of 32-bit integers for computation.
30 *
31 * The precomputed tables are in Fingerprint.cpp.
32 *
33 * Benchmarked on 10/13/2009 on a 2.5GHz quad-core Xeon L5420,
34 * - Fingerprint<64>::update64() takes about 12ns
35 * - Fingerprint<96>::update64() takes about 30ns
36 * - Fingerprint<128>::update128() takes about 30ns
37 * (unsurprisingly, Fingerprint<96> and Fingerprint<128> take the
38 * same amount of time, as they both use 128-bit operations; the least
39 * significant 32 bits of Fingerprint<96> will always be 0)
40 *
41 * @author Tudor Bosman (tudorb@facebook.com)
42 */
43
44#pragma once
45
46#include <array>
47#include <cstdint>
48
49#include <folly/Range.h>
50
51namespace folly {
52
53namespace detail {
54
55constexpr size_t poly_size(size_t bits) {
56 return 1 + (bits - 1) / 64;
57}
58
59template <size_t Deg>
60using poly_table =
61 std::array<std::array<std::array<uint64_t, poly_size(Deg)>, 256>, 8>;
62
63template <int BITS>
64struct FingerprintTable {
65 static const uint64_t poly[poly_size(BITS)];
66 static const poly_table<BITS> table;
67};
68
69template <int BITS>
70const uint64_t FingerprintTable<BITS>::poly[poly_size(BITS)] = {};
71template <int BITS>
72const poly_table<BITS> FingerprintTable<BITS>::table = {};
73
74#ifndef _MSC_VER
75// MSVC 2015 can't handle these extern specialization declarations,
76// but they aren't needed for things to work right, so we just don't
77// declare them in the header for MSVC.
78
79#define FOLLY_DECLARE_FINGERPRINT_TABLES(BITS) \
80 template <> \
81 const uint64_t FingerprintTable<BITS>::poly[poly_size(BITS)]; \
82 template <> \
83 const poly_table<BITS> FingerprintTable<BITS>::table
84
85FOLLY_DECLARE_FINGERPRINT_TABLES(64);
86FOLLY_DECLARE_FINGERPRINT_TABLES(96);
87FOLLY_DECLARE_FINGERPRINT_TABLES(128);
88
89#undef FOLLY_DECLARE_FINGERPRINT_TABLES
90#endif
91
92} // namespace detail
93
94/**
95 * Compute the Rabin fingerprint.
96 *
97 * TODO(tudorb): Extend this to allow removing values from the computed
98 * fingerprint (so we can fingerprint a sliding window, as in the Rabin-Karp
99 * string matching algorithm)
100 *
101 * update* methods return *this, so you can chain them together:
102 * Fingerprint<96>().update8(x).update(str).update64(val).write(output);
103 */
104template <int BITS>
105class Fingerprint {
106 public:
107 Fingerprint() {
108 // Use a non-zero starting value. We'll use (1 << (BITS-1))
109 fp_[0] = 1ULL << 63;
110 for (int i = 1; i < size(); i++) {
111 fp_[i] = 0;
112 }
113 }
114
115 Fingerprint& update8(uint8_t v) {
116 uint8_t out = shlor8(v);
117 xortab(detail::FingerprintTable<BITS>::table[0][out]);
118 return *this;
119 }
120
121 // update32 and update64 are convenience functions to update the fingerprint
122 // with 4 and 8 bytes at a time. They are faster than calling update8
123 // in a loop. They process the bytes in big-endian order.
124 Fingerprint& update32(uint32_t v) {
125 uint32_t out = shlor32(v);
126 for (int i = 0; i < 4; i++) {
127 xortab(detail::FingerprintTable<BITS>::table[i][out & 0xff]);
128 out >>= 8;
129 }
130 return *this;
131 }
132
133 Fingerprint& update64(uint64_t v) {
134 uint64_t out = shlor64(v);
135 for (int i = 0; i < 8; i++) {
136 xortab(detail::FingerprintTable<BITS>::table[i][out & 0xff]);
137 out >>= 8;
138 }
139 return *this;
140 }
141
142 Fingerprint& update(StringPiece str) {
143 // TODO(tudorb): We could be smart and do update64 or update32 if aligned
144 for (auto c : str) {
145 update8(uint8_t(c));
146 }
147 return *this;
148 }
149
150 /**
151 * Return the number of uint64s needed to hold the fingerprint value.
152 */
153 constexpr static int size() {
154 return detail::poly_size(BITS);
155 }
156
157 /**
158 * Write the computed fingeprint to an array of size() uint64_t's.
159 * For Fingerprint<64>, size()==1; we write 64 bits in out[0]
160 * For Fingerprint<96>, size()==2; we write 64 bits in out[0] and
161 * the most significant 32 bits of out[1]
162 * For Fingerprint<128>, size()==2; we write 64 bits in out[0] and
163 * 64 bits in out[1].
164 */
165 void write(uint64_t* out) const {
166 for (int i = 0; i < size(); i++) {
167 out[i] = fp_[i];
168 }
169 }
170
171 private:
172 // XOR the fingerprint with a value from one of the tables.
173 void xortab(std::array<uint64_t, detail::poly_size(BITS)> const& tab) {
174 for (int i = 0; i < size(); i++) {
175 fp_[i] ^= tab[i];
176 }
177 }
178
179 // Helper functions: shift the fingerprint value left by 8/32/64 bits,
180 // return the "out" value (the bits that were shifted out), and add "v"
181 // in the bits on the right.
182 uint8_t shlor8(uint8_t v);
183 uint32_t shlor32(uint32_t v);
184 uint64_t shlor64(uint64_t v);
185
186 uint64_t fp_[detail::poly_size(BITS)];
187};
188
189// Convenience functions
190
191/**
192 * Return the 64-bit Rabin fingerprint of a string.
193 */
194inline uint64_t fingerprint64(StringPiece str) {
195 uint64_t fp;
196 Fingerprint<64>().update(str).write(&fp);
197 return fp;
198}
199
200/**
201 * Compute the 96-bit Rabin fingerprint of a string.
202 * Return the 64 most significant bits in *msb, and the 32 least significant
203 * bits in *lsb.
204 */
205inline void fingerprint96(StringPiece str, uint64_t* msb, uint32_t* lsb) {
206 uint64_t fp[2];
207 Fingerprint<96>().update(str).write(fp);
208 *msb = fp[0];
209 *lsb = (uint32_t)(fp[1] >> 32);
210}
211
212/**
213 * Compute the 128-bit Rabin fingerprint of a string.
214 * Return the 64 most significant bits in *msb, and the 64 least significant
215 * bits in *lsb.
216 */
217inline void fingerprint128(StringPiece str, uint64_t* msb, uint64_t* lsb) {
218 uint64_t fp[2];
219 Fingerprint<128>().update(str).write(fp);
220 *msb = fp[0];
221 *lsb = fp[1];
222}
223
224template <>
225inline uint8_t Fingerprint<64>::shlor8(uint8_t v) {
226 uint8_t out = (uint8_t)(fp_[0] >> 56);
227 fp_[0] = (fp_[0] << 8) | ((uint64_t)v);
228 return out;
229}
230
231template <>
232inline uint32_t Fingerprint<64>::shlor32(uint32_t v) {
233 uint32_t out = (uint32_t)(fp_[0] >> 32);
234 fp_[0] = (fp_[0] << 32) | ((uint64_t)v);
235 return out;
236}
237
238template <>
239inline uint64_t Fingerprint<64>::shlor64(uint64_t v) {
240 uint64_t out = fp_[0];
241 fp_[0] = v;
242 return out;
243}
244
245template <>
246inline uint8_t Fingerprint<96>::shlor8(uint8_t v) {
247 uint8_t out = (uint8_t)(fp_[0] >> 56);
248 fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56);
249 fp_[1] = (fp_[1] << 8) | ((uint64_t)v << 32);
250 return out;
251}
252
253template <>
254inline uint32_t Fingerprint<96>::shlor32(uint32_t v) {
255 uint32_t out = (uint32_t)(fp_[0] >> 32);
256 fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32);
257 fp_[1] = ((uint64_t)v << 32);
258 return out;
259}
260
261template <>
262inline uint64_t Fingerprint<96>::shlor64(uint64_t v) {
263 uint64_t out = fp_[0];
264 fp_[0] = fp_[1] | (v >> 32);
265 fp_[1] = v << 32;
266 return out;
267}
268
269template <>
270inline uint8_t Fingerprint<128>::shlor8(uint8_t v) {
271 uint8_t out = (uint8_t)(fp_[0] >> 56);
272 fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56);
273 fp_[1] = (fp_[1] << 8) | ((uint64_t)v);
274 return out;
275}
276
277template <>
278inline uint32_t Fingerprint<128>::shlor32(uint32_t v) {
279 uint32_t out = (uint32_t)(fp_[0] >> 32);
280 fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32);
281 fp_[1] = (fp_[1] << 32) | ((uint64_t)v);
282 return out;
283}
284
285template <>
286inline uint64_t Fingerprint<128>::shlor64(uint64_t v) {
287 uint64_t out = fp_[0];
288 fp_[0] = fp_[1];
289 fp_[1] = v;
290 return out;
291}
292
293} // namespace folly
294