1 | // Copyright (c) 2014 Google, Inc. |
2 | // |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | // of this software and associated documentation files (the "Software"), to deal |
5 | // in the Software without restriction, including without limitation the rights |
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | // copies of the Software, and to permit persons to whom the Software is |
8 | // furnished to do so, subject to the following conditions: |
9 | // |
10 | // The above copyright notice and this permission notice shall be included in |
11 | // all copies or substantial portions of the Software. |
12 | // |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | // THE SOFTWARE. |
20 | // |
21 | // FarmHash, by Geoff Pike |
22 | |
23 | // |
24 | // http://code.google.com/p/farmhash/ |
25 | // |
26 | // This file provides a few functions for hashing strings and other |
27 | // data. All of them are high-quality functions in the sense that |
28 | // they do well on standard tests such as Austin Appleby's SMHasher. |
29 | // They're also fast. FarmHash is the successor to CityHash. |
30 | // |
31 | // Functions in the FarmHash family are not suitable for cryptography. |
32 | // |
33 | // WARNING: This code has been only lightly tested on big-endian platforms! |
34 | // It is known to work well on little-endian platforms that have a small penalty |
35 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. |
36 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads; |
37 | // bug reports are welcome. |
38 | // |
39 | // By the way, for some hash functions, given strings a and b, the hash |
40 | // of a+b is easily derived from the hashes of a and b. This property |
41 | // doesn't hold for any hash functions in this file. |
42 | |
43 | #ifndef FARM_HASH_H_ |
44 | #define FARM_HASH_H_ |
45 | |
46 | #include <assert.h> |
47 | #include <stdint.h> |
48 | #include <stdlib.h> |
49 | #include <string.h> // for memcpy and memset |
50 | #include <utility> |
51 | |
52 | #ifndef NAMESPACE_FOR_HASH_FUNCTIONS |
53 | #define NAMESPACE_FOR_HASH_FUNCTIONS farmhash |
54 | #endif |
55 | |
56 | namespace NAMESPACE_FOR_HASH_FUNCTIONS { |
57 | |
58 | #if defined(FARMHASH_UINT128_T_DEFINED) |
59 | inline uint64_t Uint128Low64(const uint128_t x) { |
60 | return static_cast<uint64_t>(x); |
61 | } |
62 | inline uint64_t Uint128High64(const uint128_t x) { |
63 | return static_cast<uint64_t>(x >> 64); |
64 | } |
65 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { |
66 | return lo + (((uint128_t)hi) << 64); |
67 | } |
68 | #else |
69 | typedef std::pair<uint64_t, uint64_t> uint128_t; |
70 | inline uint64_t Uint128Low64(const uint128_t x) { return x.first; } |
71 | inline uint64_t Uint128High64(const uint128_t x) { return x.second; } |
72 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); } |
73 | #endif |
74 | |
75 | |
76 | // BASIC STRING HASHING |
77 | |
78 | // Hash function for a byte array. |
79 | // May change from time to time, may differ on different platforms, may differ |
80 | // depending on NDEBUG. |
81 | size_t Hash(const char* s, size_t len); |
82 | |
83 | // Hash function for a byte array. Most useful in 32-bit binaries. |
84 | // May change from time to time, may differ on different platforms, may differ |
85 | // depending on NDEBUG. |
86 | uint32_t Hash32(const char* s, size_t len); |
87 | |
88 | // Hash function for a byte array. For convenience, a 32-bit seed is also |
89 | // hashed into the result. |
90 | // May change from time to time, may differ on different platforms, may differ |
91 | // depending on NDEBUG. |
92 | uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed); |
93 | |
94 | // Hash 128 input bits down to 64 bits of output. |
95 | // Hash function for a byte array. |
96 | // May change from time to time, may differ on different platforms, may differ |
97 | // depending on NDEBUG. |
98 | uint64_t Hash64(const char* s, size_t len); |
99 | |
100 | // Hash function for a byte array. For convenience, a 64-bit seed is also |
101 | // hashed into the result. |
102 | // May change from time to time, may differ on different platforms, may differ |
103 | // depending on NDEBUG. |
104 | uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed); |
105 | |
106 | // Hash function for a byte array. For convenience, two seeds are also |
107 | // hashed into the result. |
108 | // May change from time to time, may differ on different platforms, may differ |
109 | // depending on NDEBUG. |
110 | uint64_t Hash64WithSeeds(const char* s, size_t len, |
111 | uint64_t seed0, uint64_t seed1); |
112 | |
113 | // Hash function for a byte array. |
114 | // May change from time to time, may differ on different platforms, may differ |
115 | // depending on NDEBUG. |
116 | uint128_t Hash128(const char* s, size_t len); |
117 | |
118 | // Hash function for a byte array. For convenience, a 128-bit seed is also |
119 | // hashed into the result. |
120 | // May change from time to time, may differ on different platforms, may differ |
121 | // depending on NDEBUG. |
122 | uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed); |
123 | |
124 | // BASIC NON-STRING HASHING |
125 | |
126 | // This is intended to be a reasonably good hash function. |
127 | // May change from time to time, may differ on different platforms, may differ |
128 | // depending on NDEBUG. |
129 | inline uint64_t Hash128to64(uint128_t x) { |
130 | // Murmur-inspired hashing. |
131 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; |
132 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; |
133 | a ^= (a >> 47); |
134 | uint64_t b = (Uint128High64(x) ^ a) * kMul; |
135 | b ^= (b >> 47); |
136 | b *= kMul; |
137 | return b; |
138 | } |
139 | |
140 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) |
141 | |
142 | // Fingerprint function for a byte array. Most useful in 32-bit binaries. |
143 | uint32_t Fingerprint32(const char* s, size_t len); |
144 | |
145 | // Fingerprint function for a byte array. |
146 | uint64_t Fingerprint64(const char* s, size_t len); |
147 | |
148 | // Fingerprint function for a byte array. |
149 | uint128_t Fingerprint128(const char* s, size_t len); |
150 | |
151 | // This is intended to be a good fingerprinting primitive. |
152 | // See below for more overloads. |
153 | inline uint64_t Fingerprint(uint128_t x) { |
154 | // Murmur-inspired hashing. |
155 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; |
156 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; |
157 | a ^= (a >> 47); |
158 | uint64_t b = (Uint128High64(x) ^ a) * kMul; |
159 | b ^= (b >> 44); |
160 | b *= kMul; |
161 | b ^= (b >> 41); |
162 | b *= kMul; |
163 | return b; |
164 | } |
165 | |
166 | // This is intended to be a good fingerprinting primitive. |
167 | inline uint64_t Fingerprint(uint64_t x) { |
168 | // Murmur-inspired hashing. |
169 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; |
170 | uint64_t b = x * kMul; |
171 | b ^= (b >> 44); |
172 | b *= kMul; |
173 | b ^= (b >> 41); |
174 | b *= kMul; |
175 | return b; |
176 | } |
177 | |
178 | #ifndef FARMHASH_NO_CXX_STRING |
179 | |
180 | // Convenience functions to hash or fingerprint C++ strings. |
181 | // These require that Str::data() return a pointer to the first char |
182 | // (as a const char*) and that Str::length() return the string's length; |
183 | // they work with std::string, for example. |
184 | |
185 | // Hash function for a byte array. |
186 | // May change from time to time, may differ on different platforms, may differ |
187 | // depending on NDEBUG. |
188 | template <typename Str> |
189 | inline size_t Hash(const Str& s) { |
190 | assert(sizeof(s[0]) == 1); |
191 | return Hash(s.data(), s.length()); |
192 | } |
193 | |
194 | // Hash function for a byte array. Most useful in 32-bit binaries. |
195 | // May change from time to time, may differ on different platforms, may differ |
196 | // depending on NDEBUG. |
197 | template <typename Str> |
198 | inline uint32_t Hash32(const Str& s) { |
199 | assert(sizeof(s[0]) == 1); |
200 | return Hash32(s.data(), s.length()); |
201 | } |
202 | |
203 | // Hash function for a byte array. For convenience, a 32-bit seed is also |
204 | // hashed into the result. |
205 | // May change from time to time, may differ on different platforms, may differ |
206 | // depending on NDEBUG. |
207 | template <typename Str> |
208 | inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) { |
209 | assert(sizeof(s[0]) == 1); |
210 | return Hash32WithSeed(s.data(), s.length(), seed); |
211 | } |
212 | |
213 | // Hash 128 input bits down to 64 bits of output. |
214 | // Hash function for a byte array. |
215 | // May change from time to time, may differ on different platforms, may differ |
216 | // depending on NDEBUG. |
217 | template <typename Str> |
218 | inline uint64_t Hash64(const Str& s) { |
219 | assert(sizeof(s[0]) == 1); |
220 | return Hash64(s.data(), s.length()); |
221 | } |
222 | |
223 | // Hash function for a byte array. For convenience, a 64-bit seed is also |
224 | // hashed into the result. |
225 | // May change from time to time, may differ on different platforms, may differ |
226 | // depending on NDEBUG. |
227 | template <typename Str> |
228 | inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) { |
229 | assert(sizeof(s[0]) == 1); |
230 | return Hash64WithSeed(s.data(), s.length(), seed); |
231 | } |
232 | |
233 | // Hash function for a byte array. For convenience, two seeds are also |
234 | // hashed into the result. |
235 | // May change from time to time, may differ on different platforms, may differ |
236 | // depending on NDEBUG. |
237 | template <typename Str> |
238 | inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) { |
239 | assert(sizeof(s[0]) == 1); |
240 | return Hash64WithSeeds(s.data(), s.length(), seed0, seed1); |
241 | } |
242 | |
243 | // Hash function for a byte array. |
244 | // May change from time to time, may differ on different platforms, may differ |
245 | // depending on NDEBUG. |
246 | template <typename Str> |
247 | inline uint128_t Hash128(const Str& s) { |
248 | assert(sizeof(s[0]) == 1); |
249 | return Hash128(s.data(), s.length()); |
250 | } |
251 | |
252 | // Hash function for a byte array. For convenience, a 128-bit seed is also |
253 | // hashed into the result. |
254 | // May change from time to time, may differ on different platforms, may differ |
255 | // depending on NDEBUG. |
256 | template <typename Str> |
257 | inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) { |
258 | assert(sizeof(s[0]) == 1); |
259 | return Hash128(s.data(), s.length(), seed); |
260 | } |
261 | |
262 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) |
263 | |
264 | // Fingerprint function for a byte array. Most useful in 32-bit binaries. |
265 | template <typename Str> |
266 | inline uint32_t Fingerprint32(const Str& s) { |
267 | assert(sizeof(s[0]) == 1); |
268 | return Fingerprint32(s.data(), s.length()); |
269 | } |
270 | |
271 | // Fingerprint 128 input bits down to 64 bits of output. |
272 | // Fingerprint function for a byte array. |
273 | template <typename Str> |
274 | inline uint64_t Fingerprint64(const Str& s) { |
275 | assert(sizeof(s[0]) == 1); |
276 | return Fingerprint64(s.data(), s.length()); |
277 | } |
278 | |
279 | // Fingerprint function for a byte array. |
280 | template <typename Str> |
281 | inline uint128_t Fingerprint128(const Str& s) { |
282 | assert(sizeof(s[0]) == 1); |
283 | return Fingerprint128(s.data(), s.length()); |
284 | } |
285 | |
286 | #endif |
287 | |
288 | } // namespace NAMESPACE_FOR_HASH_FUNCTIONS |
289 | |
290 | #endif // FARM_HASH_H_ |
291 | |