1// Copyright (c) 2014 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// FarmHash, by Geoff Pike
22
23//
24// http://code.google.com/p/farmhash/
25//
26// This file provides a few functions for hashing strings and other
27// data. All of them are high-quality functions in the sense that
28// they do well on standard tests such as Austin Appleby's SMHasher.
29// They're also fast. FarmHash is the successor to CityHash.
30//
31// Functions in the FarmHash family are not suitable for cryptography.
32//
33// WARNING: This code has been only lightly tested on big-endian platforms!
34// It is known to work well on little-endian platforms that have a small penalty
35// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
36// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
37// bug reports are welcome.
38//
39// By the way, for some hash functions, given strings a and b, the hash
40// of a+b is easily derived from the hashes of a and b. This property
41// doesn't hold for any hash functions in this file.
42
43#ifndef FARM_HASH_H_
44#define FARM_HASH_H_
45
46#include <assert.h>
47#include <stdint.h>
48#include <stdlib.h>
49#include <string.h> // for memcpy and memset
50#include <utility>
51
52#ifndef NAMESPACE_FOR_HASH_FUNCTIONS
53#define NAMESPACE_FOR_HASH_FUNCTIONS farmhash
54#endif
55
56namespace NAMESPACE_FOR_HASH_FUNCTIONS {
57
58#if defined(FARMHASH_UINT128_T_DEFINED)
59inline uint64_t Uint128Low64(const uint128_t x) {
60 return static_cast<uint64_t>(x);
61}
62inline uint64_t Uint128High64(const uint128_t x) {
63 return static_cast<uint64_t>(x >> 64);
64}
65inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
66 return lo + (((uint128_t)hi) << 64);
67}
68#else
69typedef std::pair<uint64_t, uint64_t> uint128_t;
70inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
71inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
72inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
73#endif
74
75
76// BASIC STRING HASHING
77
78// Hash function for a byte array.
79// May change from time to time, may differ on different platforms, may differ
80// depending on NDEBUG.
81size_t Hash(const char* s, size_t len);
82
83// Hash function for a byte array. Most useful in 32-bit binaries.
84// May change from time to time, may differ on different platforms, may differ
85// depending on NDEBUG.
86uint32_t Hash32(const char* s, size_t len);
87
88// Hash function for a byte array. For convenience, a 32-bit seed is also
89// hashed into the result.
90// May change from time to time, may differ on different platforms, may differ
91// depending on NDEBUG.
92uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
93
94// Hash 128 input bits down to 64 bits of output.
95// Hash function for a byte array.
96// May change from time to time, may differ on different platforms, may differ
97// depending on NDEBUG.
98uint64_t Hash64(const char* s, size_t len);
99
100// Hash function for a byte array. For convenience, a 64-bit seed is also
101// hashed into the result.
102// May change from time to time, may differ on different platforms, may differ
103// depending on NDEBUG.
104uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
105
106// Hash function for a byte array. For convenience, two seeds are also
107// hashed into the result.
108// May change from time to time, may differ on different platforms, may differ
109// depending on NDEBUG.
110uint64_t Hash64WithSeeds(const char* s, size_t len,
111 uint64_t seed0, uint64_t seed1);
112
113// Hash function for a byte array.
114// May change from time to time, may differ on different platforms, may differ
115// depending on NDEBUG.
116uint128_t Hash128(const char* s, size_t len);
117
118// Hash function for a byte array. For convenience, a 128-bit seed is also
119// hashed into the result.
120// May change from time to time, may differ on different platforms, may differ
121// depending on NDEBUG.
122uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
123
124// BASIC NON-STRING HASHING
125
126// This is intended to be a reasonably good hash function.
127// May change from time to time, may differ on different platforms, may differ
128// depending on NDEBUG.
129inline uint64_t Hash128to64(uint128_t x) {
130 // Murmur-inspired hashing.
131 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
132 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
133 a ^= (a >> 47);
134 uint64_t b = (Uint128High64(x) ^ a) * kMul;
135 b ^= (b >> 47);
136 b *= kMul;
137 return b;
138}
139
140// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
141
142// Fingerprint function for a byte array. Most useful in 32-bit binaries.
143uint32_t Fingerprint32(const char* s, size_t len);
144
145// Fingerprint function for a byte array.
146uint64_t Fingerprint64(const char* s, size_t len);
147
148// Fingerprint function for a byte array.
149uint128_t Fingerprint128(const char* s, size_t len);
150
151// This is intended to be a good fingerprinting primitive.
152// See below for more overloads.
153inline uint64_t Fingerprint(uint128_t x) {
154 // Murmur-inspired hashing.
155 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
156 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
157 a ^= (a >> 47);
158 uint64_t b = (Uint128High64(x) ^ a) * kMul;
159 b ^= (b >> 44);
160 b *= kMul;
161 b ^= (b >> 41);
162 b *= kMul;
163 return b;
164}
165
166// This is intended to be a good fingerprinting primitive.
167inline uint64_t Fingerprint(uint64_t x) {
168 // Murmur-inspired hashing.
169 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
170 uint64_t b = x * kMul;
171 b ^= (b >> 44);
172 b *= kMul;
173 b ^= (b >> 41);
174 b *= kMul;
175 return b;
176}
177
178#ifndef FARMHASH_NO_CXX_STRING
179
180// Convenience functions to hash or fingerprint C++ strings.
181// These require that Str::data() return a pointer to the first char
182// (as a const char*) and that Str::length() return the string's length;
183// they work with std::string, for example.
184
185// Hash function for a byte array.
186// May change from time to time, may differ on different platforms, may differ
187// depending on NDEBUG.
188template <typename Str>
189inline size_t Hash(const Str& s) {
190 assert(sizeof(s[0]) == 1);
191 return Hash(s.data(), s.length());
192}
193
194// Hash function for a byte array. Most useful in 32-bit binaries.
195// May change from time to time, may differ on different platforms, may differ
196// depending on NDEBUG.
197template <typename Str>
198inline uint32_t Hash32(const Str& s) {
199 assert(sizeof(s[0]) == 1);
200 return Hash32(s.data(), s.length());
201}
202
203// Hash function for a byte array. For convenience, a 32-bit seed is also
204// hashed into the result.
205// May change from time to time, may differ on different platforms, may differ
206// depending on NDEBUG.
207template <typename Str>
208inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
209 assert(sizeof(s[0]) == 1);
210 return Hash32WithSeed(s.data(), s.length(), seed);
211}
212
213// Hash 128 input bits down to 64 bits of output.
214// Hash function for a byte array.
215// May change from time to time, may differ on different platforms, may differ
216// depending on NDEBUG.
217template <typename Str>
218inline uint64_t Hash64(const Str& s) {
219 assert(sizeof(s[0]) == 1);
220 return Hash64(s.data(), s.length());
221}
222
223// Hash function for a byte array. For convenience, a 64-bit seed is also
224// hashed into the result.
225// May change from time to time, may differ on different platforms, may differ
226// depending on NDEBUG.
227template <typename Str>
228inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
229 assert(sizeof(s[0]) == 1);
230 return Hash64WithSeed(s.data(), s.length(), seed);
231}
232
233// Hash function for a byte array. For convenience, two seeds are also
234// hashed into the result.
235// May change from time to time, may differ on different platforms, may differ
236// depending on NDEBUG.
237template <typename Str>
238inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
239 assert(sizeof(s[0]) == 1);
240 return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
241}
242
243// Hash function for a byte array.
244// May change from time to time, may differ on different platforms, may differ
245// depending on NDEBUG.
246template <typename Str>
247inline uint128_t Hash128(const Str& s) {
248 assert(sizeof(s[0]) == 1);
249 return Hash128(s.data(), s.length());
250}
251
252// Hash function for a byte array. For convenience, a 128-bit seed is also
253// hashed into the result.
254// May change from time to time, may differ on different platforms, may differ
255// depending on NDEBUG.
256template <typename Str>
257inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
258 assert(sizeof(s[0]) == 1);
259 return Hash128(s.data(), s.length(), seed);
260}
261
262// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
263
264// Fingerprint function for a byte array. Most useful in 32-bit binaries.
265template <typename Str>
266inline uint32_t Fingerprint32(const Str& s) {
267 assert(sizeof(s[0]) == 1);
268 return Fingerprint32(s.data(), s.length());
269}
270
271// Fingerprint 128 input bits down to 64 bits of output.
272// Fingerprint function for a byte array.
273template <typename Str>
274inline uint64_t Fingerprint64(const Str& s) {
275 assert(sizeof(s[0]) == 1);
276 return Fingerprint64(s.data(), s.length());
277}
278
279// Fingerprint function for a byte array.
280template <typename Str>
281inline uint128_t Fingerprint128(const Str& s) {
282 assert(sizeof(s[0]) == 1);
283 return Fingerprint128(s.data(), s.length());
284}
285
286#endif
287
288} // namespace NAMESPACE_FOR_HASH_FUNCTIONS
289
290#endif // FARM_HASH_H_
291