1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// From Apache Impala (incubating) as of 2016-02-22
19
20#ifndef ARROW_UTIL_HASH_UTIL_H
21#define ARROW_UTIL_HASH_UTIL_H
22
23#include <cassert>
24#include <cstdint>
25
26#include "arrow/util/logging.h"
27#include "arrow/util/macros.h"
28#include "arrow/util/neon-util.h"
29#include "arrow/util/sse-util.h"
30
31static inline uint32_t HW_crc32_u8(uint32_t crc, uint8_t v) {
32 DCHECK(false) << "Hardware CRC support is not enabled";
33 return 0;
34}
35
36static inline uint32_t HW_crc32_u16(uint32_t crc, uint16_t v) {
37 DCHECK(false) << "Hardware CRC support is not enabled";
38 return 0;
39}
40
41static inline uint32_t HW_crc32_u32(uint32_t crc, uint32_t v) {
42 DCHECK(false) << "Hardware CRC support is not enabled";
43 return 0;
44}
45
46static inline uint32_t HW_crc32_u64(uint32_t crc, uint64_t v) {
47 DCHECK(false) << "Hardware CRC support is not enabled";
48 return 0;
49}
50
51#ifdef ARROW_HAVE_SSE4_2
52#define HW_crc32_u8 SSE4_crc32_u8
53#define HW_crc32_u16 SSE4_crc32_u16
54#define HW_crc32_u32 SSE4_crc32_u32
55#define HW_crc32_u64 SSE4_crc32_u64
56#elif defined(ARROW_HAVE_ARM_CRC)
57#define HW_crc32_u8 ARMCE_crc32_u8
58#define HW_crc32_u16 ARMCE_crc32_u16
59#define HW_crc32_u32 ARMCE_crc32_u32
60#define HW_crc32_u64 ARMCE_crc32_u64
61#endif
62
63namespace arrow {
64
65/// Utility class to compute hash values.
66class HashUtil {
67 public:
68#if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_ARM_CRC)
69 static constexpr bool have_hardware_crc32 = true;
70#else
71 static constexpr bool have_hardware_crc32 = false;
72#endif
73
74 /// Compute the Crc32 hash for data using SSE4/ArmCRC instructions. The input hash
75 /// parameter is the current hash/seed value.
76 /// This should only be called if SSE/ArmCRC is supported.
77 /// This is ~4x faster than Fnv/Boost Hash.
78 /// TODO: crc32 hashes with different seeds do not result in different hash functions.
79 /// The resulting hashes are correlated.
80 static uint32_t CrcHash(const void* data, int32_t nbytes, uint32_t hash) {
81 const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
82 const uint8_t* end = p + nbytes;
83
84 while (p <= end - 8) {
85 hash = HW_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(p));
86 p += 8;
87 }
88 while (p <= end - 4) {
89 hash = HW_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
90 p += 4;
91 }
92 while (p < end) {
93 hash = HW_crc32_u8(hash, *p);
94 ++p;
95 }
96
97 // The lower half of the CRC hash has has poor uniformity, so swap the halves
98 // for anyone who only uses the first several bits of the hash.
99 hash = (hash << 16) | (hash >> 16);
100 return hash;
101 }
102
103 /// A variant of CRC32 hashing that computes two independent running CRCs
104 /// over interleaved halves of the input, giving out a 64-bit integer.
105 /// The result's quality should be improved by a finalization step.
106 ///
107 /// In addition to producing more bits of output, this should be twice
108 /// faster than CrcHash on CPUs that can overlap several independent
109 /// CRC computations.
110 static uint64_t DoubleCrcHash(const void* data, int32_t nbytes, uint64_t hash) {
111 const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
112
113 uint32_t h1 = static_cast<uint32_t>(hash >> 32);
114 uint32_t h2 = static_cast<uint32_t>(hash);
115
116 while (nbytes >= 16) {
117 h1 = HW_crc32_u64(h1, *reinterpret_cast<const uint64_t*>(p));
118 h2 = HW_crc32_u64(h2, *reinterpret_cast<const uint64_t*>(p + 8));
119 nbytes -= 16;
120 p += 16;
121 }
122 if (nbytes >= 8) {
123 h1 = HW_crc32_u32(h1, *reinterpret_cast<const uint32_t*>(p));
124 h2 = HW_crc32_u32(h2, *reinterpret_cast<const uint32_t*>(p + 4));
125 nbytes -= 8;
126 p += 8;
127 }
128 if (nbytes >= 4) {
129 h1 = HW_crc32_u16(h1, *reinterpret_cast<const uint16_t*>(p));
130 h2 = HW_crc32_u16(h2, *reinterpret_cast<const uint16_t*>(p + 2));
131 nbytes -= 4;
132 p += 4;
133 }
134 switch (nbytes) {
135 case 3:
136 h1 = HW_crc32_u8(h1, p[3]);
137 // fallthrough
138 case 2:
139 h2 = HW_crc32_u8(h2, p[2]);
140 // fallthrough
141 case 1:
142 h1 = HW_crc32_u8(h1, p[1]);
143 // fallthrough
144 case 0:
145 break;
146 default:
147 assert(0);
148 }
149
150 // A finalization step is recommended to mix up the result's bits
151 return (static_cast<uint64_t>(h1) << 32) + h2;
152 }
153
154 /// CrcHash() specialized for 1-byte data
155 static inline uint32_t CrcHash1(const void* v, uint32_t hash) {
156 const uint8_t* s = reinterpret_cast<const uint8_t*>(v);
157 hash = HW_crc32_u8(hash, *s);
158 hash = (hash << 16) | (hash >> 16);
159 return hash;
160 }
161
162 /// CrcHash() specialized for 2-byte data
163 static inline uint32_t CrcHash2(const void* v, uint32_t hash) {
164 const uint16_t* s = reinterpret_cast<const uint16_t*>(v);
165 hash = HW_crc32_u16(hash, *s);
166 hash = (hash << 16) | (hash >> 16);
167 return hash;
168 }
169
170 /// CrcHash() specialized for 4-byte data
171 static inline uint32_t CrcHash4(const void* v, uint32_t hash) {
172 const uint32_t* p = reinterpret_cast<const uint32_t*>(v);
173 hash = HW_crc32_u32(hash, *p);
174 hash = (hash << 16) | (hash >> 16);
175 return hash;
176 }
177
178 /// CrcHash() specialized for 8-byte data
179 static inline uint32_t CrcHash8(const void* v, uint32_t hash) {
180 const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
181 hash = HW_crc32_u64(hash, *p);
182 hash = (hash << 16) | (hash >> 16);
183 return hash;
184 }
185
186 /// CrcHash() specialized for 12-byte data
187 static inline uint32_t CrcHash12(const void* v, uint32_t hash) {
188 const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
189 hash = HW_crc32_u64(hash, *p);
190 ++p;
191 hash = HW_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
192 hash = (hash << 16) | (hash >> 16);
193 return hash;
194 }
195
196 /// CrcHash() specialized for 16-byte data
197 static inline uint32_t CrcHash16(const void* v, uint32_t hash) {
198 const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
199 hash = HW_crc32_u64(hash, *p);
200 ++p;
201 hash = HW_crc32_u64(hash, *p);
202 hash = (hash << 16) | (hash >> 16);
203 return hash;
204 }
205
206 static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995;
207 static const int MURMUR_R = 47;
208
209 /// Murmur2 hash implementation returning 64-bit hashes.
210 static uint64_t MurmurHash2_64(const void* input, int len, uint64_t seed) {
211 uint64_t h = seed ^ (len * MURMUR_PRIME);
212
213 const uint64_t* data = reinterpret_cast<const uint64_t*>(input);
214 const uint64_t* end = data + (len / sizeof(uint64_t));
215
216 while (data != end) {
217 uint64_t k = *data++;
218 k *= MURMUR_PRIME;
219 k ^= k >> MURMUR_R;
220 k *= MURMUR_PRIME;
221 h ^= k;
222 h *= MURMUR_PRIME;
223 }
224
225 const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
226 switch (len & 7) {
227 case 7:
228 h ^= uint64_t(data2[6]) << 48;
229 case 6:
230 h ^= uint64_t(data2[5]) << 40;
231 case 5:
232 h ^= uint64_t(data2[4]) << 32;
233 case 4:
234 h ^= uint64_t(data2[3]) << 24;
235 case 3:
236 h ^= uint64_t(data2[2]) << 16;
237 case 2:
238 h ^= uint64_t(data2[1]) << 8;
239 case 1:
240 h ^= uint64_t(data2[0]);
241 h *= MURMUR_PRIME;
242 }
243
244 h ^= h >> MURMUR_R;
245 h *= MURMUR_PRIME;
246 h ^= h >> MURMUR_R;
247 return h;
248 }
249
250 /// default values recommended by http://isthe.com/chongo/tech/comp/fnv/
251 static const uint32_t FNV_PRIME = 0x01000193; // 16777619
252 static const uint32_t FNV_SEED = 0x811C9DC5; // 2166136261
253 static const uint64_t FNV64_PRIME = 1099511628211UL;
254 static const uint64_t FNV64_SEED = 14695981039346656037UL;
255
256 /// Implementation of the Fowler-Noll-Vo hash function. This is not as performant
257 /// as boost's hash on int types (2x slower) but has bit entropy.
258 /// For ints, boost just returns the value of the int which can be pathological.
259 /// For example, if the data is <1000, 2000, 3000, 4000, ..> and then the mod of 1000
260 /// is taken on the hash, all values will collide to the same bucket.
261 /// For string values, Fnv is slightly faster than boost.
262 /// IMPORTANT: FNV hash suffers from poor diffusion of the least significant bit,
263 /// which can lead to poor results when input bytes are duplicated.
264 /// See FnvHash64to32() for how this can be mitigated.
265 static uint64_t FnvHash64(const void* data, int32_t bytes, uint64_t hash) {
266 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
267 while (bytes--) {
268 hash = (*ptr ^ hash) * FNV64_PRIME;
269 ++ptr;
270 }
271 return hash;
272 }
273
274 /// Return a 32-bit hash computed by invoking FNV-64 and folding the result to 32-bits.
275 /// This technique is recommended instead of FNV-32 since the LSB of an FNV hash is the
276 /// XOR of the LSBs of its input bytes, leading to poor results for duplicate inputs.
277 /// The input seed 'hash' is duplicated so the top half of the seed is not all zero.
278 /// Data length must be at least 1 byte: zero-length data should be handled separately,
279 /// for example using CombineHash with a unique constant value to avoid returning the
280 /// hash argument. Zero-length data gives terrible results: the initial hash value is
281 /// xored with itself cancelling all bits.
282 static uint32_t FnvHash64to32(const void* data, int32_t bytes, uint32_t hash) {
283 // IMPALA-2270: this function should never be used for zero-byte inputs.
284 DCHECK_GT(bytes, 0);
285 uint64_t hash_u64 = hash | (static_cast<uint64_t>(hash) << 32);
286 hash_u64 = FnvHash64(data, bytes, hash_u64);
287 return static_cast<uint32_t>((hash_u64 >> 32) ^ (hash_u64 & 0xFFFFFFFF));
288 }
289
290 // Hash template
291 template <bool hw>
292 static inline int Hash(const void* data, int32_t bytes, uint32_t seed);
293
294 /// The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio).
295 static const uint32_t HASH_COMBINE_SEED = 0x9e3779b9;
296
297 /// Combine hashes 'value' and 'seed' to get a new hash value. Similar to
298 /// boost::hash_combine(), but for uint32_t. This function should be used with a
299 /// constant first argument to update the hash value for zero-length values such as
300 /// NULL, boolean, and empty strings.
301 static inline uint32_t HashCombine32(uint32_t value, uint32_t seed) {
302 return seed ^ (HASH_COMBINE_SEED + value + (seed << 6) + (seed >> 2));
303 }
304
305 // Get 32 more bits of randomness from a 32-bit hash:
306 static inline uint32_t Rehash32to32(const uint32_t hash) {
307 // Constants generated by uuidgen(1) with the -r flag
308 static const uint64_t m = 0x7850f11ec6d14889ull, a = 0x6773610597ca4c63ull;
309 // This is strongly universal hashing following Dietzfelbinger's "Universal hashing
310 // and k-wise independent random variables via integer arithmetic without primes". As
311 // such, for any two distinct uint32_t's hash1 and hash2, the probability (over the
312 // randomness of the constants) that any subset of bit positions of
313 // Rehash32to32(hash1) is equal to the same subset of bit positions
314 // Rehash32to32(hash2) is minimal.
315 return static_cast<uint32_t>((static_cast<uint64_t>(hash) * m + a) >> 32);
316 }
317
318 static inline uint64_t Rehash32to64(const uint32_t hash) {
319 static const uint64_t m1 = 0x47b6137a44974d91ull, m2 = 0x8824ad5ba2b7289cull,
320 a1 = 0x705495c62df1424aull, a2 = 0x9efc49475c6bfb31ull;
321 const uint64_t hash1 = (static_cast<uint64_t>(hash) * m1 + a1) >> 32;
322 const uint64_t hash2 = (static_cast<uint64_t>(hash) * m2 + a2) >> 32;
323 return hash1 | (hash2 << 32);
324 }
325};
326
327// HW Hash
328template <>
329inline int HashUtil::Hash<true>(const void* data, int32_t bytes, uint32_t seed) {
330#ifdef ARROW_HAVE_ARM_CRC
331 // Need run time check for Arm
332 // if not support, fall back to Murmur
333 if (!crc32c_runtime_check())
334 return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed));
335 else
336#endif
337 // Double CRC
338 return static_cast<int>(HashUtil::DoubleCrcHash(data, bytes, seed));
339}
340
341// Murmur Hash
342template <>
343inline int HashUtil::Hash<false>(const void* data, int32_t bytes, uint32_t seed) {
344 return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed));
345}
346
347} // namespace arrow
348
349#endif // ARROW_UTIL_HASH_UTIL_H
350