1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//-----------------------------------------------------------------------------
19// MurmurHash3 was written by Austin Appleby, and is placed in the public
20// domain. The author hereby disclaims copyright to this source code.
21
22// Note - The x86 and x64 versions do _not_ produce the same results, as the
23// algorithms are optimized for their respective platforms. You can still
24// compile and run any of them on any platform, but your performance with the
25// non-native version will be less than optimal.
26
#include "parquet/murmur3.h"

#include <cstring>
28
29namespace parquet {
30
// Compiler-specific helpers:
//   FORCE_INLINE  - asks the compiler to always inline the function it marks.
//   ROTL64(x, y)  - rotates the 64-bit value x left by y bits.
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline
#define ROTL64(x, y) _rotl64(x, y)

#else  // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))
// Rotates x left by r bits. Both shift counts are reduced modulo 64 so that
// r == 0 (or a negative r) never turns into a shift by 64 bits or by a
// negative amount, either of which is undefined behaviour for a 64-bit
// operand. For r in 1..63 the result is (x << r) | (x >> (64 - r)).
inline uint64_t rotl64(uint64_t x, int8_t r) {
  const unsigned left = static_cast<unsigned>(r) & 63u;
  const unsigned right = (64u - left) & 63u;
  return (x << left) | (x >> right);
}
#define ROTL64(x, y) rotl64(x, y)

#endif  // !defined(_MSC_VER)

// Appends the LLU (unsigned long long) suffix to an integer literal.
#define BIG_CONSTANT(x) (x##LLU)
45
46//-----------------------------------------------------------------------------
47// Block read - if your platform needs to do endian-swapping or can only
48// handle aligned reads, do the conversion here
49
50FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }
51
52FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
53
54//-----------------------------------------------------------------------------
55// Finalization mix - force all bits of a hash block to avalanche
56
57FORCE_INLINE uint32_t fmix32(uint32_t h) {
58 h ^= h >> 16;
59 h *= 0x85ebca6b;
60 h ^= h >> 13;
61 h *= 0xc2b2ae35;
62 h ^= h >> 16;
63
64 return h;
65}
66
67//----------
68
69FORCE_INLINE uint64_t fmix64(uint64_t k) {
70 k ^= k >> 33;
71 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
72 k ^= k >> 33;
73 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
74 k ^= k >> 33;
75
76 return k;
77}
78
79//-----------------------------------------------------------------------------
80
81void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
82 const uint8_t* data = (const uint8_t*)key;
83 const int nblocks = len / 16;
84
85 uint64_t h1 = seed;
86 uint64_t h2 = seed;
87
88 const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
89 const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
90
91 //----------
92 // body
93
94 const uint64_t* blocks = (const uint64_t*)(data);
95
96 for (int i = 0; i < nblocks; i++) {
97 uint64_t k1 = getblock64(blocks, i * 2 + 0);
98 uint64_t k2 = getblock64(blocks, i * 2 + 1);
99
100 k1 *= c1;
101 k1 = ROTL64(k1, 31);
102 k1 *= c2;
103 h1 ^= k1;
104
105 h1 = ROTL64(h1, 27);
106 h1 += h2;
107 h1 = h1 * 5 + 0x52dce729;
108
109 k2 *= c2;
110 k2 = ROTL64(k2, 33);
111 k2 *= c1;
112 h2 ^= k2;
113
114 h2 = ROTL64(h2, 31);
115 h2 += h1;
116 h2 = h2 * 5 + 0x38495ab5;
117 }
118
119 //----------
120 // tail
121
122 const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);
123
124 uint64_t k1 = 0;
125 uint64_t k2 = 0;
126
127 switch (len & 15) {
128 case 15:
129 k2 ^= ((uint64_t)tail[14]) << 48;
130 case 14:
131 k2 ^= ((uint64_t)tail[13]) << 40;
132 case 13:
133 k2 ^= ((uint64_t)tail[12]) << 32;
134 case 12:
135 k2 ^= ((uint64_t)tail[11]) << 24;
136 case 11:
137 k2 ^= ((uint64_t)tail[10]) << 16;
138 case 10:
139 k2 ^= ((uint64_t)tail[9]) << 8;
140 case 9:
141 k2 ^= ((uint64_t)tail[8]) << 0;
142 k2 *= c2;
143 k2 = ROTL64(k2, 33);
144 k2 *= c1;
145 h2 ^= k2;
146
147 case 8:
148 k1 ^= ((uint64_t)tail[7]) << 56;
149 case 7:
150 k1 ^= ((uint64_t)tail[6]) << 48;
151 case 6:
152 k1 ^= ((uint64_t)tail[5]) << 40;
153 case 5:
154 k1 ^= ((uint64_t)tail[4]) << 32;
155 case 4:
156 k1 ^= ((uint64_t)tail[3]) << 24;
157 case 3:
158 k1 ^= ((uint64_t)tail[2]) << 16;
159 case 2:
160 k1 ^= ((uint64_t)tail[1]) << 8;
161 case 1:
162 k1 ^= ((uint64_t)tail[0]) << 0;
163 k1 *= c1;
164 k1 = ROTL64(k1, 31);
165 k1 *= c2;
166 h1 ^= k1;
167 }
168
169 //----------
170 // finalization
171
172 h1 ^= len;
173 h2 ^= len;
174
175 h1 += h2;
176 h2 += h1;
177
178 h1 = fmix64(h1);
179 h2 = fmix64(h2);
180
181 h1 += h2;
182 h2 += h1;
183
184 reinterpret_cast<uint64_t*>(out)[0] = h1;
185 reinterpret_cast<uint64_t*>(out)[1] = h2;
186}
187
188template <typename T>
189uint64_t HashHelper(T value, uint32_t seed) {
190 uint64_t output[2];
191 Hash_x64_128(reinterpret_cast<void*>(&value), sizeof(T), seed, output);
192 return output[0];
193}
194
195uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }
196
197uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }
198
199uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }
200
201uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }
202
203uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
204 uint64_t out[2];
205 Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
206 return out[0];
207}
208
209uint64_t MurmurHash3::Hash(const Int96* value) const {
210 uint64_t out[2];
211 Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
212 out);
213 return out[0];
214}
215
216uint64_t MurmurHash3::Hash(const ByteArray* value) const {
217 uint64_t out[2];
218 Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
219 return out[0];
220}
221
222} // namespace parquet
223