// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "parquet/murmur3.h"

namespace parquet {

#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline
#define ROTL64(x, y) _rotl64(x, y)

#else // !defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))
inline uint64_t rotl64(uint64_t x, int8_t r) { return (x << r) | (x >> (64 - r)); }
#define ROTL64(x, y) rotl64(x, y)

#endif // defined(_MSC_VER)

#define BIG_CONSTANT(x) (x##LLU)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }

FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
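
// A sketch only (not compiled here): a big-endian build could byte-swap in
// these helpers instead of reading the words directly, e.g. with the
// GCC/Clang builtins:
//
//   FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) {
//     return __builtin_bswap64(p[i]);
//   }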

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32(uint32_t h) {
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

FORCE_INLINE uint64_t fmix64(uint64_t k) {
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}
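
// Every step in these finalizers (a shift-xor, a multiply by an odd constant)
// is invertible at the word size, so fmix32/fmix64 are bijections: they
// redistribute bits for avalanche without discarding any state.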

//-----------------------------------------------------------------------------

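// Computes the 128-bit MurmurHash3 (x64 variant) of the `len` bytes at `key`:
// both halves of the internal state start from `seed`, and the two 64-bit
// halves of the digest are written to out[0] and out[1].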
void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
  const uint8_t* data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t* blocks = (const uint64_t*)(data);

  for (int i = 0; i < nblocks; i++) {
    uint64_t k1 = getblock64(blocks, i * 2 + 0);
    uint64_t k2 = getblock64(blocks, i * 2 + 1);

    k1 *= c1;
    k1 = ROTL64(k1, 31);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL64(h1, 27);
    h1 += h2;
    h1 = h1 * 5 + 0x52dce729;

    k2 *= c2;
    k2 = ROTL64(k2, 33);
    k2 *= c1;
    h2 ^= k2;

    h2 = ROTL64(h2, 31);
    h2 += h1;
    h2 = h2 * 5 + 0x38495ab5;
  }

  //----------
  // tail

  const uint8_t* tail = data + nblocks * 16;

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch (len & 15) {
    case 15:
      k2 ^= ((uint64_t)tail[14]) << 48; // fall through
    case 14:
      k2 ^= ((uint64_t)tail[13]) << 40; // fall through
    case 13:
      k2 ^= ((uint64_t)tail[12]) << 32; // fall through
    case 12:
      k2 ^= ((uint64_t)tail[11]) << 24; // fall through
    case 11:
      k2 ^= ((uint64_t)tail[10]) << 16; // fall through
    case 10:
      k2 ^= ((uint64_t)tail[9]) << 8; // fall through
    case 9:
      k2 ^= ((uint64_t)tail[8]) << 0;
      k2 *= c2;
      k2 = ROTL64(k2, 33);
      k2 *= c1;
      h2 ^= k2; // fall through

    case 8:
      k1 ^= ((uint64_t)tail[7]) << 56; // fall through
    case 7:
      k1 ^= ((uint64_t)tail[6]) << 48; // fall through
    case 6:
      k1 ^= ((uint64_t)tail[5]) << 40; // fall through
    case 5:
      k1 ^= ((uint64_t)tail[4]) << 32; // fall through
    case 4:
      k1 ^= ((uint64_t)tail[3]) << 24; // fall through
    case 3:
      k1 ^= ((uint64_t)tail[2]) << 16; // fall through
    case 2:
      k1 ^= ((uint64_t)tail[1]) << 8; // fall through
    case 1:
      k1 ^= ((uint64_t)tail[0]) << 0;
      k1 *= c1;
      k1 = ROTL64(k1, 31);
      k1 *= c2;
      h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  out[0] = h1;
  out[1] = h2;
}

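// A minimal usage sketch of the core routine (`buffer` and `buffer_len` are
// hypothetical names):
//
//   uint64_t out[2];
//   Hash_x64_128(buffer, buffer_len, /*seed=*/0, out);
//   uint64_t hash = out[0];  // the helpers below keep only the low half
//
// HashHelper hashes the raw in-memory representation of a fixed-width value
// and returns the low 64 bits of its 128-bit digest.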
template <typename T>
uint64_t HashHelper(T value, uint32_t seed) {
  uint64_t output[2];
  Hash_x64_128(reinterpret_cast<const void*>(&value), sizeof(T), seed, output);
  return output[0];
}

uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }

uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }

uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }

uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }

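// FLBA (FixedLenByteArray) does not carry its own length, so the caller must
// pass the fixed width in `len`.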
uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
  uint64_t out[2];
  Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
  return out[0];
}

uint64_t MurmurHash3::Hash(const Int96* value) const {
  uint64_t out[2];
  Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
               out);
  return out[0];
}

uint64_t MurmurHash3::Hash(const ByteArray* value) const {
  uint64_t out[2];
  Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
  return out[0];
}

} // namespace parquet