SipHash.h source code [ClickHouse/dbms/src/Common/SipHash.h]

1	#pragma once
2
3	/** SipHash is a fast cryptographic hash function for short strings.
4	* Taken from here: https://www.131002.net/siphash/
5	*
6	* This is SipHash 2-4 variant.
7	*
8	* Two changes are made:
9	* - it can also return a 128-bit result, not only 64 bits;
10	* - it works in streaming mode (the hash can be computed in parts).
11	*
12	* On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
13	* (~ 700 MB/sec, 15 million strings per second)
14	*/
15
#include <common/Types.h>
#include <common/unaligned.h>
#include <cstring>
#include <string>
#include <type_traits>
#include <Core/Defines.h>
21
/// Rotate the 64-bit value `x` left by `b` bits.
/// `b` must be in the range 1..63: for b == 0 (or b == 64) one of the shifts would be by 64 bits,
/// which is undefined. Both arguments are evaluated more than once, so pass side-effect-free expressions.
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
23
/// One SipRound: an add-rotate-xor mixing step over the four 64-bit state words.
/// Expects variables named v0, v1, v2, v3 (and the ROTL macro) to be visible at the point of use.
#define SIPROUND \
    do \
    { \
        /* First half: mix the (v0, v1) pair and the (v2, v3) pair. */ \
        v0 += v1; \
        v1 = ROTL(v1, 13) ^ v0; \
        v0 = ROTL(v0, 32); \
        v2 += v3; \
        v3 = ROTL(v3, 16) ^ v2; \
        /* Second half: cross the pairs over - (v0, v3) and (v2, v1). */ \
        v0 += v3; \
        v3 = ROTL(v3, 21) ^ v0; \
        v2 += v1; \
        v1 = ROTL(v1, 17) ^ v2; \
        v2 = ROTL(v2, 32); \
    } while (false)
32
33
34	class SipHash
35	{
36	private:
37	/// State.
38	UInt64 v0;
39	UInt64 v1;
40	UInt64 v2;
41	UInt64 v3;
42
43	/// How many bytes have been processed.
44	UInt64 cnt;
45
46	/// The current 8 bytes of input data.
47	union
48	{
49	UInt64 current_word;
50	UInt8 current_bytes[`8`];
51	};
52
53	ALWAYS_INLINE void finalize()
54	{
55	/// In the last free byte, we write the remainder of the division by 256.
56	current_bytes[`7`] = cnt;
57
58	v3 ^= current_word;
59	SIPROUND;
60	SIPROUND;
61	v0 ^= current_word;
62
63	v2 ^= `0xff`;
64	SIPROUND;
65	SIPROUND;
66	SIPROUND;
67	SIPROUND;
68	}
69
70	public:
71	/// Arguments - seed.
72	SipHash(UInt64 k0 = `0`, UInt64 k1 = `0`)
73	{
74	/// Initialize the state with some random bytes and seed.
75	v0 = `0x736f6d6570736575ULL` ^ k0;
76	v1 = `0x646f72616e646f6dULL` ^ k1;
77	v2 = `0x6c7967656e657261ULL` ^ k0;
78	v3 = `0x7465646279746573ULL` ^ k1;
79
80	cnt = `0`;
81	current_word = `0`;
82	}
83
84	void update(const char * data, UInt64 size)
85	{
86	const char * end = data + size;
87
88	/// We'll finish to process the remainder of the previous update, if any.
89	if (cnt & `7`)
90	{
91	while (cnt & `7` && data < end)
92	{
93	current_bytes[cnt & `7`] = *data;
94	++data;
95	++cnt;
96	}
97
98	/// If we still do not have enough bytes to an 8-byte word.
99	if (cnt & `7`)
100	return;
101
102	v3 ^= current_word;
103	SIPROUND;
104	SIPROUND;
105	v0 ^= current_word;
106	}
107
108	cnt += end - data;
109
110	while (data + `8` <= end)
111	{
112	current_word = unalignedLoad<UInt64>(data);
113
114	v3 ^= current_word;
115	SIPROUND;
116	SIPROUND;
117	v0 ^= current_word;
118
119	data += `8`;
120	}
121
122	/// Pad the remainder, which is missing up to an 8-byte word.
123	current_word = `0`;
124	switch (end - data)
125	{
126	case `7`: current_bytes[`6`] = data[`6`]; [[fallthrough]];
127	case `6`: current_bytes[`5`] = data[`5`]; [[fallthrough]];
128	case `5`: current_bytes[`4`] = data[`4`]; [[fallthrough]];
129	case `4`: current_bytes[`3`] = data[`3`]; [[fallthrough]];
130	case `3`: current_bytes[`2`] = data[`2`]; [[fallthrough]];
131	case `2`: current_bytes[`1`] = data[`1`]; [[fallthrough]];
132	case `1`: current_bytes[`0`] = data[`0`]; [[fallthrough]];
133	case `0`: break;
134	}
135	}
136
137	/// NOTE: std::has_unique_object_representations is only available since clang 6. As of Mar 2017 we still use clang 5 sometimes.
138	template <typename T>
139	std::enable_if_t<std::/has_unique_object_representations_v/is_standard_layout_v<T>, void> update(const T & x)
140	{
141	update(reinterpret_cast<const char >(&x), sizeof*(x));
142	}
143
144	void update(const std::string & x)
145	{
146	update(x.data(), x.length());
147	}
148
149	/// Get the result in some form. This can only be done once!
150
151	void get128(char * out)
152	{
153	finalize();
154	reinterpret_cast<UInt64 *>(out)[`0`] = v0 ^ v1;
155	reinterpret_cast<UInt64 *>(out)[`1`] = v2 ^ v3;
156	}
157
158	/// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos
159	template <typename T>
160	ALWAYS_INLINE void get128(T & lo, T & hi)
161	{
162	static_assert(sizeof(T) == `8`);
163	finalize();
164	lo = v0 ^ v1;
165	hi = v2 ^ v3;
166	}
167
168	UInt64 get64()
169	{
170	finalize();
171	return v0 ^ v1 ^ v2 ^ v3;
172	}
173	};
174
175
176	#undef ROTL
177	#undef SIPROUND
178
179	#include <cstddef>
180
181	inline void sipHash128(const char * data, const size_t size, char * out)
182	{
183	SipHash hash;
184	hash.update(data, size);
185	hash.get128(out);
186	}
187
188	inline UInt64 sipHash64(const char * data, const size_t size)
189	{
190	SipHash hash;
191	hash.update(data, size);
192	return hash.get64();
193	}
194
195	template <typename T>
196	std::enable_if_t<std::/has_unique_object_representations_v/is_standard_layout_v<T>, UInt64> sipHash64(const T & x)
197	{
198	SipHash hash;
199	hash.update(x);
200	return hash.get64();
201	}
202
203	inline UInt64 sipHash64(const std::string & s)
204	{
205	return sipHash64(s.data(), s.size());
206	}
207

Browse the source code of ClickHouse/dbms/src/Common/SipHash.h