1 | #pragma once |
2 | |
3 | #include <string> |
4 | #include <vector> |
5 | #include <functional> |
6 | #include <ostream> |
7 | |
8 | #include <common/Types.h> |
9 | #include <common/unaligned.h> |
10 | |
11 | #include <city.h> |
12 | |
13 | #if defined(__SSE2__) |
14 | #include <emmintrin.h> |
15 | #endif |
16 | |
17 | #if defined(__SSE4_2__) |
18 | #include <smmintrin.h> |
19 | #include <nmmintrin.h> |
20 | #endif |
21 | |
22 | |
23 | /// The thing to avoid creating strings to find substrings in the hash table. |
24 | struct StringRef |
25 | { |
26 | const char * data = nullptr; |
27 | size_t size = 0; |
28 | |
29 | StringRef(const char * data_, size_t size_) : data(data_), size(size_) {} |
30 | StringRef(const unsigned char * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_) {} |
31 | StringRef(const std::string & s) : data(s.data()), size(s.size()) {} |
32 | StringRef() = default; |
33 | |
34 | std::string toString() const { return std::string(data, size); } |
35 | |
36 | explicit operator std::string() const { return toString(); } |
37 | }; |
38 | |
39 | using StringRefs = std::vector<StringRef>; |
40 | |
41 | |
42 | #if defined(__SSE2__) |
43 | |
44 | /** Compare strings for equality. |
45 | * The approach is controversial and does not win in all cases. |
46 | * For more information, see hash_map_string_2.cpp |
47 | */ |
48 | |
49 | inline bool compareSSE2(const char * p1, const char * p2) |
50 | { |
51 | return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8( |
52 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)), |
53 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2)))); |
54 | } |
55 | |
56 | inline bool compareSSE2x4(const char * p1, const char * p2) |
57 | { |
58 | return 0xFFFF == _mm_movemask_epi8( |
59 | _mm_and_si128( |
60 | _mm_and_si128( |
61 | _mm_cmpeq_epi8( |
62 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)), |
63 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2))), |
64 | _mm_cmpeq_epi8( |
65 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p1) + 1), |
66 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 1))), |
67 | _mm_and_si128( |
68 | _mm_cmpeq_epi8( |
69 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p1) + 2), |
70 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 2)), |
71 | _mm_cmpeq_epi8( |
72 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p1) + 3), |
73 | _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 3))))); |
74 | } |
75 | |
76 | inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) |
77 | { |
78 | while (size >= 64) |
79 | { |
80 | if (compareSSE2x4(p1, p2)) |
81 | { |
82 | p1 += 64; |
83 | p2 += 64; |
84 | size -= 64; |
85 | } |
86 | else |
87 | return false; |
88 | } |
89 | |
90 | switch ((size % 64) / 16) |
91 | { |
92 | case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]]; |
93 | case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]]; |
94 | case 1: if (!compareSSE2(p1 , p2 )) return false; [[fallthrough]]; |
95 | case 0: break; |
96 | } |
97 | |
98 | p1 += (size % 64) / 16 * 16; |
99 | p2 += (size % 64) / 16 * 16; |
100 | |
101 | switch (size % 16) |
102 | { |
103 | case 15: if (p1[14] != p2[14]) return false; [[fallthrough]]; |
104 | case 14: if (p1[13] != p2[13]) return false; [[fallthrough]]; |
105 | case 13: if (p1[12] != p2[12]) return false; [[fallthrough]]; |
106 | case 12: if (unalignedLoad<uint32_t>(p1 + 8) == unalignedLoad<uint32_t>(p2 + 8)) goto l8; else return false; |
107 | case 11: if (p1[10] != p2[10]) return false; [[fallthrough]]; |
108 | case 10: if (p1[9] != p2[9]) return false; [[fallthrough]]; |
109 | case 9: if (p1[8] != p2[8]) return false; |
110 | l8: [[fallthrough]]; |
111 | case 8: return unalignedLoad<uint64_t>(p1) == unalignedLoad<uint64_t>(p2); |
112 | case 7: if (p1[6] != p2[6]) return false; [[fallthrough]]; |
113 | case 6: if (p1[5] != p2[5]) return false; [[fallthrough]]; |
114 | case 5: if (p1[4] != p2[4]) return false; [[fallthrough]]; |
115 | case 4: return unalignedLoad<uint32_t>(p1) == unalignedLoad<uint32_t>(p2); |
116 | case 3: if (p1[2] != p2[2]) return false; [[fallthrough]]; |
117 | case 2: return unalignedLoad<uint16_t>(p1) == unalignedLoad<uint16_t>(p2); |
118 | case 1: if (p1[0] != p2[0]) return false; [[fallthrough]]; |
119 | case 0: break; |
120 | } |
121 | |
122 | return true; |
123 | } |
124 | |
125 | #endif |
126 | |
127 | |
128 | inline bool operator== (StringRef lhs, StringRef rhs) |
129 | { |
130 | if (lhs.size != rhs.size) |
131 | return false; |
132 | |
133 | if (lhs.size == 0) |
134 | return true; |
135 | |
136 | #if defined(__SSE2__) |
137 | return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); |
138 | #else |
139 | return 0 == memcmp(lhs.data, rhs.data, lhs.size); |
140 | #endif |
141 | } |
142 | |
143 | inline bool operator!= (StringRef lhs, StringRef rhs) |
144 | { |
145 | return !(lhs == rhs); |
146 | } |
147 | |
148 | inline bool operator< (StringRef lhs, StringRef rhs) |
149 | { |
150 | int cmp = memcmp(lhs.data, rhs.data, std::min(lhs.size, rhs.size)); |
151 | return cmp < 0 || (cmp == 0 && lhs.size < rhs.size); |
152 | } |
153 | |
154 | inline bool operator> (StringRef lhs, StringRef rhs) |
155 | { |
156 | int cmp = memcmp(lhs.data, rhs.data, std::min(lhs.size, rhs.size)); |
157 | return cmp > 0 || (cmp == 0 && lhs.size > rhs.size); |
158 | } |
159 | |
160 | |
161 | /** Hash functions. |
162 | * You can use either CityHash64, |
163 | * or a function based on the crc32 statement, |
164 | * which is obviously less qualitative, but on real data sets, |
165 | * when used in a hash table, works much faster. |
166 | * For more information, see hash_map_string_3.cpp |
167 | */ |
168 | |
169 | struct StringRefHash64 |
170 | { |
171 | size_t operator() (StringRef x) const |
172 | { |
173 | return CityHash_v1_0_2::CityHash64(x.data, x.size); |
174 | } |
175 | }; |
176 | |
177 | #if defined(__SSE4_2__) |
178 | |
179 | /// Parts are taken from CityHash. |
180 | |
181 | inline UInt64 hashLen16(UInt64 u, UInt64 v) |
182 | { |
183 | return CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(u, v)); |
184 | } |
185 | |
186 | inline UInt64 shiftMix(UInt64 val) |
187 | { |
188 | return val ^ (val >> 47); |
189 | } |
190 | |
191 | inline UInt64 rotateByAtLeast1(UInt64 val, int shift) |
192 | { |
193 | return (val >> shift) | (val << (64 - shift)); |
194 | } |
195 | |
196 | inline size_t hashLessThan8(const char * data, size_t size) |
197 | { |
198 | static constexpr UInt64 k2 = 0x9ae16a3b2f90404fULL; |
199 | static constexpr UInt64 k3 = 0xc949d7c7509e6557ULL; |
200 | |
201 | if (size >= 4) |
202 | { |
203 | UInt64 a = unalignedLoad<uint32_t>(data); |
204 | return hashLen16(size + (a << 3), unalignedLoad<uint32_t>(data + size - 4)); |
205 | } |
206 | |
207 | if (size > 0) |
208 | { |
209 | uint8_t a = data[0]; |
210 | uint8_t b = data[size >> 1]; |
211 | uint8_t c = data[size - 1]; |
212 | uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8); |
213 | uint32_t z = size + (static_cast<uint32_t>(c) << 2); |
214 | return shiftMix(y * k2 ^ z * k3) * k2; |
215 | } |
216 | |
217 | return k2; |
218 | } |
219 | |
220 | inline size_t hashLessThan16(const char * data, size_t size) |
221 | { |
222 | if (size > 8) |
223 | { |
224 | UInt64 a = unalignedLoad<UInt64>(data); |
225 | UInt64 b = unalignedLoad<UInt64>(data + size - 8); |
226 | return hashLen16(a, rotateByAtLeast1(b + size, size)) ^ b; |
227 | } |
228 | |
229 | return hashLessThan8(data, size); |
230 | } |
231 | |
232 | struct CRC32Hash |
233 | { |
234 | size_t operator() (StringRef x) const |
235 | { |
236 | const char * pos = x.data; |
237 | size_t size = x.size; |
238 | |
239 | if (size == 0) |
240 | return 0; |
241 | |
242 | if (size < 8) |
243 | { |
244 | return hashLessThan8(x.data, x.size); |
245 | } |
246 | |
247 | const char * end = pos + size; |
248 | size_t res = -1ULL; |
249 | |
250 | do |
251 | { |
252 | UInt64 word = unalignedLoad<UInt64>(pos); |
253 | res = _mm_crc32_u64(res, word); |
254 | |
255 | pos += 8; |
256 | } while (pos + 8 < end); |
257 | |
258 | UInt64 word = unalignedLoad<UInt64>(end - 8); /// I'm not sure if this is normal. |
259 | res = _mm_crc32_u64(res, word); |
260 | |
261 | return res; |
262 | } |
263 | }; |
264 | |
265 | struct StringRefHash : CRC32Hash {}; |
266 | |
267 | #else |
268 | |
269 | struct CRC32Hash |
270 | { |
271 | size_t operator() (StringRef /* x */) const |
272 | { |
273 | throw std::logic_error{"Not implemented CRC32Hash without SSE" }; |
274 | } |
275 | }; |
276 | |
277 | struct StringRefHash : StringRefHash64 {}; |
278 | |
279 | #endif |
280 | |
281 | |
282 | namespace std |
283 | { |
284 | template <> |
285 | struct hash<StringRef> : public StringRefHash {}; |
286 | } |
287 | |
288 | |
289 | namespace ZeroTraits |
290 | { |
291 | inline bool check(const StringRef & x) { return 0 == x.size; } |
292 | inline void set(StringRef & x) { x.size = 0; } |
293 | } |
294 | |
295 | |
296 | inline bool operator==(StringRef lhs, const char * rhs) |
297 | { |
298 | for (size_t pos = 0; pos < lhs.size; ++pos) |
299 | if (!rhs[pos] || lhs.data[pos] != rhs[pos]) |
300 | return false; |
301 | |
302 | return true; |
303 | } |
304 | |
305 | inline std::ostream & operator<<(std::ostream & os, const StringRef & str) |
306 | { |
307 | if (str.data) |
308 | os.write(str.data, str.size); |
309 | |
310 | return os; |
311 | } |
312 | |