1 | #pragma once |
2 | |
3 | #include <cstdint> |
4 | |
5 | #if defined(__SSE2__) |
6 | #include <emmintrin.h> |
7 | #endif |
8 | #if defined(__SSE4_2__) |
9 | #include <nmmintrin.h> |
10 | #endif |
11 | |
12 | |
13 | /** find_first_symbols<c1, c2, ...>(begin, end): |
14 | * |
15 | * Allow to search for next character from the set of 'symbols...' in a string. |
16 | * It is similar to 'strpbrk', 'strcspn' (and 'strchr', 'memchr' in the case of one symbol and '\0'), |
17 | * but with the following differencies: |
18 | * - works with any memory ranges, including containing zero bytes; |
19 | * - doesn't require terminating zero byte: end of memory range is passed explicitly; |
20 | * - if not found, returns pointer to end instead of nullptr; |
21 | * - maximum number of symbols to search is 16. |
22 | * |
23 | * Uses SSE 2 in case of small number of symbols for search and SSE 4.2 in the case of large number of symbols, |
24 | * that have more than 2x performance advantage over trivial loop |
25 | * in the case of parsing tab-separated dump with (probably escaped) string fields. |
26 | * In the case of parsing tab separated dump with short strings, there is no performance degradation over trivial loop. |
27 | * |
28 | * Note: the optimal threshold to choose between SSE 2 and SSE 4.2 may depend on CPU model. |
29 | * |
30 | * find_last_symbols_or_null<c1, c2, ...>(begin, end): |
31 | * |
32 | * Allow to search for the last matching character in a string. |
33 | * If no such characters, returns nullptr. |
34 | */ |
35 | |
36 | namespace detail |
37 | { |
38 | |
39 | template <char s0> |
40 | inline bool is_in(char x) |
41 | { |
42 | return x == s0; |
43 | } |
44 | |
45 | template <char s0, char s1, char... tail> |
46 | inline bool is_in(char x) |
47 | { |
48 | return x == s0 || is_in<s1, tail...>(x); |
49 | } |
50 | |
51 | #if defined(__SSE2__) |
52 | template <char s0> |
53 | inline __m128i mm_is_in(__m128i bytes) |
54 | { |
55 | __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0)); |
56 | return eq0; |
57 | } |
58 | |
59 | template <char s0, char s1, char... tail> |
60 | inline __m128i mm_is_in(__m128i bytes) |
61 | { |
62 | __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0)); |
63 | __m128i eq = mm_is_in<s1, tail...>(bytes); |
64 | return _mm_or_si128(eq0, eq); |
65 | } |
66 | #endif |
67 | |
68 | template <bool positive> |
69 | bool maybe_negate(bool x) |
70 | { |
71 | if constexpr (positive) |
72 | return x; |
73 | else |
74 | return !x; |
75 | } |
76 | |
77 | template <bool positive> |
78 | uint16_t maybe_negate(uint16_t x) |
79 | { |
80 | if constexpr (positive) |
81 | return x; |
82 | else |
83 | return ~x; |
84 | } |
85 | |
86 | enum class ReturnMode |
87 | { |
88 | End, |
89 | Nullptr, |
90 | }; |
91 | |
92 | |
93 | template <bool positive, ReturnMode return_mode, char... symbols> |
94 | inline const char * find_first_symbols_sse2(const char * const begin, const char * const end) |
95 | { |
96 | const char * pos = begin; |
97 | |
98 | #if defined(__SSE2__) |
99 | for (; pos + 15 < end; pos += 16) |
100 | { |
101 | __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)); |
102 | |
103 | __m128i eq = mm_is_in<symbols...>(bytes); |
104 | |
105 | uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq))); |
106 | if (bit_mask) |
107 | return pos + __builtin_ctz(bit_mask); |
108 | } |
109 | #endif |
110 | |
111 | for (; pos < end; ++pos) |
112 | if (maybe_negate<positive>(is_in<symbols...>(*pos))) |
113 | return pos; |
114 | |
115 | return return_mode == ReturnMode::End ? end : nullptr; |
116 | } |
117 | |
118 | |
119 | template <bool positive, ReturnMode return_mode, char... symbols> |
120 | inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) |
121 | { |
122 | const char * pos = end; |
123 | |
124 | #if defined(__SSE2__) |
125 | for (; pos - 16 >= begin; pos -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. |
126 | { |
127 | __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos - 16)); |
128 | |
129 | __m128i eq = mm_is_in<symbols...>(bytes); |
130 | |
131 | uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq))); |
132 | if (bit_mask) |
133 | return pos - 1 - (__builtin_clz(bit_mask) - 16); /// because __builtin_clz works with mask as uint32. |
134 | } |
135 | #endif |
136 | |
137 | --pos; |
138 | for (; pos >= begin; --pos) |
139 | if (maybe_negate<positive>(is_in<symbols...>(*pos))) |
140 | return pos; |
141 | |
142 | return return_mode == ReturnMode::End ? end : nullptr; |
143 | } |
144 | |
145 | |
146 | template <bool positive, ReturnMode return_mode, size_t num_chars, |
147 | char c01, char c02 = 0, char c03 = 0, char c04 = 0, |
148 | char c05 = 0, char c06 = 0, char c07 = 0, char c08 = 0, |
149 | char c09 = 0, char c10 = 0, char c11 = 0, char c12 = 0, |
150 | char c13 = 0, char c14 = 0, char c15 = 0, char c16 = 0> |
151 | inline const char * find_first_symbols_sse42_impl(const char * const begin, const char * const end) |
152 | { |
153 | const char * pos = begin; |
154 | |
155 | #if defined(__SSE4_2__) |
156 | #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) |
157 | __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); |
158 | |
159 | for (; pos + 15 < end; pos += 16) |
160 | { |
161 | __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)); |
162 | |
163 | if constexpr (positive) |
164 | { |
165 | if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE)) |
166 | return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE); |
167 | } |
168 | else |
169 | { |
170 | if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY)) |
171 | return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY); |
172 | } |
173 | } |
174 | #undef MODE |
175 | #endif |
176 | |
177 | for (; pos < end; ++pos) |
178 | if ( (num_chars >= 1 && maybe_negate<positive>(*pos == c01)) |
179 | || (num_chars >= 2 && maybe_negate<positive>(*pos == c02)) |
180 | || (num_chars >= 3 && maybe_negate<positive>(*pos == c03)) |
181 | || (num_chars >= 4 && maybe_negate<positive>(*pos == c04)) |
182 | || (num_chars >= 5 && maybe_negate<positive>(*pos == c05)) |
183 | || (num_chars >= 6 && maybe_negate<positive>(*pos == c06)) |
184 | || (num_chars >= 7 && maybe_negate<positive>(*pos == c07)) |
185 | || (num_chars >= 8 && maybe_negate<positive>(*pos == c08)) |
186 | || (num_chars >= 9 && maybe_negate<positive>(*pos == c09)) |
187 | || (num_chars >= 10 && maybe_negate<positive>(*pos == c10)) |
188 | || (num_chars >= 11 && maybe_negate<positive>(*pos == c11)) |
189 | || (num_chars >= 12 && maybe_negate<positive>(*pos == c12)) |
190 | || (num_chars >= 13 && maybe_negate<positive>(*pos == c13)) |
191 | || (num_chars >= 14 && maybe_negate<positive>(*pos == c14)) |
192 | || (num_chars >= 15 && maybe_negate<positive>(*pos == c15)) |
193 | || (num_chars >= 16 && maybe_negate<positive>(*pos == c16))) |
194 | return pos; |
195 | return return_mode == ReturnMode::End ? end : nullptr; |
196 | } |
197 | |
198 | |
199 | template <bool positive, ReturnMode return_mode, char... symbols> |
200 | inline const char * find_first_symbols_sse42(const char * begin, const char * end) |
201 | { |
202 | return find_first_symbols_sse42_impl<positive, return_mode, sizeof...(symbols), symbols...>(begin, end); |
203 | } |
204 | |
205 | /// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do. |
206 | |
207 | template <bool positive, ReturnMode return_mode, char... symbols> |
208 | inline const char * find_first_symbols_dispatch(const char * begin, const char * end) |
209 | { |
210 | #if defined(__SSE4_2__) |
211 | if (sizeof...(symbols) >= 5) |
212 | return find_first_symbols_sse42<positive, return_mode, symbols...>(begin, end); |
213 | else |
214 | #endif |
215 | return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end); |
216 | } |
217 | |
218 | } |
219 | |
220 | |
221 | template <char... symbols> |
222 | inline const char * find_first_symbols(const char * begin, const char * end) |
223 | { |
224 | return detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(begin, end); |
225 | } |
226 | |
227 | /// Returning non const result for non const arguments. |
228 | /// It is convenient when you are using this function to iterate through non-const buffer. |
229 | template <char... symbols> |
230 | inline char * find_first_symbols(char * begin, char * end) |
231 | { |
232 | return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(begin, end)); |
233 | } |
234 | |
235 | template <char... symbols> |
236 | inline const char * find_first_not_symbols(const char * begin, const char * end) |
237 | { |
238 | return detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(begin, end); |
239 | } |
240 | |
241 | template <char... symbols> |
242 | inline char * find_first_not_symbols(char * begin, char * end) |
243 | { |
244 | return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(begin, end)); |
245 | } |
246 | |
247 | template <char... symbols> |
248 | inline const char * find_first_symbols_or_null(const char * begin, const char * end) |
249 | { |
250 | return detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(begin, end); |
251 | } |
252 | |
253 | template <char... symbols> |
254 | inline char * find_first_symbols_or_null(char * begin, char * end) |
255 | { |
256 | return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(begin, end)); |
257 | } |
258 | |
259 | template <char... symbols> |
260 | inline const char * find_first_not_symbols_or_null(const char * begin, const char * end) |
261 | { |
262 | return detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(begin, end); |
263 | } |
264 | |
265 | template <char... symbols> |
266 | inline char * find_first_not_symbols_or_null(char * begin, char * end) |
267 | { |
268 | return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(begin, end)); |
269 | } |
270 | |
271 | |
272 | template <char... symbols> |
273 | inline const char * find_last_symbols_or_null(const char * begin, const char * end) |
274 | { |
275 | return detail::find_last_symbols_sse2<true, detail::ReturnMode::Nullptr, symbols...>(begin, end); |
276 | } |
277 | |
278 | template <char... symbols> |
279 | inline char * find_last_symbols_or_null(char * begin, char * end) |
280 | { |
281 | return const_cast<char *>(detail::find_last_symbols_sse2<true, detail::ReturnMode::Nullptr, symbols...>(begin, end)); |
282 | } |
283 | |
284 | template <char... symbols> |
285 | inline const char * find_last_not_symbols_or_null(const char * begin, const char * end) |
286 | { |
287 | return detail::find_last_symbols_sse2<false, detail::ReturnMode::Nullptr, symbols...>(begin, end); |
288 | } |
289 | |
290 | template <char... symbols> |
291 | inline char * find_last_not_symbols_or_null(char * begin, char * end) |
292 | { |
293 | return const_cast<char *>(detail::find_last_symbols_sse2<false, detail::ReturnMode::Nullptr, symbols...>(begin, end)); |
294 | } |
295 | |