| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | #include <cstdint> | 
|---|
| 4 |  | 
|---|
| 5 | #if defined(__SSE2__) | 
|---|
| 6 | #include <emmintrin.h> | 
|---|
| 7 | #endif | 
|---|
| 8 | #if defined(__SSE4_2__) | 
|---|
| 9 | #include <nmmintrin.h> | 
|---|
| 10 | #endif | 
|---|
| 11 |  | 
|---|
| 12 |  | 
|---|
| 13 | /** find_first_symbols<c1, c2, ...>(begin, end): | 
|---|
| 14 | * | 
|---|
| 15 | * Allow to search for next character from the set of 'symbols...' in a string. | 
|---|
| 16 | * It is similar to 'strpbrk', 'strcspn' (and 'strchr', 'memchr' in the case of one symbol and '\0'), | 
|---|
| 17 | * but with the following differencies: | 
|---|
| 18 | * - works with any memory ranges, including containing zero bytes; | 
|---|
| 19 | * - doesn't require terminating zero byte: end of memory range is passed explicitly; | 
|---|
| 20 | * - if not found, returns pointer to end instead of nullptr; | 
|---|
| 21 | * - maximum number of symbols to search is 16. | 
|---|
| 22 | * | 
|---|
| 23 | * Uses SSE 2 in case of small number of symbols for search and SSE 4.2 in the case of large number of symbols, | 
|---|
| 24 | *  that have more than 2x performance advantage over trivial loop | 
|---|
| 25 | *  in the case of parsing tab-separated dump with (probably escaped) string fields. | 
|---|
| 26 | * In the case of parsing tab separated dump with short strings, there is no performance degradation over trivial loop. | 
|---|
| 27 | * | 
|---|
| 28 | * Note: the optimal threshold to choose between SSE 2 and SSE 4.2 may depend on CPU model. | 
|---|
| 29 | * | 
|---|
| 30 | * find_last_symbols_or_null<c1, c2, ...>(begin, end): | 
|---|
| 31 | * | 
|---|
| 32 | * Allow to search for the last matching character in a string. | 
|---|
| 33 | * If no such characters, returns nullptr. | 
|---|
| 34 | */ | 
|---|
| 35 |  | 
|---|
| 36 | namespace detail | 
|---|
| 37 | { | 
|---|
| 38 |  | 
|---|
| 39 | template <char s0> | 
|---|
| 40 | inline bool is_in(char x) | 
|---|
| 41 | { | 
|---|
| 42 | return x == s0; | 
|---|
| 43 | } | 
|---|
| 44 |  | 
|---|
| 45 | template <char s0, char s1, char... tail> | 
|---|
| 46 | inline bool is_in(char x) | 
|---|
| 47 | { | 
|---|
| 48 | return x == s0 || is_in<s1, tail...>(x); | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 | #if defined(__SSE2__) | 
|---|
| 52 | template <char s0> | 
|---|
| 53 | inline __m128i mm_is_in(__m128i bytes) | 
|---|
| 54 | { | 
|---|
| 55 | __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0)); | 
|---|
| 56 | return eq0; | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | template <char s0, char s1, char... tail> | 
|---|
| 60 | inline __m128i mm_is_in(__m128i bytes) | 
|---|
| 61 | { | 
|---|
| 62 | __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0)); | 
|---|
| 63 | __m128i eq = mm_is_in<s1, tail...>(bytes); | 
|---|
| 64 | return _mm_or_si128(eq0, eq); | 
|---|
| 65 | } | 
|---|
| 66 | #endif | 
|---|
| 67 |  | 
|---|
| 68 | template <bool positive> | 
|---|
| 69 | bool maybe_negate(bool x) | 
|---|
| 70 | { | 
|---|
| 71 | if constexpr (positive) | 
|---|
| 72 | return x; | 
|---|
| 73 | else | 
|---|
| 74 | return !x; | 
|---|
| 75 | } | 
|---|
| 76 |  | 
|---|
| 77 | template <bool positive> | 
|---|
| 78 | uint16_t maybe_negate(uint16_t x) | 
|---|
| 79 | { | 
|---|
| 80 | if constexpr (positive) | 
|---|
| 81 | return x; | 
|---|
| 82 | else | 
|---|
| 83 | return ~x; | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 | enum class ReturnMode | 
|---|
| 87 | { | 
|---|
| 88 | End, | 
|---|
| 89 | Nullptr, | 
|---|
| 90 | }; | 
|---|
| 91 |  | 
|---|
| 92 |  | 
|---|
| 93 | template <bool positive, ReturnMode return_mode, char... symbols> | 
|---|
| 94 | inline const char * find_first_symbols_sse2(const char * const begin, const char * const end) | 
|---|
| 95 | { | 
|---|
| 96 | const char * pos = begin; | 
|---|
| 97 |  | 
|---|
| 98 | #if defined(__SSE2__) | 
|---|
| 99 | for (; pos + 15 < end; pos += 16) | 
|---|
| 100 | { | 
|---|
| 101 | __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)); | 
|---|
| 102 |  | 
|---|
| 103 | __m128i eq = mm_is_in<symbols...>(bytes); | 
|---|
| 104 |  | 
|---|
| 105 | uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq))); | 
|---|
| 106 | if (bit_mask) | 
|---|
| 107 | return pos + __builtin_ctz(bit_mask); | 
|---|
| 108 | } | 
|---|
| 109 | #endif | 
|---|
| 110 |  | 
|---|
| 111 | for (; pos < end; ++pos) | 
|---|
| 112 | if (maybe_negate<positive>(is_in<symbols...>(*pos))) | 
|---|
| 113 | return pos; | 
|---|
| 114 |  | 
|---|
| 115 | return return_mode == ReturnMode::End ? end : nullptr; | 
|---|
| 116 | } | 
|---|
| 117 |  | 
|---|
| 118 |  | 
|---|
| 119 | template <bool positive, ReturnMode return_mode, char... symbols> | 
|---|
| 120 | inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) | 
|---|
| 121 | { | 
|---|
| 122 | const char * pos = end; | 
|---|
| 123 |  | 
|---|
| 124 | #if defined(__SSE2__) | 
|---|
| 125 | for (; pos - 16 >= begin; pos -= 16)     /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. | 
|---|
| 126 | { | 
|---|
| 127 | __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos - 16)); | 
|---|
| 128 |  | 
|---|
| 129 | __m128i eq = mm_is_in<symbols...>(bytes); | 
|---|
| 130 |  | 
|---|
| 131 | uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq))); | 
|---|
| 132 | if (bit_mask) | 
|---|
| 133 | return pos - 1 - (__builtin_clz(bit_mask) - 16);    /// because __builtin_clz works with mask as uint32. | 
|---|
| 134 | } | 
|---|
| 135 | #endif | 
|---|
| 136 |  | 
|---|
| 137 | --pos; | 
|---|
| 138 | for (; pos >= begin; --pos) | 
|---|
| 139 | if (maybe_negate<positive>(is_in<symbols...>(*pos))) | 
|---|
| 140 | return pos; | 
|---|
| 141 |  | 
|---|
| 142 | return return_mode == ReturnMode::End ? end : nullptr; | 
|---|
| 143 | } | 
|---|
| 144 |  | 
|---|
| 145 |  | 
|---|
| 146 | template <bool positive, ReturnMode return_mode, size_t num_chars, | 
|---|
| 147 | char c01,     char c02 = 0, char c03 = 0, char c04 = 0, | 
|---|
| 148 | char c05 = 0, char c06 = 0, char c07 = 0, char c08 = 0, | 
|---|
| 149 | char c09 = 0, char c10 = 0, char c11 = 0, char c12 = 0, | 
|---|
| 150 | char c13 = 0, char c14 = 0, char c15 = 0, char c16 = 0> | 
|---|
| 151 | inline const char * find_first_symbols_sse42_impl(const char * const begin, const char * const end) | 
|---|
| 152 | { | 
|---|
| 153 | const char * pos = begin; | 
|---|
| 154 |  | 
|---|
| 155 | #if defined(__SSE4_2__) | 
|---|
| 156 | #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) | 
|---|
| 157 | __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); | 
|---|
| 158 |  | 
|---|
| 159 | for (; pos + 15 < end; pos += 16) | 
|---|
| 160 | { | 
|---|
| 161 | __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)); | 
|---|
| 162 |  | 
|---|
| 163 | if constexpr (positive) | 
|---|
| 164 | { | 
|---|
| 165 | if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE)) | 
|---|
| 166 | return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE); | 
|---|
| 167 | } | 
|---|
| 168 | else | 
|---|
| 169 | { | 
|---|
| 170 | if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY)) | 
|---|
| 171 | return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY); | 
|---|
| 172 | } | 
|---|
| 173 | } | 
|---|
| 174 | #undef MODE | 
|---|
| 175 | #endif | 
|---|
| 176 |  | 
|---|
| 177 | for (; pos < end; ++pos) | 
|---|
| 178 | if (   (num_chars >= 1 && maybe_negate<positive>(*pos == c01)) | 
|---|
| 179 | || (num_chars >= 2 && maybe_negate<positive>(*pos == c02)) | 
|---|
| 180 | || (num_chars >= 3 && maybe_negate<positive>(*pos == c03)) | 
|---|
| 181 | || (num_chars >= 4 && maybe_negate<positive>(*pos == c04)) | 
|---|
| 182 | || (num_chars >= 5 && maybe_negate<positive>(*pos == c05)) | 
|---|
| 183 | || (num_chars >= 6 && maybe_negate<positive>(*pos == c06)) | 
|---|
| 184 | || (num_chars >= 7 && maybe_negate<positive>(*pos == c07)) | 
|---|
| 185 | || (num_chars >= 8 && maybe_negate<positive>(*pos == c08)) | 
|---|
| 186 | || (num_chars >= 9 && maybe_negate<positive>(*pos == c09)) | 
|---|
| 187 | || (num_chars >= 10 && maybe_negate<positive>(*pos == c10)) | 
|---|
| 188 | || (num_chars >= 11 && maybe_negate<positive>(*pos == c11)) | 
|---|
| 189 | || (num_chars >= 12 && maybe_negate<positive>(*pos == c12)) | 
|---|
| 190 | || (num_chars >= 13 && maybe_negate<positive>(*pos == c13)) | 
|---|
| 191 | || (num_chars >= 14 && maybe_negate<positive>(*pos == c14)) | 
|---|
| 192 | || (num_chars >= 15 && maybe_negate<positive>(*pos == c15)) | 
|---|
| 193 | || (num_chars >= 16 && maybe_negate<positive>(*pos == c16))) | 
|---|
| 194 | return pos; | 
|---|
| 195 | return return_mode == ReturnMode::End ? end : nullptr; | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 |  | 
|---|
| 199 | template <bool positive, ReturnMode return_mode, char... symbols> | 
|---|
| 200 | inline const char * find_first_symbols_sse42(const char * begin, const char * end) | 
|---|
| 201 | { | 
|---|
| 202 | return find_first_symbols_sse42_impl<positive, return_mode, sizeof...(symbols), symbols...>(begin, end); | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | /// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do. | 
|---|
| 206 |  | 
|---|
| 207 | template <bool positive, ReturnMode return_mode, char... symbols> | 
|---|
| 208 | inline const char * find_first_symbols_dispatch(const char * begin, const char * end) | 
|---|
| 209 | { | 
|---|
| 210 | #if defined(__SSE4_2__) | 
|---|
| 211 | if (sizeof...(symbols) >= 5) | 
|---|
| 212 | return find_first_symbols_sse42<positive, return_mode, symbols...>(begin, end); | 
|---|
| 213 | else | 
|---|
| 214 | #endif | 
|---|
| 215 | return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end); | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | } | 
|---|
| 219 |  | 
|---|
| 220 |  | 
|---|
| 221 | template <char... symbols> | 
|---|
| 222 | inline const char * find_first_symbols(const char * begin, const char * end) | 
|---|
| 223 | { | 
|---|
| 224 | return detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(begin, end); | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|
| 227 | /// Returning non const result for non const arguments. | 
|---|
| 228 | /// It is convenient when you are using this function to iterate through non-const buffer. | 
|---|
| 229 | template <char... symbols> | 
|---|
| 230 | inline char * find_first_symbols(char * begin, char * end) | 
|---|
| 231 | { | 
|---|
| 232 | return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(begin, end)); | 
|---|
| 233 | } | 
|---|
| 234 |  | 
|---|
| 235 | template <char... symbols> | 
|---|
| 236 | inline const char * find_first_not_symbols(const char * begin, const char * end) | 
|---|
| 237 | { | 
|---|
| 238 | return detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(begin, end); | 
|---|
| 239 | } | 
|---|
| 240 |  | 
|---|
| 241 | template <char... symbols> | 
|---|
| 242 | inline char * find_first_not_symbols(char * begin, char * end) | 
|---|
| 243 | { | 
|---|
| 244 | return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(begin, end)); | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | template <char... symbols> | 
|---|
| 248 | inline const char * find_first_symbols_or_null(const char * begin, const char * end) | 
|---|
| 249 | { | 
|---|
| 250 | return detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(begin, end); | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | template <char... symbols> | 
|---|
| 254 | inline char * find_first_symbols_or_null(char * begin, char * end) | 
|---|
| 255 | { | 
|---|
| 256 | return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(begin, end)); | 
|---|
| 257 | } | 
|---|
| 258 |  | 
|---|
| 259 | template <char... symbols> | 
|---|
| 260 | inline const char * find_first_not_symbols_or_null(const char * begin, const char * end) | 
|---|
| 261 | { | 
|---|
| 262 | return detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(begin, end); | 
|---|
| 263 | } | 
|---|
| 264 |  | 
|---|
| 265 | template <char... symbols> | 
|---|
| 266 | inline char * find_first_not_symbols_or_null(char * begin, char * end) | 
|---|
| 267 | { | 
|---|
| 268 | return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(begin, end)); | 
|---|
| 269 | } | 
|---|
| 270 |  | 
|---|
| 271 |  | 
|---|
| 272 | template <char... symbols> | 
|---|
| 273 | inline const char * find_last_symbols_or_null(const char * begin, const char * end) | 
|---|
| 274 | { | 
|---|
| 275 | return detail::find_last_symbols_sse2<true, detail::ReturnMode::Nullptr, symbols...>(begin, end); | 
|---|
| 276 | } | 
|---|
| 277 |  | 
|---|
| 278 | template <char... symbols> | 
|---|
| 279 | inline char * find_last_symbols_or_null(char * begin, char * end) | 
|---|
| 280 | { | 
|---|
| 281 | return const_cast<char *>(detail::find_last_symbols_sse2<true, detail::ReturnMode::Nullptr, symbols...>(begin, end)); | 
|---|
| 282 | } | 
|---|
| 283 |  | 
|---|
| 284 | template <char... symbols> | 
|---|
| 285 | inline const char * find_last_not_symbols_or_null(const char * begin, const char * end) | 
|---|
| 286 | { | 
|---|
| 287 | return detail::find_last_symbols_sse2<false, detail::ReturnMode::Nullptr, symbols...>(begin, end); | 
|---|
| 288 | } | 
|---|
| 289 |  | 
|---|
| 290 | template <char... symbols> | 
|---|
| 291 | inline char * find_last_not_symbols_or_null(char * begin, char * end) | 
|---|
| 292 | { | 
|---|
| 293 | return const_cast<char *>(detail::find_last_symbols_sse2<false, detail::ReturnMode::Nullptr, symbols...>(begin, end)); | 
|---|
| 294 | } | 
|---|
| 295 |  | 
|---|