| 1 | #include <Columns/ColumnString.h> |
| 2 | |
| 3 | |
| 4 | namespace DB |
| 5 | { |
| 6 | |
| 7 | template <char not_case_lower_bound, char not_case_upper_bound> |
| 8 | struct LowerUpperImpl |
| 9 | { |
| 10 | static void vector(const ColumnString::Chars & data, |
| 11 | const ColumnString::Offsets & offsets, |
| 12 | ColumnString::Chars & res_data, |
| 13 | ColumnString::Offsets & res_offsets) |
| 14 | { |
| 15 | res_data.resize(data.size()); |
| 16 | res_offsets.assign(offsets); |
| 17 | array(data.data(), data.data() + data.size(), res_data.data()); |
| 18 | } |
| 19 | |
| 20 | static void vector_fixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data) |
| 21 | { |
| 22 | res_data.resize(data.size()); |
| 23 | array(data.data(), data.data() + data.size(), res_data.data()); |
| 24 | } |
| 25 | |
| 26 | private: |
| 27 | static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) |
| 28 | { |
| 29 | const auto flip_case_mask = 'A' ^ 'a'; |
| 30 | |
| 31 | #ifdef __SSE2__ |
| 32 | const auto bytes_sse = sizeof(__m128i); |
| 33 | const auto src_end_sse = src_end - (src_end - src) % bytes_sse; |
| 34 | |
| 35 | const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); |
| 36 | const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); |
| 37 | const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); |
| 38 | |
| 39 | for (; src < src_end_sse; src += bytes_sse, dst += bytes_sse) |
| 40 | { |
| 41 | /// load 16 sequential 8-bit characters |
| 42 | const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)); |
| 43 | |
| 44 | /// find which 8-bit sequences belong to range [case_lower_bound, case_upper_bound] |
| 45 | const auto is_not_case |
| 46 | = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); |
| 47 | |
| 48 | /// keep `flip_case_mask` only where necessary, zero out elsewhere |
| 49 | const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); |
| 50 | |
| 51 | /// flip case by applying calculated mask |
| 52 | const auto cased_chars = _mm_xor_si128(chars, xor_mask); |
| 53 | |
| 54 | /// store result back to destination |
| 55 | _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); |
| 56 | } |
| 57 | #endif |
| 58 | |
| 59 | for (; src < src_end; ++src, ++dst) |
| 60 | if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) |
| 61 | *dst = *src ^ flip_case_mask; |
| 62 | else |
| 63 | *dst = *src; |
| 64 | } |
| 65 | }; |
| 66 | |
| 67 | } |
| 68 | |