1#include <DataTypes/DataTypeString.h>
2#include <Columns/ColumnString.h>
3#include <Functions/FunctionFactory.h>
4#include <Functions/FunctionStringToString.h>
5
6
7namespace DB
8{
9
10namespace ErrorCodes
11{
12 extern const int ILLEGAL_COLUMN;
13}
14
15
16/** Reverse the sequence of code points in a UTF-8 encoded string.
17 * The result may not match the expected result, because modifying code points (for example, diacritics) may be applied to another symbols.
18 * If the string is not encoded in UTF-8, then the behavior is undefined.
19 */
20struct ReverseUTF8Impl
21{
22 static void vector(const ColumnString::Chars & data,
23 const ColumnString::Offsets & offsets,
24 ColumnString::Chars & res_data,
25 ColumnString::Offsets & res_offsets)
26 {
27 res_data.resize(data.size());
28 res_offsets.assign(offsets);
29 size_t size = offsets.size();
30
31 ColumnString::Offset prev_offset = 0;
32 for (size_t i = 0; i < size; ++i)
33 {
34 ColumnString::Offset j = prev_offset;
35 while (j < offsets[i] - 1)
36 {
37 if (data[j] < 0xBF)
38 {
39 res_data[offsets[i] + prev_offset - 2 - j] = data[j];
40 j += 1;
41 }
42 else if (data[j] < 0xE0)
43 {
44 memcpy(&res_data[offsets[i] + prev_offset - 2 - j - 1], &data[j], 2);
45 j += 2;
46 }
47 else if (data[j] < 0xF0)
48 {
49 memcpy(&res_data[offsets[i] + prev_offset - 2 - j - 2], &data[j], 3);
50 j += 3;
51 }
52 else
53 {
54 res_data[offsets[i] + prev_offset - 2 - j] = data[j];
55 j += 1;
56 }
57 }
58
59 res_data[offsets[i] - 1] = 0;
60 prev_offset = offsets[i];
61 }
62 }
63
64 [[noreturn]] static void vector_fixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
65 {
66 throw Exception("Cannot apply function reverseUTF8 to fixed string.", ErrorCodes::ILLEGAL_COLUMN);
67 }
68};
69
/// Name tag type: exposes the function name as the compile-time constant `name`.
struct NameReverseUTF8
{
    static constexpr const char * name = "reverseUTF8";
};
74using FunctionReverseUTF8 = FunctionStringToString<ReverseUTF8Impl, NameReverseUTF8, true>;
75
76void registerFunctionReverseUTF8(FunctionFactory & factory)
77{
78 factory.registerFunction<FunctionReverseUTF8>();
79}
80
81}
82