1 | #include <Columns/ColumnString.h> |
2 | #include <DataTypes/DataTypeString.h> |
3 | #include <Functions/FunctionFactory.h> |
4 | #include <Functions/FunctionHelpers.h> |
5 | #include <common/find_symbols.h> |
6 | |
7 | |
8 | namespace DB |
9 | { |
10 | |
/// Error codes used by this file. They are only declared here (extern);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
}
16 | |
17 | class FunctionRegexpQuoteMeta : public IFunction |
18 | { |
19 | public: |
20 | static constexpr auto name = "regexpQuoteMeta" ; |
21 | |
22 | static FunctionPtr create(const Context &) |
23 | { |
24 | return std::make_shared<FunctionRegexpQuoteMeta>(); |
25 | } |
26 | |
27 | String getName() const override |
28 | { |
29 | return name; |
30 | } |
31 | |
32 | size_t getNumberOfArguments() const override |
33 | { |
34 | return 1; |
35 | } |
36 | |
37 | bool useDefaultImplementationForConstants() const override |
38 | { |
39 | return true; |
40 | } |
41 | |
42 | DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override |
43 | { |
44 | if (!WhichDataType(arguments[0].type).isString()) |
45 | throw Exception( |
46 | "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String." , |
47 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
48 | |
49 | return std::make_shared<DataTypeString>(); |
50 | } |
51 | |
52 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override |
53 | { |
54 | const ColumnPtr & column_string = block.getByPosition(arguments[0]).column; |
55 | const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get()); |
56 | |
57 | if (!input) |
58 | throw Exception( |
59 | "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), |
60 | ErrorCodes::ILLEGAL_COLUMN); |
61 | |
62 | auto dst_column = ColumnString::create(); |
63 | auto & dst_data = dst_column->getChars(); |
64 | auto & dst_offsets = dst_column->getOffsets(); |
65 | |
66 | dst_offsets.resize(input_rows_count); |
67 | |
68 | const ColumnString::Offsets & src_offsets = input->getOffsets(); |
69 | |
70 | auto src_begin = reinterpret_cast<const char *>(input->getChars().data()); |
71 | auto src_pos = src_begin; |
72 | |
73 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
74 | { |
75 | /// NOTE This implementation slightly differs from re2::RE2::QuoteMeta. |
76 | /// It escapes zero byte as \0 instead of \x00 |
77 | /// and it escapes only required characters. |
78 | /// This is Ok. Look at comments in re2.cc |
79 | |
80 | const char * src_end = src_begin + src_offsets[row_idx] - 1; |
81 | |
82 | while (true) |
83 | { |
84 | const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); |
85 | |
86 | size_t bytes_to_copy = next_src_pos - src_pos; |
87 | size_t old_dst_size = dst_data.size(); |
88 | dst_data.resize(old_dst_size + bytes_to_copy); |
89 | memcpySmallAllowReadWriteOverflow15(dst_data.data() + old_dst_size, src_pos, bytes_to_copy); |
90 | src_pos = next_src_pos + 1; |
91 | |
92 | if (next_src_pos == src_end) |
93 | { |
94 | dst_data.emplace_back('\0'); |
95 | break; |
96 | } |
97 | |
98 | dst_data.emplace_back('\\'); |
99 | dst_data.emplace_back(*next_src_pos); |
100 | } |
101 | |
102 | dst_offsets[row_idx] = dst_data.size(); |
103 | } |
104 | |
105 | block.getByPosition(result).column = std::move(dst_column); |
106 | } |
107 | |
108 | }; |
109 | |
110 | void registerFunctionRegexpQuoteMeta(FunctionFactory & factory) |
111 | { |
112 | factory.registerFunction<FunctionRegexpQuoteMeta>(); |
113 | } |
114 | } |
115 | |