1#include <Columns/ColumnString.h>
2#include <DataTypes/DataTypeString.h>
3#include <Functions/FunctionFactory.h>
4#include <Functions/FunctionHelpers.h>
5#include <common/find_symbols.h>
6
7
8namespace DB
9{
10
/// Error codes thrown by this translation unit; they are only declared here
/// (extern), the definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;            /// thrown when the argument column is not a ColumnString
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;  /// thrown when the argument type is not String
}
16
17class FunctionRegexpQuoteMeta : public IFunction
18{
19public:
20 static constexpr auto name = "regexpQuoteMeta";
21
22 static FunctionPtr create(const Context &)
23 {
24 return std::make_shared<FunctionRegexpQuoteMeta>();
25 }
26
27 String getName() const override
28 {
29 return name;
30 }
31
32 size_t getNumberOfArguments() const override
33 {
34 return 1;
35 }
36
37 bool useDefaultImplementationForConstants() const override
38 {
39 return true;
40 }
41
42 DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
43 {
44 if (!WhichDataType(arguments[0].type).isString())
45 throw Exception(
46 "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.",
47 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
48
49 return std::make_shared<DataTypeString>();
50 }
51
52 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
53 {
54 const ColumnPtr & column_string = block.getByPosition(arguments[0]).column;
55 const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
56
57 if (!input)
58 throw Exception(
59 "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
60 ErrorCodes::ILLEGAL_COLUMN);
61
62 auto dst_column = ColumnString::create();
63 auto & dst_data = dst_column->getChars();
64 auto & dst_offsets = dst_column->getOffsets();
65
66 dst_offsets.resize(input_rows_count);
67
68 const ColumnString::Offsets & src_offsets = input->getOffsets();
69
70 auto src_begin = reinterpret_cast<const char *>(input->getChars().data());
71 auto src_pos = src_begin;
72
73 for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
74 {
75 /// NOTE This implementation slightly differs from re2::RE2::QuoteMeta.
76 /// It escapes zero byte as \0 instead of \x00
77 /// and it escapes only required characters.
78 /// This is Ok. Look at comments in re2.cc
79
80 const char * src_end = src_begin + src_offsets[row_idx] - 1;
81
82 while (true)
83 {
84 const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end);
85
86 size_t bytes_to_copy = next_src_pos - src_pos;
87 size_t old_dst_size = dst_data.size();
88 dst_data.resize(old_dst_size + bytes_to_copy);
89 memcpySmallAllowReadWriteOverflow15(dst_data.data() + old_dst_size, src_pos, bytes_to_copy);
90 src_pos = next_src_pos + 1;
91
92 if (next_src_pos == src_end)
93 {
94 dst_data.emplace_back('\0');
95 break;
96 }
97
98 dst_data.emplace_back('\\');
99 dst_data.emplace_back(*next_src_pos);
100 }
101
102 dst_offsets[row_idx] = dst_data.size();
103 }
104
105 block.getByPosition(result).column = std::move(dst_column);
106 }
107
108};
109
110void registerFunctionRegexpQuoteMeta(FunctionFactory & factory)
111{
112 factory.registerFunction<FunctionRegexpQuoteMeta>();
113}
114}
115