| 1 | #include <Columns/ColumnString.h> | 
| 2 | #include <DataTypes/DataTypeString.h> | 
| 3 | #include <Functions/FunctionFactory.h> | 
| 4 | #include <Functions/FunctionHelpers.h> | 
| 5 | #include <common/find_symbols.h> | 
| 6 |  | 
| 7 |  | 
| 8 | namespace DB | 
| 9 | { | 
| 10 |  | 
/// Error codes used by this translation unit; they are only declared here
/// (`extern`), their definitions live elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;  /// thrown from getReturnTypeImpl for a non-String argument
    extern const int ILLEGAL_COLUMN;            /// thrown from executeImpl when the argument column is not a ColumnString
}
| 16 |  | 
| 17 | class FunctionRegexpQuoteMeta : public IFunction | 
| 18 | { | 
| 19 | public: | 
| 20 |     static constexpr auto name = "regexpQuoteMeta" ; | 
| 21 |  | 
| 22 |     static FunctionPtr create(const Context &) | 
| 23 |     { | 
| 24 |         return std::make_shared<FunctionRegexpQuoteMeta>(); | 
| 25 |     } | 
| 26 |  | 
| 27 |     String getName() const override | 
| 28 |     { | 
| 29 |         return name; | 
| 30 |     } | 
| 31 |  | 
| 32 |     size_t getNumberOfArguments() const override | 
| 33 |     { | 
| 34 |         return 1; | 
| 35 |     } | 
| 36 |  | 
| 37 |     bool useDefaultImplementationForConstants() const override | 
| 38 |     { | 
| 39 |         return true; | 
| 40 |     } | 
| 41 |  | 
| 42 |     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override | 
| 43 |     { | 
| 44 |         if (!WhichDataType(arguments[0].type).isString()) | 
| 45 |             throw Exception( | 
| 46 |                 "Illegal type "  + arguments[0].type->getName() + " of 1 argument of function "  + getName() + ". Must be String." , | 
| 47 |                 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); | 
| 48 |  | 
| 49 |         return std::make_shared<DataTypeString>(); | 
| 50 |     } | 
| 51 |  | 
| 52 |     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override | 
| 53 |     { | 
| 54 |         const ColumnPtr & column_string = block.getByPosition(arguments[0]).column; | 
| 55 |         const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get()); | 
| 56 |  | 
| 57 |         if (!input) | 
| 58 |             throw Exception( | 
| 59 |                 "Illegal column "  + block.getByPosition(arguments[0]).column->getName() + " of first argument of function "  + getName(), | 
| 60 |                 ErrorCodes::ILLEGAL_COLUMN); | 
| 61 |  | 
| 62 |         auto dst_column = ColumnString::create(); | 
| 63 |         auto & dst_data = dst_column->getChars(); | 
| 64 |         auto & dst_offsets = dst_column->getOffsets(); | 
| 65 |  | 
| 66 |         dst_offsets.resize(input_rows_count); | 
| 67 |  | 
| 68 |         const ColumnString::Offsets & src_offsets = input->getOffsets(); | 
| 69 |  | 
| 70 |         auto src_begin = reinterpret_cast<const char *>(input->getChars().data()); | 
| 71 |         auto src_pos = src_begin; | 
| 72 |  | 
| 73 |         for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) | 
| 74 |         { | 
| 75 |             /// NOTE This implementation slightly differs from re2::RE2::QuoteMeta. | 
| 76 |             /// It escapes zero byte as \0 instead of \x00 | 
| 77 |             ///  and it escapes only required characters. | 
| 78 |             /// This is Ok. Look at comments in re2.cc | 
| 79 |  | 
| 80 |             const char * src_end = src_begin + src_offsets[row_idx] - 1; | 
| 81 |  | 
| 82 |             while (true) | 
| 83 |             { | 
| 84 |                 const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); | 
| 85 |  | 
| 86 |                 size_t bytes_to_copy = next_src_pos - src_pos; | 
| 87 |                 size_t old_dst_size = dst_data.size(); | 
| 88 |                 dst_data.resize(old_dst_size + bytes_to_copy); | 
| 89 |                 memcpySmallAllowReadWriteOverflow15(dst_data.data() + old_dst_size, src_pos, bytes_to_copy); | 
| 90 |                 src_pos = next_src_pos + 1; | 
| 91 |  | 
| 92 |                 if (next_src_pos == src_end) | 
| 93 |                 { | 
| 94 |                     dst_data.emplace_back('\0'); | 
| 95 |                     break; | 
| 96 |                 } | 
| 97 |  | 
| 98 |                 dst_data.emplace_back('\\'); | 
| 99 |                 dst_data.emplace_back(*next_src_pos); | 
| 100 |             } | 
| 101 |  | 
| 102 |             dst_offsets[row_idx] = dst_data.size(); | 
| 103 |         } | 
| 104 |  | 
| 105 |         block.getByPosition(result).column = std::move(dst_column); | 
| 106 |     } | 
| 107 |  | 
| 108 | }; | 
| 109 |  | 
| 110 | void registerFunctionRegexpQuoteMeta(FunctionFactory & factory) | 
| 111 | { | 
| 112 |     factory.registerFunction<FunctionRegexpQuoteMeta>(); | 
| 113 | } | 
| 114 | } | 
| 115 |  |