1 | #pragma once |
2 | |
3 | #include <Columns/ColumnArray.h> |
4 | #include <Columns/ColumnConst.h> |
5 | #include <Columns/ColumnString.h> |
6 | #include <Columns/ColumnVector.h> |
7 | #include <Columns/ColumnsNumber.h> |
8 | #include <DataTypes/DataTypeArray.h> |
9 | #include <DataTypes/DataTypeString.h> |
10 | #include <DataTypes/DataTypesNumber.h> |
11 | #include <Functions/FunctionHelpers.h> |
12 | #include <Functions/IFunctionImpl.h> |
13 | #include <IO/WriteHelpers.h> |
14 | #include <Interpreters/Context.h> |
15 | #include <common/StringRef.h> |
16 | |
17 | #include <optional> |
18 | |
19 | namespace DB |
20 | { |
/// Error codes thrown by FunctionsMultiStringFuzzySearch below.
/// These are declarations only (extern, no initializer); the definitions are not in this file.
namespace ErrorCodes
{
    /// Thrown from getReturnTypeImpl when an argument has an unexpected data type.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    /// Thrown from executeImpl when an argument column has an unexpected kind
    /// (non-String haystack, non-constant or unsupported edit distance, non-constant array).
    extern const int ILLEGAL_COLUMN;
    /// Thrown from executeImpl when the needle array has more than LimitArgs elements.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// Thrown from create when the implementation uses Hyperscan but 'allow_hyperscan' is disabled.
    extern const int FUNCTION_NOT_ALLOWED;
}
28 | |
29 | |
30 | template <typename Impl, typename Name, size_t LimitArgs> |
31 | class FunctionsMultiStringFuzzySearch : public IFunction |
32 | { |
33 | static_assert(LimitArgs > 0); |
34 | |
35 | public: |
36 | static constexpr auto name = Name::name; |
37 | static FunctionPtr create(const Context & context) |
38 | { |
39 | if (Impl::is_using_hyperscan && !context.getSettingsRef().allow_hyperscan) |
40 | throw Exception( |
41 | "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0" , ErrorCodes::FUNCTION_NOT_ALLOWED); |
42 | |
43 | return std::make_shared<FunctionsMultiStringFuzzySearch>(); |
44 | } |
45 | |
46 | String getName() const override { return name; } |
47 | |
48 | size_t getNumberOfArguments() const override { return 3; } |
49 | bool useDefaultImplementationForConstants() const override { return true; } |
50 | ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } |
51 | |
52 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
53 | { |
54 | if (!isString(arguments[0])) |
55 | throw Exception( |
56 | "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
57 | |
58 | if (!isUnsignedInteger(arguments[1])) |
59 | throw Exception( |
60 | "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
61 | |
62 | const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[2].get()); |
63 | if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get())) |
64 | throw Exception( |
65 | "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
66 | return Impl::ReturnType(); |
67 | } |
68 | |
69 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override |
70 | { |
71 | using ResultType = typename Impl::ResultType; |
72 | |
73 | const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column; |
74 | |
75 | const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack); |
76 | |
77 | const ColumnPtr & num_ptr = block.getByPosition(arguments[1]).column; |
78 | const ColumnConst * col_const_num = nullptr; |
79 | UInt32 edit_distance = 0; |
80 | |
81 | if ((col_const_num = checkAndGetColumnConst<ColumnUInt8>(num_ptr.get()))) |
82 | edit_distance = col_const_num->getValue<UInt8>(); |
83 | else if ((col_const_num = checkAndGetColumnConst<ColumnUInt16>(num_ptr.get()))) |
84 | edit_distance = col_const_num->getValue<UInt16>(); |
85 | else if ((col_const_num = checkAndGetColumnConst<ColumnUInt32>(num_ptr.get()))) |
86 | edit_distance = col_const_num->getValue<UInt32>(); |
87 | else |
88 | throw Exception( |
89 | "Illegal column " + block.getByPosition(arguments[1]).column->getName() |
90 | + ". The number is not const or does not fit in UInt32" , |
91 | ErrorCodes::ILLEGAL_COLUMN); |
92 | |
93 | |
94 | const ColumnPtr & arr_ptr = block.getByPosition(arguments[2]).column; |
95 | const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get()); |
96 | |
97 | if (!col_const_arr) |
98 | throw Exception( |
99 | "Illegal column " + block.getByPosition(arguments[2]).column->getName() + ". The array is not const" , |
100 | ErrorCodes::ILLEGAL_COLUMN); |
101 | |
102 | Array src_arr = col_const_arr->getValue<Array>(); |
103 | |
104 | if (src_arr.size() > LimitArgs) |
105 | throw Exception( |
106 | "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(src_arr.size()) |
107 | + ", should be at most " + std::to_string(LimitArgs), |
108 | ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
109 | |
110 | std::vector<StringRef> refs; |
111 | refs.reserve(src_arr.size()); |
112 | |
113 | for (const auto & el : src_arr) |
114 | refs.emplace_back(el.get<String>()); |
115 | |
116 | auto col_res = ColumnVector<ResultType>::create(); |
117 | auto col_offsets = ColumnArray::ColumnOffsets::create(); |
118 | |
119 | auto & vec_res = col_res->getData(); |
120 | auto & offsets_res = col_offsets->getData(); |
121 | |
122 | /// The blame for resizing output is for the callee. |
123 | if (col_haystack_vector) |
124 | Impl::vector_constant( |
125 | col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, offsets_res, edit_distance); |
126 | else |
127 | throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN); |
128 | |
129 | if constexpr (Impl::is_column_array) |
130 | block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets)); |
131 | else |
132 | block.getByPosition(result).column = std::move(col_res); |
133 | } |
134 | }; |
135 | |
136 | } |
137 | |