| 1 | #pragma once |
| 2 | |
| 3 | #include <Columns/ColumnArray.h> |
| 4 | #include <Columns/ColumnConst.h> |
| 5 | #include <Columns/ColumnString.h> |
| 6 | #include <Columns/ColumnVector.h> |
| 7 | #include <Columns/ColumnsNumber.h> |
| 8 | #include <DataTypes/DataTypeArray.h> |
| 9 | #include <DataTypes/DataTypeString.h> |
| 10 | #include <DataTypes/DataTypesNumber.h> |
| 11 | #include <Functions/FunctionHelpers.h> |
| 12 | #include <Functions/IFunctionImpl.h> |
| 13 | #include <IO/WriteHelpers.h> |
| 14 | #include <Interpreters/Context.h> |
| 15 | #include <common/StringRef.h> |
| 16 | |
| 17 | #include <optional> |
| 18 | |
| 19 | namespace DB |
| 20 | { |
/// Error codes referenced by the code in this header.
/// They are only declared here (extern); the definitions live outside of this file.
namespace ErrorCodes
{
    extern const int FUNCTION_NOT_ALLOWED;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
| 28 | |
| 29 | |
| 30 | template <typename Impl, typename Name, size_t LimitArgs> |
| 31 | class FunctionsMultiStringFuzzySearch : public IFunction |
| 32 | { |
| 33 | static_assert(LimitArgs > 0); |
| 34 | |
| 35 | public: |
| 36 | static constexpr auto name = Name::name; |
| 37 | static FunctionPtr create(const Context & context) |
| 38 | { |
| 39 | if (Impl::is_using_hyperscan && !context.getSettingsRef().allow_hyperscan) |
| 40 | throw Exception( |
| 41 | "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0" , ErrorCodes::FUNCTION_NOT_ALLOWED); |
| 42 | |
| 43 | return std::make_shared<FunctionsMultiStringFuzzySearch>(); |
| 44 | } |
| 45 | |
| 46 | String getName() const override { return name; } |
| 47 | |
| 48 | size_t getNumberOfArguments() const override { return 3; } |
| 49 | bool useDefaultImplementationForConstants() const override { return true; } |
| 50 | ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } |
| 51 | |
| 52 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
| 53 | { |
| 54 | if (!isString(arguments[0])) |
| 55 | throw Exception( |
| 56 | "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 57 | |
| 58 | if (!isUnsignedInteger(arguments[1])) |
| 59 | throw Exception( |
| 60 | "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 61 | |
| 62 | const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[2].get()); |
| 63 | if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get())) |
| 64 | throw Exception( |
| 65 | "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 66 | return Impl::ReturnType(); |
| 67 | } |
| 68 | |
| 69 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override |
| 70 | { |
| 71 | using ResultType = typename Impl::ResultType; |
| 72 | |
| 73 | const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column; |
| 74 | |
| 75 | const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack); |
| 76 | |
| 77 | const ColumnPtr & num_ptr = block.getByPosition(arguments[1]).column; |
| 78 | const ColumnConst * col_const_num = nullptr; |
| 79 | UInt32 edit_distance = 0; |
| 80 | |
| 81 | if ((col_const_num = checkAndGetColumnConst<ColumnUInt8>(num_ptr.get()))) |
| 82 | edit_distance = col_const_num->getValue<UInt8>(); |
| 83 | else if ((col_const_num = checkAndGetColumnConst<ColumnUInt16>(num_ptr.get()))) |
| 84 | edit_distance = col_const_num->getValue<UInt16>(); |
| 85 | else if ((col_const_num = checkAndGetColumnConst<ColumnUInt32>(num_ptr.get()))) |
| 86 | edit_distance = col_const_num->getValue<UInt32>(); |
| 87 | else |
| 88 | throw Exception( |
| 89 | "Illegal column " + block.getByPosition(arguments[1]).column->getName() |
| 90 | + ". The number is not const or does not fit in UInt32" , |
| 91 | ErrorCodes::ILLEGAL_COLUMN); |
| 92 | |
| 93 | |
| 94 | const ColumnPtr & arr_ptr = block.getByPosition(arguments[2]).column; |
| 95 | const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get()); |
| 96 | |
| 97 | if (!col_const_arr) |
| 98 | throw Exception( |
| 99 | "Illegal column " + block.getByPosition(arguments[2]).column->getName() + ". The array is not const" , |
| 100 | ErrorCodes::ILLEGAL_COLUMN); |
| 101 | |
| 102 | Array src_arr = col_const_arr->getValue<Array>(); |
| 103 | |
| 104 | if (src_arr.size() > LimitArgs) |
| 105 | throw Exception( |
| 106 | "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(src_arr.size()) |
| 107 | + ", should be at most " + std::to_string(LimitArgs), |
| 108 | ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
| 109 | |
| 110 | std::vector<StringRef> refs; |
| 111 | refs.reserve(src_arr.size()); |
| 112 | |
| 113 | for (const auto & el : src_arr) |
| 114 | refs.emplace_back(el.get<String>()); |
| 115 | |
| 116 | auto col_res = ColumnVector<ResultType>::create(); |
| 117 | auto col_offsets = ColumnArray::ColumnOffsets::create(); |
| 118 | |
| 119 | auto & vec_res = col_res->getData(); |
| 120 | auto & offsets_res = col_offsets->getData(); |
| 121 | |
| 122 | /// The blame for resizing output is for the callee. |
| 123 | if (col_haystack_vector) |
| 124 | Impl::vector_constant( |
| 125 | col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, offsets_res, edit_distance); |
| 126 | else |
| 127 | throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN); |
| 128 | |
| 129 | if constexpr (Impl::is_column_array) |
| 130 | block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets)); |
| 131 | else |
| 132 | block.getByPosition(result).column = std::move(col_res); |
| 133 | } |
| 134 | }; |
| 135 | |
| 136 | } |
| 137 | |