| 1 | #include "duckdb/function/scalar/string_functions.hpp" |
| 2 | #include "duckdb/common/types/bit.hpp" |
| 3 | |
| 4 | #include "duckdb/common/exception.hpp" |
| 5 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
| 6 | #include "duckdb/planner/expression/bound_function_expression.hpp" |
| 7 | |
| 8 | #include "duckdb/planner/expression/bound_parameter_expression.hpp" |
| 9 | #include "utf8proc.hpp" |
| 10 | |
| 11 | namespace duckdb { |
| 12 | |
| 13 | // length returns the number of unicode codepoints |
| 14 | struct StringLengthOperator { |
| 15 | template <class TA, class TR> |
| 16 | static inline TR Operation(TA input) { |
| 17 | return LengthFun::Length<TA, TR>(input); |
| 18 | } |
| 19 | }; |
| 20 | |
| 21 | struct GraphemeCountOperator { |
| 22 | template <class TA, class TR> |
| 23 | static inline TR Operation(TA input) { |
| 24 | return LengthFun::GraphemeCount<TA, TR>(input); |
| 25 | } |
| 26 | }; |
| 27 | |
//! Unary list length: yields the `length` member of the input entry, converted to TR.
struct ArrayLengthOperator {
	template <class TA, class TR>
	static inline TR Operation(TA input) {
		TR entry_count = input.length;
		return entry_count;
	}
};
| 34 | |
| 35 | struct ArrayLengthBinaryOperator { |
| 36 | template <class TA, class TB, class TR> |
| 37 | static inline TR Operation(TA input, TB dimension) { |
| 38 | if (dimension != 1) { |
| 39 | throw NotImplementedException("array_length for dimensions other than 1 not implemented" ); |
| 40 | } |
| 41 | return input.length; |
| 42 | } |
| 43 | }; |
| 44 | |
//! strlen: the size of the input in bytes, as reported by its GetSize() method.
struct StrLenOperator {
	template <class TA, class TR>
	static inline TR Operation(TA input) {
		TR byte_count = input.GetSize();
		return byte_count;
	}
};
| 52 | |
| 53 | struct OctetLenOperator { |
| 54 | template <class TA, class TR> |
| 55 | static inline TR Operation(TA input) { |
| 56 | return Bit::OctetLength(bits: input); |
| 57 | } |
| 58 | }; |
| 59 | |
//! bit_length: the size of the input in bits, i.e. eight times its GetSize() byte count.
struct BitLenOperator {
	template <class TA, class TR>
	static inline TR Operation(TA input) {
		constexpr int BITS_PER_BYTE = 8;
		return BITS_PER_BYTE * input.GetSize();
	}
};
| 67 | |
| 68 | // bitstringlen returns the amount of bits in a bitstring |
| 69 | struct BitStringLenOperator { |
| 70 | template <class TA, class TR> |
| 71 | static inline TR Operation(TA input) { |
| 72 | return Bit::BitLength(bits: input); |
| 73 | } |
| 74 | }; |
| 75 | |
| 76 | static unique_ptr<BaseStatistics> LengthPropagateStats(ClientContext &context, FunctionStatisticsInput &input) { |
| 77 | auto &child_stats = input.child_stats; |
| 78 | auto &expr = input.expr; |
| 79 | D_ASSERT(child_stats.size() == 1); |
| 80 | // can only propagate stats if the children have stats |
| 81 | if (!StringStats::CanContainUnicode(stats: child_stats[0])) { |
| 82 | expr.function.function = ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>; |
| 83 | } |
| 84 | return nullptr; |
| 85 | } |
| 86 | |
| 87 | static unique_ptr<FunctionData> ListLengthBind(ClientContext &context, ScalarFunction &bound_function, |
| 88 | vector<unique_ptr<Expression>> &arguments) { |
| 89 | if (arguments[0]->HasParameter()) { |
| 90 | throw ParameterNotResolvedException(); |
| 91 | } |
| 92 | bound_function.arguments[0] = arguments[0]->return_type; |
| 93 | return nullptr; |
| 94 | } |
| 95 | |
| 96 | void LengthFun::RegisterFunction(BuiltinFunctions &set) { |
| 97 | ScalarFunction array_length_unary = |
| 98 | ScalarFunction({LogicalType::LIST(child: LogicalType::ANY)}, LogicalType::BIGINT, |
| 99 | ScalarFunction::UnaryFunction<list_entry_t, int64_t, ArrayLengthOperator>, ListLengthBind); |
| 100 | ScalarFunctionSet length("length" ); |
| 101 | length.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT, |
| 102 | ScalarFunction::UnaryFunction<string_t, int64_t, StringLengthOperator>, nullptr, |
| 103 | nullptr, LengthPropagateStats)); |
| 104 | length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT, |
| 105 | ScalarFunction::UnaryFunction<string_t, int64_t, BitStringLenOperator>)); |
| 106 | length.AddFunction(function: array_length_unary); |
| 107 | set.AddFunction(set: length); |
| 108 | length.name = "len" ; |
| 109 | set.AddFunction(set: length); |
| 110 | |
| 111 | ScalarFunctionSet length_grapheme("length_grapheme" ); |
| 112 | length_grapheme.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT, |
| 113 | ScalarFunction::UnaryFunction<string_t, int64_t, GraphemeCountOperator>, |
| 114 | nullptr, nullptr, LengthPropagateStats)); |
| 115 | set.AddFunction(set: length_grapheme); |
| 116 | |
| 117 | ScalarFunctionSet array_length("array_length" ); |
| 118 | array_length.AddFunction(function: array_length_unary); |
| 119 | array_length.AddFunction(function: ScalarFunction( |
| 120 | {LogicalType::LIST(child: LogicalType::ANY), LogicalType::BIGINT}, LogicalType::BIGINT, |
| 121 | ScalarFunction::BinaryFunction<list_entry_t, int64_t, int64_t, ArrayLengthBinaryOperator>, ListLengthBind)); |
| 122 | set.AddFunction(set: array_length); |
| 123 | |
| 124 | set.AddFunction(function: ScalarFunction("strlen" , {LogicalType::VARCHAR}, LogicalType::BIGINT, |
| 125 | ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>)); |
| 126 | ScalarFunctionSet bit_length("bit_length" ); |
| 127 | bit_length.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT, |
| 128 | ScalarFunction::UnaryFunction<string_t, int64_t, BitLenOperator>)); |
| 129 | bit_length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT, |
| 130 | ScalarFunction::UnaryFunction<string_t, int64_t, BitStringLenOperator>)); |
| 131 | set.AddFunction(set: bit_length); |
| 132 | // length for BLOB type |
| 133 | ScalarFunctionSet octet_length("octet_length" ); |
| 134 | octet_length.AddFunction(function: ScalarFunction({LogicalType::BLOB}, LogicalType::BIGINT, |
| 135 | ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>)); |
| 136 | octet_length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT, |
| 137 | ScalarFunction::UnaryFunction<string_t, int64_t, OctetLenOperator>)); |
| 138 | set.AddFunction(set: octet_length); |
| 139 | } |
| 140 | |
| 141 | } // namespace duckdb |
| 142 | |