1 | #include "duckdb/function/scalar/string_functions.hpp" |
2 | #include "duckdb/common/types/bit.hpp" |
3 | |
4 | #include "duckdb/common/exception.hpp" |
5 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
6 | #include "duckdb/planner/expression/bound_function_expression.hpp" |
7 | |
8 | #include "duckdb/planner/expression/bound_parameter_expression.hpp" |
9 | #include "utf8proc.hpp" |
10 | |
11 | namespace duckdb { |
12 | |
13 | // length returns the number of unicode codepoints |
14 | struct StringLengthOperator { |
15 | template <class TA, class TR> |
16 | static inline TR Operation(TA input) { |
17 | return LengthFun::Length<TA, TR>(input); |
18 | } |
19 | }; |
20 | |
21 | struct GraphemeCountOperator { |
22 | template <class TA, class TR> |
23 | static inline TR Operation(TA input) { |
24 | return LengthFun::GraphemeCount<TA, TR>(input); |
25 | } |
26 | }; |
27 | |
//! Unary operator that returns the `length` member of its input, converted to the result type.
struct ArrayLengthOperator {
	template <class INPUT_TYPE, class RESULT_TYPE>
	static inline RESULT_TYPE Operation(INPUT_TYPE input) {
		return static_cast<RESULT_TYPE>(input.length);
	}
};
34 | |
35 | struct ArrayLengthBinaryOperator { |
36 | template <class TA, class TB, class TR> |
37 | static inline TR Operation(TA input, TB dimension) { |
38 | if (dimension != 1) { |
39 | throw NotImplementedException("array_length for dimensions other than 1 not implemented" ); |
40 | } |
41 | return input.length; |
42 | } |
43 | }; |
44 | |
// strlen returns the size in bytes
//! Unary operator that returns whatever the input's GetSize() reports.
struct StrLenOperator {
	template <class INPUT_TYPE, class RESULT_TYPE>
	static inline RESULT_TYPE Operation(INPUT_TYPE input) {
		const auto byte_count = input.GetSize();
		return byte_count;
	}
};
52 | |
53 | struct OctetLenOperator { |
54 | template <class TA, class TR> |
55 | static inline TR Operation(TA input) { |
56 | return Bit::OctetLength(bits: input); |
57 | } |
58 | }; |
59 | |
// bitlen returns the size in bits
//! Unary operator that returns eight times the input's GetSize().
struct BitLenOperator {
	template <class INPUT_TYPE, class RESULT_TYPE>
	static inline RESULT_TYPE Operation(INPUT_TYPE input) {
		const auto byte_count = input.GetSize();
		return 8 * byte_count;
	}
};
67 | |
68 | // bitstringlen returns the amount of bits in a bitstring |
69 | struct BitStringLenOperator { |
70 | template <class TA, class TR> |
71 | static inline TR Operation(TA input) { |
72 | return Bit::BitLength(bits: input); |
73 | } |
74 | }; |
75 | |
76 | static unique_ptr<BaseStatistics> LengthPropagateStats(ClientContext &context, FunctionStatisticsInput &input) { |
77 | auto &child_stats = input.child_stats; |
78 | auto &expr = input.expr; |
79 | D_ASSERT(child_stats.size() == 1); |
80 | // can only propagate stats if the children have stats |
81 | if (!StringStats::CanContainUnicode(stats: child_stats[0])) { |
82 | expr.function.function = ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>; |
83 | } |
84 | return nullptr; |
85 | } |
86 | |
87 | static unique_ptr<FunctionData> ListLengthBind(ClientContext &context, ScalarFunction &bound_function, |
88 | vector<unique_ptr<Expression>> &arguments) { |
89 | if (arguments[0]->HasParameter()) { |
90 | throw ParameterNotResolvedException(); |
91 | } |
92 | bound_function.arguments[0] = arguments[0]->return_type; |
93 | return nullptr; |
94 | } |
95 | |
96 | void LengthFun::RegisterFunction(BuiltinFunctions &set) { |
97 | ScalarFunction array_length_unary = |
98 | ScalarFunction({LogicalType::LIST(child: LogicalType::ANY)}, LogicalType::BIGINT, |
99 | ScalarFunction::UnaryFunction<list_entry_t, int64_t, ArrayLengthOperator>, ListLengthBind); |
100 | ScalarFunctionSet length("length" ); |
101 | length.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT, |
102 | ScalarFunction::UnaryFunction<string_t, int64_t, StringLengthOperator>, nullptr, |
103 | nullptr, LengthPropagateStats)); |
104 | length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT, |
105 | ScalarFunction::UnaryFunction<string_t, int64_t, BitStringLenOperator>)); |
106 | length.AddFunction(function: array_length_unary); |
107 | set.AddFunction(set: length); |
108 | length.name = "len" ; |
109 | set.AddFunction(set: length); |
110 | |
111 | ScalarFunctionSet length_grapheme("length_grapheme" ); |
112 | length_grapheme.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT, |
113 | ScalarFunction::UnaryFunction<string_t, int64_t, GraphemeCountOperator>, |
114 | nullptr, nullptr, LengthPropagateStats)); |
115 | set.AddFunction(set: length_grapheme); |
116 | |
117 | ScalarFunctionSet array_length("array_length" ); |
118 | array_length.AddFunction(function: array_length_unary); |
119 | array_length.AddFunction(function: ScalarFunction( |
120 | {LogicalType::LIST(child: LogicalType::ANY), LogicalType::BIGINT}, LogicalType::BIGINT, |
121 | ScalarFunction::BinaryFunction<list_entry_t, int64_t, int64_t, ArrayLengthBinaryOperator>, ListLengthBind)); |
122 | set.AddFunction(set: array_length); |
123 | |
124 | set.AddFunction(function: ScalarFunction("strlen" , {LogicalType::VARCHAR}, LogicalType::BIGINT, |
125 | ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>)); |
126 | ScalarFunctionSet bit_length("bit_length" ); |
127 | bit_length.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT, |
128 | ScalarFunction::UnaryFunction<string_t, int64_t, BitLenOperator>)); |
129 | bit_length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT, |
130 | ScalarFunction::UnaryFunction<string_t, int64_t, BitStringLenOperator>)); |
131 | set.AddFunction(set: bit_length); |
132 | // length for BLOB type |
133 | ScalarFunctionSet octet_length("octet_length" ); |
134 | octet_length.AddFunction(function: ScalarFunction({LogicalType::BLOB}, LogicalType::BIGINT, |
135 | ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>)); |
136 | octet_length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT, |
137 | ScalarFunction::UnaryFunction<string_t, int64_t, OctetLenOperator>)); |
138 | set.AddFunction(set: octet_length); |
139 | } |
140 | |
141 | } // namespace duckdb |
142 | |