1#include "duckdb/function/scalar/string_functions.hpp"
2#include "duckdb/common/types/bit.hpp"
3
4#include "duckdb/common/exception.hpp"
5#include "duckdb/common/vector_operations/vector_operations.hpp"
6#include "duckdb/planner/expression/bound_function_expression.hpp"
7
8#include "duckdb/planner/expression/bound_parameter_expression.hpp"
9#include "utf8proc.hpp"
10
11namespace duckdb {
12
13// length returns the number of unicode codepoints
14struct StringLengthOperator {
15 template <class TA, class TR>
16 static inline TR Operation(TA input) {
17 return LengthFun::Length<TA, TR>(input);
18 }
19};
20
21struct GraphemeCountOperator {
22 template <class TA, class TR>
23 static inline TR Operation(TA input) {
24 return LengthFun::GraphemeCount<TA, TR>(input);
25 }
26};
27
// Unary operator for the list overloads of "length"/"len"/"array_length": returns the
// `length` member of the input entry, converted to the requested result type.
//   TA - input type exposing a `length` member, TR - result type
struct ArrayLengthOperator {
	template <class TA, class TR>
	static inline TR Operation(TA input) {
		// explicit cast: the member's type is declared elsewhere and may differ from TR
		return static_cast<TR>(input.length);
	}
};
34
35struct ArrayLengthBinaryOperator {
36 template <class TA, class TB, class TR>
37 static inline TR Operation(TA input, TB dimension) {
38 if (dimension != 1) {
39 throw NotImplementedException("array_length for dimensions other than 1 not implemented");
40 }
41 return input.length;
42 }
43};
44
// strlen returns the size in bytes
// Unary operator that returns input.GetSize() converted to the requested result type.
// Besides "strlen" it is registered for octet_length(BLOB) and is the implementation that
// LengthPropagateStats switches to.
//   TA - input type exposing GetSize(), TR - result type
struct StrLenOperator {
	template <class TA, class TR>
	static inline TR Operation(TA input) {
		return static_cast<TR>(input.GetSize());
	}
};
52
53struct OctetLenOperator {
54 template <class TA, class TR>
55 static inline TR Operation(TA input) {
56 return Bit::OctetLength(bits: input);
57 }
58};
59
// bitlen returns the size in bits
// Unary operator for bit_length on VARCHAR: eight times input.GetSize(), converted to the
// requested result type.
//   TA - input type exposing GetSize(), TR - result type
struct BitLenOperator {
	template <class TA, class TR>
	static inline TR Operation(TA input) {
		return static_cast<TR>(8 * input.GetSize());
	}
};
67
68// bitstringlen returns the amount of bits in a bitstring
69struct BitStringLenOperator {
70 template <class TA, class TR>
71 static inline TR Operation(TA input) {
72 return Bit::BitLength(bits: input);
73 }
74};
75
76static unique_ptr<BaseStatistics> LengthPropagateStats(ClientContext &context, FunctionStatisticsInput &input) {
77 auto &child_stats = input.child_stats;
78 auto &expr = input.expr;
79 D_ASSERT(child_stats.size() == 1);
80 // can only propagate stats if the children have stats
81 if (!StringStats::CanContainUnicode(stats: child_stats[0])) {
82 expr.function.function = ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>;
83 }
84 return nullptr;
85}
86
87static unique_ptr<FunctionData> ListLengthBind(ClientContext &context, ScalarFunction &bound_function,
88 vector<unique_ptr<Expression>> &arguments) {
89 if (arguments[0]->HasParameter()) {
90 throw ParameterNotResolvedException();
91 }
92 bound_function.arguments[0] = arguments[0]->return_type;
93 return nullptr;
94}
95
96void LengthFun::RegisterFunction(BuiltinFunctions &set) {
97 ScalarFunction array_length_unary =
98 ScalarFunction({LogicalType::LIST(child: LogicalType::ANY)}, LogicalType::BIGINT,
99 ScalarFunction::UnaryFunction<list_entry_t, int64_t, ArrayLengthOperator>, ListLengthBind);
100 ScalarFunctionSet length("length");
101 length.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT,
102 ScalarFunction::UnaryFunction<string_t, int64_t, StringLengthOperator>, nullptr,
103 nullptr, LengthPropagateStats));
104 length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT,
105 ScalarFunction::UnaryFunction<string_t, int64_t, BitStringLenOperator>));
106 length.AddFunction(function: array_length_unary);
107 set.AddFunction(set: length);
108 length.name = "len";
109 set.AddFunction(set: length);
110
111 ScalarFunctionSet length_grapheme("length_grapheme");
112 length_grapheme.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT,
113 ScalarFunction::UnaryFunction<string_t, int64_t, GraphemeCountOperator>,
114 nullptr, nullptr, LengthPropagateStats));
115 set.AddFunction(set: length_grapheme);
116
117 ScalarFunctionSet array_length("array_length");
118 array_length.AddFunction(function: array_length_unary);
119 array_length.AddFunction(function: ScalarFunction(
120 {LogicalType::LIST(child: LogicalType::ANY), LogicalType::BIGINT}, LogicalType::BIGINT,
121 ScalarFunction::BinaryFunction<list_entry_t, int64_t, int64_t, ArrayLengthBinaryOperator>, ListLengthBind));
122 set.AddFunction(set: array_length);
123
124 set.AddFunction(function: ScalarFunction("strlen", {LogicalType::VARCHAR}, LogicalType::BIGINT,
125 ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>));
126 ScalarFunctionSet bit_length("bit_length");
127 bit_length.AddFunction(function: ScalarFunction({LogicalType::VARCHAR}, LogicalType::BIGINT,
128 ScalarFunction::UnaryFunction<string_t, int64_t, BitLenOperator>));
129 bit_length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT,
130 ScalarFunction::UnaryFunction<string_t, int64_t, BitStringLenOperator>));
131 set.AddFunction(set: bit_length);
132 // length for BLOB type
133 ScalarFunctionSet octet_length("octet_length");
134 octet_length.AddFunction(function: ScalarFunction({LogicalType::BLOB}, LogicalType::BIGINT,
135 ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>));
136 octet_length.AddFunction(function: ScalarFunction({LogicalType::BIT}, LogicalType::BIGINT,
137 ScalarFunction::UnaryFunction<string_t, int64_t, OctetLenOperator>));
138 set.AddFunction(set: octet_length);
139}
140
141} // namespace duckdb
142