1 | #include "duckdb/function/scalar/string_functions.hpp" |
2 | |
3 | #include "duckdb/common/exception.hpp" |
4 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
5 | #include "duckdb/common/vector_operations/unary_executor.hpp" |
6 | #include "utf8proc.hpp" |
7 | |
8 | #include <string.h> |
9 | |
10 | using namespace std; |
11 | |
12 | namespace duckdb { |
13 | |
14 | struct SpaceChar { |
15 | static char Operation(utf8proc_int32_t codepoint) { |
16 | return UTF8PROC_CATEGORY_ZS == utf8proc_category(codepoint); |
17 | } |
18 | }; |
19 | |
20 | struct KeptChar { |
21 | static char Operation(utf8proc_int32_t codepoint) { |
22 | return false; |
23 | } |
24 | }; |
25 | |
26 | template <class LTRIM, class RTRIM> static void trim_function(Vector &input, Vector &result, idx_t count) { |
27 | assert(input.type == TypeId::VARCHAR); |
28 | |
29 | UnaryExecutor::Execute<string_t, string_t, true>(input, result, count, [&](string_t input) { |
30 | const auto data = input.GetData(); |
31 | const auto size = input.GetSize(); |
32 | |
33 | utf8proc_int32_t codepoint; |
34 | const auto str = reinterpret_cast<const utf8proc_uint8_t *>(data); |
35 | |
36 | // Find the first character that is not left trimmed |
37 | idx_t begin = 0; |
38 | while (begin < size) { |
39 | const auto bytes = utf8proc_iterate(str + begin, size - begin, &codepoint); |
40 | assert(bytes > 0); |
41 | if (!LTRIM::Operation(codepoint)) { |
42 | break; |
43 | } |
44 | begin += bytes; |
45 | } |
46 | |
47 | // Find the last character that is not right trimmed |
48 | idx_t end = size; |
49 | for (auto next = begin; next < size;) { |
50 | const auto bytes = utf8proc_iterate(str + next, size - next, &codepoint); |
51 | assert(bytes > 0); |
52 | next += bytes; |
53 | if (!RTRIM::Operation(codepoint)) { |
54 | end = next; |
55 | } |
56 | } |
57 | |
58 | // Copy the trimmed string |
59 | auto target = StringVector::EmptyString(result, end - begin); |
60 | auto output = target.GetData(); |
61 | memcpy(output, data + begin, end - begin); |
62 | |
63 | target.Finalize(); |
64 | return target; |
65 | }); |
66 | } |
67 | |
68 | static void trim_ltrim_function(DataChunk &args, ExpressionState &state, Vector &result) { |
69 | assert(args.column_count() == 1); |
70 | trim_function<SpaceChar, KeptChar>(args.data[0], result, args.size()); |
71 | } |
72 | |
73 | static void trim_rtrim_function(DataChunk &args, ExpressionState &state, Vector &result) { |
74 | assert(args.column_count() == 1); |
75 | trim_function<KeptChar, SpaceChar>(args.data[0], result, args.size()); |
76 | } |
77 | |
78 | void LtrimFun::RegisterFunction(BuiltinFunctions &set) { |
79 | set.AddFunction(ScalarFunction("ltrim" , {SQLType::VARCHAR}, SQLType::VARCHAR, trim_ltrim_function)); |
80 | } |
81 | |
82 | void RtrimFun::RegisterFunction(BuiltinFunctions &set) { |
83 | set.AddFunction(ScalarFunction("rtrim" , {SQLType::VARCHAR}, SQLType::VARCHAR, trim_rtrim_function)); |
84 | } |
85 | |
86 | } // namespace duckdb |
87 | |