1#include "duckdb/function/scalar/string_functions.hpp"
2
3#include "duckdb/common/exception.hpp"
4#include "duckdb/common/vector_operations/vector_operations.hpp"
5#include "duckdb/common/vector_operations/unary_executor.hpp"
6#include "utf8proc.hpp"
7
8#include <string.h>
9
10using namespace std;
11
12namespace duckdb {
13
14struct SpaceChar {
15 static char Operation(utf8proc_int32_t codepoint) {
16 return UTF8PROC_CATEGORY_ZS == utf8proc_category(codepoint);
17 }
18};
19
20struct KeptChar {
21 static char Operation(utf8proc_int32_t codepoint) {
22 return false;
23 }
24};
25
26template <class LTRIM, class RTRIM> static void trim_function(Vector &input, Vector &result, idx_t count) {
27 assert(input.type == TypeId::VARCHAR);
28
29 UnaryExecutor::Execute<string_t, string_t, true>(input, result, count, [&](string_t input) {
30 const auto data = input.GetData();
31 const auto size = input.GetSize();
32
33 utf8proc_int32_t codepoint;
34 const auto str = reinterpret_cast<const utf8proc_uint8_t *>(data);
35
36 // Find the first character that is not left trimmed
37 idx_t begin = 0;
38 while (begin < size) {
39 const auto bytes = utf8proc_iterate(str + begin, size - begin, &codepoint);
40 assert(bytes > 0);
41 if (!LTRIM::Operation(codepoint)) {
42 break;
43 }
44 begin += bytes;
45 }
46
47 // Find the last character that is not right trimmed
48 idx_t end = size;
49 for (auto next = begin; next < size;) {
50 const auto bytes = utf8proc_iterate(str + next, size - next, &codepoint);
51 assert(bytes > 0);
52 next += bytes;
53 if (!RTRIM::Operation(codepoint)) {
54 end = next;
55 }
56 }
57
58 // Copy the trimmed string
59 auto target = StringVector::EmptyString(result, end - begin);
60 auto output = target.GetData();
61 memcpy(output, data + begin, end - begin);
62
63 target.Finalize();
64 return target;
65 });
66}
67
68static void trim_ltrim_function(DataChunk &args, ExpressionState &state, Vector &result) {
69 assert(args.column_count() == 1);
70 trim_function<SpaceChar, KeptChar>(args.data[0], result, args.size());
71}
72
73static void trim_rtrim_function(DataChunk &args, ExpressionState &state, Vector &result) {
74 assert(args.column_count() == 1);
75 trim_function<KeptChar, SpaceChar>(args.data[0], result, args.size());
76}
77
78void LtrimFun::RegisterFunction(BuiltinFunctions &set) {
79 set.AddFunction(ScalarFunction("ltrim", {SQLType::VARCHAR}, SQLType::VARCHAR, trim_ltrim_function));
80}
81
82void RtrimFun::RegisterFunction(BuiltinFunctions &set) {
83 set.AddFunction(ScalarFunction("rtrim", {SQLType::VARCHAR}, SQLType::VARCHAR, trim_rtrim_function));
84}
85
86} // namespace duckdb
87