1#include "duckdb/function/scalar/string_functions.hpp"
2
3#include "utf8proc.hpp"
4
5namespace duckdb {
6
7bool StripAccentsFun::IsAscii(const char *input, idx_t n) {
8 for (idx_t i = 0; i < n; i++) {
9 if (input[i] & 0x80) {
10 // non-ascii character
11 return false;
12 }
13 }
14 return true;
15}
16
17struct StripAccentsOperator {
18 template <class INPUT_TYPE, class RESULT_TYPE>
19 static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
20 if (StripAccentsFun::IsAscii(input: input.GetData(), n: input.GetSize())) {
21 return input;
22 }
23
24 // non-ascii, perform collation
25 auto stripped = utf8proc_remove_accents((const utf8proc_uint8_t *)input.GetData(), input.GetSize());
26 auto result_str = StringVector::AddString(result, const_char_ptr_cast(stripped));
27 free(stripped);
28 return result_str;
29 }
30};
31
32static void StripAccentsFunction(DataChunk &args, ExpressionState &state, Vector &result) {
33 D_ASSERT(args.ColumnCount() == 1);
34
35 UnaryExecutor::ExecuteString<string_t, string_t, StripAccentsOperator>(input&: args.data[0], result, count: args.size());
36 StringVector::AddHeapReference(vector&: result, other&: args.data[0]);
37}
38
39ScalarFunction StripAccentsFun::GetFunction() {
40 return ScalarFunction("strip_accents", {LogicalType::VARCHAR}, LogicalType::VARCHAR, StripAccentsFunction);
41}
42
43void StripAccentsFun::RegisterFunction(BuiltinFunctions &set) {
44 set.AddFunction(function: StripAccentsFun::GetFunction());
45}
46
47} // namespace duckdb
48