#include "duckdb/function/scalar/string_functions.hpp"

#include <cstdlib>
#include <memory>

#include "utf8proc.hpp"
4
5using namespace std;
6
7namespace duckdb {
8
9static bool is_ascii(const char *input, idx_t n) {
10 for (idx_t i = 0; i < n; i++) {
11 if (input[i] & 0x80) {
12 // non-ascii character
13 return false;
14 }
15 }
16 return true;
17}
18
19static void strip_accents_function(DataChunk &args, ExpressionState &state, Vector &result) {
20 assert(args.column_count() == 1);
21 assert(args.data[0].type == TypeId::VARCHAR);
22
23 UnaryExecutor::Execute<string_t, string_t, true>(args.data[0], result, args.size(), [&](string_t input) {
24 auto input_data = input.GetData();
25 auto input_length = input.GetSize();
26 if (is_ascii(input_data, input_length)) {
27 return input;
28 }
29 // non-ascii, perform collation
30 auto stripped = utf8proc_remove_accents((const utf8proc_uint8_t *) input_data);
31 auto result_str = StringVector::AddString(result, (const char*) stripped);
32 free(stripped);
33 return result_str;
34 });
35 StringVector::AddHeapReference(result, args.data[0]);
36}
37
38ScalarFunction StripAccentsFun::GetFunction() {
39 return ScalarFunction("strip_accents", {SQLType::VARCHAR}, SQLType::VARCHAR, strip_accents_function);
40}
41
42void StripAccentsFun::RegisterFunction(BuiltinFunctions &set) {
43 set.AddFunction(StripAccentsFun::GetFunction());
44}
45
46} // namespace duckdb
47