//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/function/scalar/string_functions.hpp
//
//
//===----------------------------------------------------------------------===//
8
#pragma once

#include <cstddef>
#include <cstdint>

#include "duckdb/function/function_set.hpp"
#include "utf8proc.hpp"
#include "duckdb/function/built_in_functions.hpp"
14
// Forward declaration of re2::RE2 so this header can name the type without including the RE2 headers.
namespace re2 {
class RE2;
} // namespace re2
18
19namespace duckdb {
20
21struct LowerFun {
22 static uint8_t ascii_to_lower_map[];
23
24 //! Returns the length of the result string obtained from lowercasing the given input (in bytes)
25 static idx_t LowerLength(const char *input_data, idx_t input_length);
26 //! Lowercases the string to the target output location, result_data must have space for at least LowerLength bytes
27 static void LowerCase(const char *input_data, idx_t input_length, char *result_data);
28
29 static ScalarFunction GetFunction();
30 static void RegisterFunction(BuiltinFunctions &set);
31};
32
33struct UpperFun {
34 static uint8_t ascii_to_upper_map[];
35
36 static void RegisterFunction(BuiltinFunctions &set);
37};
38
39struct StripAccentsFun {
40 static bool IsAscii(const char *input, idx_t n);
41 static ScalarFunction GetFunction();
42 static void RegisterFunction(BuiltinFunctions &set);
43};
44
45struct ConcatFun {
46 static void RegisterFunction(BuiltinFunctions &set);
47};
48
49struct LengthFun {
50 static void RegisterFunction(BuiltinFunctions &set);
51 static inline bool IsCharacter(char c) {
52 return (c & 0xc0) != 0x80;
53 }
54
55 template <class TA, class TR>
56 static inline TR Length(TA input) {
57 auto input_data = input.GetData();
58 auto input_length = input.GetSize();
59 TR length = 0;
60 for (idx_t i = 0; i < input_length; i++) {
61 length += IsCharacter(c: input_data[i]);
62 }
63 return length;
64 }
65
66 template <class TA, class TR>
67 static inline TR GraphemeCount(TA input) {
68 auto input_data = input.GetData();
69 auto input_length = input.GetSize();
70 for (idx_t i = 0; i < input_length; i++) {
71 if (input_data[i] & 0x80) {
72 int64_t length = 0;
73 // non-ascii character: use grapheme iterator on remainder of string
74 utf8proc_grapheme_callback(input_data, input_length, [&](size_t start, size_t end) {
75 length++;
76 return true;
77 });
78 return length;
79 }
80 }
81 return input_length;
82 }
83};
84
85struct LikeFun {
86 static void RegisterFunction(BuiltinFunctions &set);
87 DUCKDB_API static bool Glob(const char *s, idx_t slen, const char *pattern, idx_t plen,
88 bool allow_question_mark = true);
89};
90
91struct LikeEscapeFun {
92 static void RegisterFunction(BuiltinFunctions &set);
93};
94
95struct NFCNormalizeFun {
96 static ScalarFunction GetFunction();
97 static void RegisterFunction(BuiltinFunctions &set);
98};
99
100struct SubstringFun {
101 static void RegisterFunction(BuiltinFunctions &set);
102 static string_t SubstringUnicode(Vector &result, string_t input, int64_t offset, int64_t length);
103 static string_t SubstringGrapheme(Vector &result, string_t input, int64_t offset, int64_t length);
104};
105
106struct PrefixFun {
107 static ScalarFunction GetFunction();
108 static void RegisterFunction(BuiltinFunctions &set);
109};
110
111struct SuffixFun {
112 static ScalarFunction GetFunction();
113 static void RegisterFunction(BuiltinFunctions &set);
114};
115
116struct ContainsFun {
117 static ScalarFunction GetFunction();
118 static void RegisterFunction(BuiltinFunctions &set);
119 static idx_t Find(const string_t &haystack, const string_t &needle);
120 static idx_t Find(const unsigned char *haystack, idx_t haystack_size, const unsigned char *needle,
121 idx_t needle_size);
122};
123
124struct RegexpFun {
125 static void RegisterFunction(BuiltinFunctions &set);
126};
127
128} // namespace duckdb
129