1#include "duckdb/function/scalar/string_functions.hpp"
2
3#include "duckdb/common/exception.hpp"
4#include "duckdb/common/vector_operations/vector_operations.hpp"
5#include "duckdb/common/vector_operations/ternary_executor.hpp"
6#include "utf8proc.hpp"
7
8using namespace std;
9
10namespace duckdb {
11
12static pair<idx_t, idx_t> count_chars(const idx_t len, const char *data, const idx_t size) {
13 // Count how much of str will fit in the output
14 auto str = reinterpret_cast<const utf8proc_uint8_t *>(data);
15 idx_t nbytes = 0;
16 idx_t nchars = 0;
17 for (; nchars < len && nbytes < size; ++nchars) {
18 utf8proc_int32_t codepoint;
19 const auto bytes = utf8proc_iterate(str + nbytes, size - nbytes, &codepoint);
20 assert(bytes > 0);
21 nbytes += bytes;
22 }
23
24 return pair<idx_t, idx_t>(nbytes, nchars);
25}
26
27static bool insert_padding(const idx_t len, const string_t &pad, vector<char> &result) {
28 // Copy the padding until the output is long enough
29 const auto data = pad.GetData();
30 const auto size = pad.GetSize();
31
32 // Check whether we need data that we don't have
33 if (len > 0 && size == 0) {
34 return false;
35 }
36
37 // Insert characters until we have all we need.
38 auto str = reinterpret_cast<const utf8proc_uint8_t *>(data);
39 idx_t nbytes = 0;
40 for (idx_t nchars = 0; nchars < len; ++nchars) {
41 // If we are at the end of the pad, flush all of it and loop back
42 if (nbytes >= size) {
43 result.insert(result.end(), data, data + size);
44 nbytes = 0;
45 }
46
47 // Write the next character
48 utf8proc_int32_t codepoint;
49 const auto bytes = utf8proc_iterate(str + nbytes, size - nbytes, &codepoint);
50 assert(bytes > 0);
51 nbytes += bytes;
52 }
53
54 // Flush the remaining pad
55 result.insert(result.end(), data, data + nbytes);
56
57 return true;
58}
59
60static string_t lpad(const string_t &str, const int32_t len, const string_t &pad, vector<char> &result) {
61 // Reuse the buffer
62 result.clear();
63
64 // Get information about the base string
65 const auto data_str = str.GetData();
66 const auto size_str = str.GetSize();
67
68 // Count how much of str will fit in the output
69 const auto written = count_chars(len, data_str, size_str);
70
71 // Left pad by the number of characters still needed
72 if (!insert_padding(len - written.second, pad, result)) {
73 throw Exception("Insufficient padding in LPAD.");
74 }
75
76 // Append as much of the original string as fits
77 result.insert(result.end(), data_str, data_str + written.first);
78
79 return string_t(result.data(), result.size());
80}
81
82struct LpadOperator {
83 static inline string_t Operation(const string_t &str, const int32_t len, const string_t &pad,
84 vector<char> &result) {
85 return lpad(str, len, pad, result);
86 }
87};
88
89static string_t rpad(const string_t &str, const int32_t len, const string_t &pad, vector<char> &result) {
90 // Reuse the buffer
91 result.clear();
92
93 // Get information about the base string
94 const auto data_str = str.GetData();
95 const auto size_str = str.GetSize();
96
97 // Count how much of str will fit in the output
98 const auto written = count_chars(len, data_str, size_str);
99
100 // Append as much of the original string as fits
101 result.insert(result.end(), data_str, data_str + written.first);
102
103 // Right pad by the number of characters still needed
104 if (!insert_padding(len - written.second, pad, result)) {
105 throw Exception("Insufficient padding in RPAD.");
106 };
107
108 return string_t(result.data(), result.size());
109}
110
111struct RpadOperator {
112 static inline string_t Operation(const string_t &str, const int32_t len, const string_t &pad,
113 vector<char> &result) {
114 return rpad(str, len, pad, result);
115 }
116};
117
118template <class Op> static void pad_function(DataChunk &args, ExpressionState &state, Vector &result) {
119 assert(args.column_count() == 3 && args.data[0].type == TypeId::VARCHAR && args.data[1].type == TypeId::INT32 &&
120 args.data[2].type == TypeId::VARCHAR);
121 auto &str_vector = args.data[0];
122 auto &len_vector = args.data[1];
123 auto &pad_vector = args.data[2];
124
125 vector<char> buffer;
126 TernaryExecutor::Execute<string_t, int32_t, string_t, string_t>(
127 str_vector, len_vector, pad_vector, result, args.size(), [&](string_t str, int32_t len, string_t pad) {
128 len = max(len, int32_t(0));
129 return StringVector::AddString(result, Op::Operation(str, len, pad, buffer));
130 });
131}
132
133void LpadFun::RegisterFunction(BuiltinFunctions &set) {
134 set.AddFunction(ScalarFunction("lpad", // name of the function
135 {SQLType::VARCHAR, SQLType::INTEGER, // argument list
136 SQLType::VARCHAR},
137 SQLType::VARCHAR, // return type
138 pad_function<LpadOperator>)); // pointer to function implementation
139}
140
141void RpadFun::RegisterFunction(BuiltinFunctions &set) {
142 set.AddFunction(ScalarFunction("rpad", // name of the function
143 {SQLType::VARCHAR, SQLType::INTEGER, // argument list
144 SQLType::VARCHAR},
145 SQLType::VARCHAR, // return type
146 pad_function<RpadOperator>)); // pointer to function implementation
147}
148
149} // namespace duckdb
150