pad.cpp source code [DuckDB/src/function/scalar/string/pad.cpp]

1	#include "duckdb/function/scalar/string_functions.hpp"
2
3	#include "duckdb/common/exception.hpp"
4	#include "duckdb/common/vector_operations/vector_operations.hpp"
5	#include "duckdb/common/vector_operations/ternary_executor.hpp"
6	#include "utf8proc.hpp"
7
8	using namespace std;
9
10	namespace duckdb {
11
12	static pair<idx_t, idx_t> count_chars(const idx_t len, const char data, const* idx_t size) {
13	// Count how much of str will fit in the output
14	auto str = reinterpret_cast<const utf8proc_uint8_t *>(data);
15	idx_t nbytes = `0`;
16	idx_t nchars = `0`;
17	for (; nchars < len && nbytes < size; ++nchars) {
18	utf8proc_int32_t codepoint;
19	const auto bytes = utf8proc_iterate(str + nbytes, size - nbytes, &codepoint);
20	assert(bytes > `0`);
21	nbytes += bytes;
22	}
23
24	return pair<idx_t, idx_t>(nbytes, nchars);
25	}
26
27	static bool insert_padding(const idx_t len, const string_t &pad, vector<char> &result) {
28	// Copy the padding until the output is long enough
29	const auto data = pad.GetData();
30	const auto size = pad.GetSize();
31
32	// Check whether we need data that we don't have
33	if (len > `0` && size == `0`) {
34	return false;
35	}
36
37	// Insert characters until we have all we need.
38	auto str = reinterpret_cast<const utf8proc_uint8_t *>(data);
39	idx_t nbytes = `0`;
40	for (idx_t nchars = `0`; nchars < len; ++nchars) {
41	// If we are at the end of the pad, flush all of it and loop back
42	if (nbytes >= size) {
43	result.insert(result.end(), data, data + size);
44	nbytes = `0`;
45	}
46
47	// Write the next character
48	utf8proc_int32_t codepoint;
49	const auto bytes = utf8proc_iterate(str + nbytes, size - nbytes, &codepoint);
50	assert(bytes > `0`);
51	nbytes += bytes;
52	}
53
54	// Flush the remaining pad
55	result.insert(result.end(), data, data + nbytes);
56
57	return true;
58	}
59
60	static string_t lpad(const string_t &str, const int32_t len, const string_t &pad, vector<char> &result) {
61	// Reuse the buffer
62	result.clear();
63
64	// Get information about the base string
65	const auto data_str = str.GetData();
66	const auto size_str = str.GetSize();
67
68	// Count how much of str will fit in the output
69	const auto written = count_chars(len, data_str, size_str);
70
71	// Left pad by the number of characters still needed
72	if (!insert_padding(len - written.second, pad, result)) {
73	throw Exception ("Insufficient padding in LPAD.");
74	}
75
76	// Append as much of the original string as fits
77	result.insert(result.end(), data_str, data_str + written.first);
78
79	return string_t (result.data(), result.size());
80	}
81
82	struct LpadOperator {
83	static inline string_t Operation(const string_t &str, const int32_t len, const string_t &pad,
84	vector<char> &result) {
85	return lpad(str, len, pad, result);
86	}
87	};
88
89	static string_t rpad(const string_t &str, const int32_t len, const string_t &pad, vector<char> &result) {
90	// Reuse the buffer
91	result.clear();
92
93	// Get information about the base string
94	const auto data_str = str.GetData();
95	const auto size_str = str.GetSize();
96
97	// Count how much of str will fit in the output
98	const auto written = count_chars(len, data_str, size_str);
99
100	// Append as much of the original string as fits
101	result.insert(result.end(), data_str, data_str + written.first);
102
103	// Right pad by the number of characters still needed
104	if (!insert_padding(len - written.second, pad, result)) {
105	throw Exception ("Insufficient padding in RPAD.");
106	};
107
108	return string_t (result.data(), result.size());
109	}
110
111	struct RpadOperator {
112	static inline string_t Operation(const string_t &str, const int32_t len, const string_t &pad,
113	vector<char> &result) {
114	return rpad(str, len, pad, result);
115	}
116	};
117
118	template <class Op> static void pad_function(DataChunk &args, ExpressionState &state, Vector &result) {
119	assert(args.column_count() == `3` && args.data[`0`].type == TypeId::VARCHAR && args.data[`1`].type == TypeId::INT32 &&
120	args.data[`2`].type == TypeId::VARCHAR);
121	auto &str_vector = args.data [`0`];
122	auto &len_vector = args.data [`1`];
123	auto &pad_vector = args.data [`2`];
124
125	vector<char> buffer;
126	TernaryExecutor::Execute<string_t, int32_t, string_t, string_t>(
127	str_vector, len_vector, pad_vector, result, args.size(), [&](string_t str, int32_t len, string_t pad) {
128	len = max(len, int32_t(`0`));
129	return StringVector::AddString(result, Op::Operation(str, len, pad, buffer));
130	});
131	}
132
133	void LpadFun::RegisterFunction(BuiltinFunctions &set) {
134	set.AddFunction(ScalarFunction ("lpad", // name of the function
135	{SQLType::VARCHAR, SQLType::INTEGER, // argument list
136	SQLType::VARCHAR},
137	SQLType::VARCHAR, // return type
138	pad_function<LpadOperator>)); // pointer to function implementation
139	}
140
141	void RpadFun::RegisterFunction(BuiltinFunctions &set) {
142	set.AddFunction(ScalarFunction ("rpad", // name of the function
143	{SQLType::VARCHAR, SQLType::INTEGER, // argument list
144	SQLType::VARCHAR},
145	SQLType::VARCHAR, // return type
146	pad_function<RpadOperator>)); // pointer to function implementation
147	}
148
149	} // namespace duckdb
150

Browse the source code of DuckDB/src/function/scalar/string/pad.cpp