// concat.cpp source code [DuckDB/src/function/scalar/string/concat.cpp]

1	#include "duckdb/function/scalar/string_functions.hpp"
2
3	#include "duckdb/common/exception.hpp"
4	#include "duckdb/common/types/date.hpp"
5	#include "duckdb/common/vector_operations/vector_operations.hpp"
6	#include "duckdb/common/vector_operations/binary_executor.hpp"
7
8	#include <string.h>
9
10	using namespace std;
11
12	namespace duckdb {
13
14	static void concat_function(DataChunk &args, ExpressionState &state, Vector &result) {
15	result.vector_type = VectorType::CONSTANT_VECTOR;
16	// iterate over the vectors to count how large the final string will be
17	idx_t constant_lengths = `0`;
18	vector<idx_t> result_lengths(args.size(), `0`);
19	for (idx_t col_idx = `0`; col_idx < args.column_count(); col_idx++) {
20	auto &input = args.data [col_idx];
21	assert(input.type == TypeId::VARCHAR);
22	if (input.vector_type == VectorType::CONSTANT_VECTOR) {
23	if (ConstantVector::IsNull(input)) {
24	// constant null, skip
25	continue;
26	}
27	auto input_data = ConstantVector::GetData<string_t>(input);
28	constant_lengths += input_data->GetSize();
29	} else {
30	// non-constant vector: set the result type to a flat vector
31	result.vector_type = VectorType::FLAT_VECTOR;
32	// now get the lengths of each of the input elements
33	VectorData vdata;
34	input.Orrify(args.size(), vdata);
35
36	auto input_data = (string_t *)vdata.data;
37	// now add the length of each vector to the result length
38	for (idx_t i = `0`; i < args.size(); i++) {
39	auto idx = vdata.sel->get_index(i);
40	if ((*vdata.nullmask)[idx]) {
41	continue;
42	}
43	result_lengths [i] += input_data[idx].GetSize();
44	}
45	}
46	}
47
48	// first we allocate the empty strings for each of the values
49	auto result_data = FlatVector::GetData<string_t>(result);
50	for (idx_t i = `0`; i < args.size(); i++) {
51	// allocate an empty string of the required size
52	idx_t str_length = constant_lengths + result_lengths [i];
53	result_data[i] = StringVector::EmptyString(result, str_length);
54	// we reuse the result_lengths vector to store the currently appended size
55	result_lengths [i] = `0`;
56	}
57
58	// now that the empty space for the strings has been allocated, perform the concatenation
59	for (idx_t col_idx = `0`; col_idx < args.column_count(); col_idx++) {
60	auto &input = args.data [col_idx];
61
62	// loop over the vector and concat to all results
63	if (input.vector_type == VectorType::CONSTANT_VECTOR) {
64	// constant vector
65	if (ConstantVector::IsNull(input)) {
66	// constant null, skip
67	continue;
68	}
69	// append the constant vector to each of the strings
70	auto input_data = ConstantVector::GetData<string_t>(input);
71	auto input_ptr = input_data->GetData();
72	auto input_len = input_data->GetSize();
73	for (idx_t i = `0`; i < args.size(); i++) {
74	memcpy(result_data[i].GetData() + result_lengths [i], input_ptr, input_len);
75	result_lengths [i] += input_len;
76	}
77	} else {
78	// standard vector
79	VectorData idata;
80	input.Orrify(args.size(), idata);
81
82	auto input_data = (string_t *)idata.data;
83	for (idx_t i = `0`; i < args.size(); i++) {
84	auto idx = idata.sel->get_index(i);
85	if ((*idata.nullmask)[idx]) {
86	continue;
87	}
88	auto input_ptr = input_data[idx].GetData();
89	auto input_len = input_data[idx].GetSize();
90	memcpy(result_data[i].GetData() + result_lengths [i], input_ptr, input_len);
91	result_lengths [i] += input_len;
92	}
93	}
94	}
95	for (idx_t i = `0`; i < args.size(); i++) {
96	result_data[i].Finalize();
97	}
98	}
99
100	static void concat_operator(DataChunk &args, ExpressionState &state, Vector &result) {
101	BinaryExecutor::Execute<string_t, string_t, string_t, true>(
102	args.data [`0`], args.data [`1`], result, args.size(), [&](string_t a, string_t b) {
103	auto a_data = a.GetData();
104	auto b_data = b.GetData();
105	auto a_length = a.GetSize();
106	auto b_length = b.GetSize();
107
108	auto target_length = a_length + b_length;
109	auto target = StringVector::EmptyString(result, target_length);
110	auto target_data = target.GetData();
111
112	memcpy(target_data, a_data, a_length);
113	memcpy(target_data + a_length, b_data, b_length);
114	target.Finalize();
115	return target;
116	});
117	}
118
119	static void templated_concat_ws(DataChunk &args, string_t sep_data, const* SelectionVector &sep_sel,
120	const SelectionVector &rsel, idx_t count, Vector &result) {
121	vector<idx_t> result_lengths(args.size(), `0`);
122	vector<bool> has_results(args.size(), false);
123	auto orrified_data = unique_ptr<VectorData[]>(new VectorData[args.column_count() - `1`]);
124	for (idx_t col_idx = `1`; col_idx < args.column_count(); col_idx++) {
125	args.data [col_idx].Orrify(args.size(), orrified_data [col_idx - `1`]);
126	}
127
128	// first figure out the lengths
129	for (idx_t col_idx = `1`; col_idx < args.column_count(); col_idx++) {
130	auto &idata = orrified_data [col_idx - `1`];
131
132	auto input_data = (string_t *)idata.data;
133	for (idx_t i = `0`; i < count; i++) {
134	auto ridx = rsel.get_index(i);
135	auto sep_idx = sep_sel.get_index(ridx);
136	auto idx = idata.sel->get_index(ridx);
137	if ((*idata.nullmask)[idx]) {
138	continue;
139	}
140	if (has_results [ridx]) {
141	result_lengths [ridx] += sep_data[sep_idx].GetSize();
142	}
143	result_lengths [ridx] += input_data[idx].GetSize();
144	has_results [ridx] = true;
145	}
146	}
147
148	// first we allocate the empty strings for each of the values
149	auto result_data = FlatVector::GetData<string_t>(result);
150	for (idx_t i = `0`; i < count; i++) {
151	auto ridx = rsel.get_index(i);
152	// allocate an empty string of the required size
153	result_data[ridx] = StringVector::EmptyString(result, result_lengths [ridx]);
154	// we reuse the result_lengths vector to store the currently appended size
155	result_lengths [ridx] = `0`;
156	has_results [ridx] = false;
157	}
158
159	// now that the empty space for the strings has been allocated, perform the concatenation
160	for (idx_t col_idx = `1`; col_idx < args.column_count(); col_idx++) {
161	auto &idata = orrified_data [col_idx - `1`];
162	auto input_data = (string_t *)idata.data;
163	for (idx_t i = `0`; i < count; i++) {
164	auto ridx = rsel.get_index(i);
165	auto sep_idx = sep_sel.get_index(ridx);
166	auto idx = idata.sel->get_index(ridx);
167	if ((*idata.nullmask)[idx]) {
168	continue;
169	}
170	if (has_results [ridx]) {
171	auto sep_size = sep_data[sep_idx].GetSize();
172	auto sep_ptr = sep_data[sep_idx].GetData();
173	memcpy(result_data[ridx].GetData() + result_lengths [ridx], sep_ptr, sep_size);
174	result_lengths [ridx] += sep_size;
175	}
176	auto input_ptr = input_data[idx].GetData();
177	auto input_len = input_data[idx].GetSize();
178	memcpy(result_data[ridx].GetData() + result_lengths [ridx], input_ptr, input_len);
179	result_lengths [ridx] += input_len;
180	has_results [ridx] = true;
181	}
182	}
183	for (idx_t i = `0`; i < count; i++) {
184	auto ridx = rsel.get_index(i);
185	result_data[ridx].Finalize();
186	}
187	}
188
189	static void concat_ws_function(DataChunk &args, ExpressionState &state, Vector &result) {
190	auto &separator = args.data [`0`];
191	VectorData vdata;
192	separator.Orrify(args.size(), vdata);
193
194	result.vector_type = VectorType::CONSTANT_VECTOR;
195	for (idx_t col_idx = `0`; col_idx < args.column_count(); col_idx++) {
196	if (args.data [col_idx].vector_type != VectorType::CONSTANT_VECTOR) {
197	result.vector_type = VectorType::FLAT_VECTOR;
198	break;
199	}
200	}
201	switch (separator.vector_type) {
202	case VectorType::CONSTANT_VECTOR:
203	if (ConstantVector::IsNull(separator)) {
204	// constant NULL as separator: return constant NULL vector
205	result.vector_type = VectorType::CONSTANT_VECTOR;
206	ConstantVector::SetNull(result, true);
207	return;
208	}
209	// no null values
210	templated_concat_ws(args, (string_t )vdata.data, vdata.sel, FlatVector::IncrementalSelectionVector,
211	args.size(), result);
212	return;
213	default: {
214	// default case: loop over nullmask and create a non-null selection vector
215	idx_t not_null_count = `0`;
216	SelectionVector not_null_vector(STANDARD_VECTOR_SIZE);
217	auto &result_nullmask = FlatVector::Nullmask(result);
218	for (idx_t i = `0`; i < args.size(); i++) {
219	if ((*vdata.nullmask)[vdata.sel->get_index(i)]) {
220	result_nullmask [i] = true;
221	} else {
222	not_null_vector.set_index(not_null_count++, i);
223	}
224	}
225	templated_concat_ws(args, (string_t )vdata.data, vdata.sel, not_null_vector, not_null_count, result);
226	return;
227	}
228	}
229	}
230
231	void ConcatFun::RegisterFunction(BuiltinFunctions &set) {
232	// the concat operator and concat function have different behavior regarding NULLs
233	// this is strange but seems consistent with postgresql and mysql
234	// (sqlite does not support the concat function, only the concat operator)
235
236	// the concat operator behaves as one would expect: any NULL value present results in a NULL
237	// i.e. NULL \|\| 'hello' = NULL
238	// the concat function, however, treats NULL values as an empty string
239	// i.e. concat(NULL, 'hello') = 'hello'
240	// concat_ws functions similarly to the concat function, except the result is NULL if the separator is NULL
241	// if the separator is not NULL, however, NULL values are counted as empty string
242	// there is one separate rule: there are no separators added between NULL values
243	// so the NULL value and empty string are different!
244	// e.g.:
245	// concat_ws(',', NULL, NULL) = ""
246	// concat_ws(',', '', '') = ","
247	ScalarFunction concat = ScalarFunction ("concat", {SQLType::VARCHAR}, SQLType::VARCHAR, concat_function);
248	concat.varargs = SQLType::VARCHAR;
249	set.AddFunction(concat);
250
251	ScalarFunctionSet concat_op("\|\|");
252	concat_op.AddFunction(ScalarFunction ({SQLType::VARCHAR, SQLType::VARCHAR}, SQLType::VARCHAR, concat_operator));
253	concat_op.AddFunction(ScalarFunction ({SQLType::BLOB, SQLType::BLOB}, SQLType::BLOB, concat_operator));
254	set.AddFunction(concat_op);
255
256
257	ScalarFunction concat_ws =
258	ScalarFunction ("concat_ws", {SQLType::VARCHAR, SQLType::VARCHAR}, SQLType::VARCHAR, concat_ws_function);
259	concat_ws.varargs = SQLType::VARCHAR;
260	set.AddFunction(concat_ws);
261	}
262
263	} // namespace duckdb
264

// Browse the source code of DuckDB/src/function/scalar/string/concat.cpp