1#include <Functions/IFunctionImpl.h>
2#include <Functions/FunctionFactory.h>
3#include <Functions/FunctionHelpers.h>
4#include <DataTypes/DataTypeArray.h>
5#include <Columns/ColumnArray.h>
6#include <Columns/ColumnNullable.h>
7#include <Columns/ColumnString.h>
8#include <Columns/ColumnFixedString.h>
9#include <Common/typeid_cast.h>
10#include <Common/assert_cast.h>
11
12
13namespace DB
14{
15
16namespace ErrorCodes
17{
18 extern const int LOGICAL_ERROR;
19 extern const int ILLEGAL_COLUMN;
20 extern const int ILLEGAL_TYPE_OF_ARGUMENT;
21}
22
23
24/** emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value.
25 */
26class FunctionEmptyArrayToSingle : public IFunction
27{
28public:
29 static constexpr auto name = "emptyArrayToSingle";
30 static FunctionPtr create(const Context &) { return std::make_shared<FunctionEmptyArrayToSingle>(); }
31
32 String getName() const override { return name; }
33
34 size_t getNumberOfArguments() const override { return 1; }
35 bool useDefaultImplementationForConstants() const override { return true; }
36 bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
37
38 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
39 {
40 const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
41 if (!array_type)
42 throw Exception("Argument for function " + getName() + " must be array.",
43 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
44
45 return arguments[0];
46 }
47
48 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
49};
50
51
52namespace
53{
54 namespace FunctionEmptyArrayToSingleImpl
55 {
56 bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
57 {
58 if (const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[0]).column.get()))
59 {
60 if (const_array->getValue<Array>().empty())
61 {
62 auto nested_type = typeid_cast<const DataTypeArray &>(*block.getByPosition(arguments[0]).type).getNestedType();
63
64 block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(
65 input_rows_count,
66 Array{nested_type->getDefault()});
67 }
68 else
69 block.getByPosition(result).column = block.getByPosition(arguments[0]).column;
70
71 return true;
72 }
73 else
74 return false;
75 }
76
77 template <typename T, bool nullable>
78 bool executeNumber(
79 const IColumn & src_data, const ColumnArray::Offsets & src_offsets,
80 IColumn & res_data_col, ColumnArray::Offsets & res_offsets,
81 const NullMap * src_null_map,
82 NullMap * res_null_map)
83 {
84 if (const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data))
85 {
86 const PaddedPODArray<T> & src_data_vec = src_data_concrete->getData();
87 PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(res_data_col).getData();
88
89 size_t size = src_offsets.size();
90 res_offsets.resize(size);
91 res_data.reserve(src_data_vec.size());
92
93 if (nullable)
94 res_null_map->reserve(src_null_map->size());
95
96 ColumnArray::Offset src_prev_offset = 0;
97 ColumnArray::Offset res_prev_offset = 0;
98
99 for (size_t i = 0; i < size; ++i)
100 {
101 if (src_offsets[i] != src_prev_offset)
102 {
103 size_t size_to_write = src_offsets[i] - src_prev_offset;
104 res_data.resize(res_prev_offset + size_to_write);
105 memcpy(&res_data[res_prev_offset], &src_data_vec[src_prev_offset], size_to_write * sizeof(T));
106
107 if (nullable)
108 {
109 res_null_map->resize(res_prev_offset + size_to_write);
110 memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write);
111 }
112
113 res_prev_offset += size_to_write;
114 res_offsets[i] = res_prev_offset;
115 }
116 else
117 {
118 res_data.push_back(T());
119 ++res_prev_offset;
120 res_offsets[i] = res_prev_offset;
121
122 if (nullable)
123 res_null_map->push_back(1); /// Push NULL.
124 }
125
126 src_prev_offset = src_offsets[i];
127 }
128
129 return true;
130 }
131 else
132 return false;
133 }
134
135
136 template <bool nullable>
137 bool executeFixedString(
138 const IColumn & src_data, const ColumnArray::Offsets & src_offsets,
139 IColumn & res_data_col, ColumnArray::Offsets & res_offsets,
140 const NullMap * src_null_map,
141 NullMap * res_null_map)
142 {
143 if (const ColumnFixedString * src_data_concrete = checkAndGetColumn<ColumnFixedString>(&src_data))
144 {
145 const size_t n = src_data_concrete->getN();
146 const ColumnFixedString::Chars & src_data_vec = src_data_concrete->getChars();
147
148 auto concrete_res_data = typeid_cast<ColumnFixedString *>(&res_data_col);
149 if (!concrete_res_data)
150 throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
151
152 ColumnFixedString::Chars & res_data = concrete_res_data->getChars();
153 size_t size = src_offsets.size();
154 res_offsets.resize(size);
155 res_data.reserve(src_data_vec.size());
156
157 if (nullable)
158 res_null_map->reserve(src_null_map->size());
159
160 ColumnArray::Offset src_prev_offset = 0;
161 ColumnArray::Offset res_prev_offset = 0;
162
163 for (size_t i = 0; i < size; ++i)
164 {
165 if (src_offsets[i] != src_prev_offset)
166 {
167 size_t size_to_write = src_offsets[i] - src_prev_offset;
168 size_t prev_res_data_size = res_data.size();
169 res_data.resize(prev_res_data_size + size_to_write * n);
170 memcpy(&res_data[prev_res_data_size], &src_data_vec[src_prev_offset * n], size_to_write * n);
171
172 if (nullable)
173 {
174 res_null_map->resize(res_prev_offset + size_to_write);
175 memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write);
176 }
177
178 res_prev_offset += size_to_write;
179 res_offsets[i] = res_prev_offset;
180 }
181 else
182 {
183 size_t prev_res_data_size = res_data.size();
184 res_data.resize(prev_res_data_size + n);
185 memset(&res_data[prev_res_data_size], 0, n);
186 ++res_prev_offset;
187 res_offsets[i] = res_prev_offset;
188
189 if (nullable)
190 res_null_map->push_back(1);
191 }
192
193 src_prev_offset = src_offsets[i];
194 }
195
196 return true;
197 }
198 else
199 return false;
200 }
201
202
203 template <bool nullable>
204 bool executeString(
205 const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets,
206 IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets,
207 const NullMap * src_null_map,
208 NullMap * res_null_map)
209 {
210 if (const ColumnString * src_data_concrete = checkAndGetColumn<ColumnString>(&src_data))
211 {
212 const ColumnString::Offsets & src_string_offsets = src_data_concrete->getOffsets();
213
214 auto concrete_res_string_offsets = typeid_cast<ColumnString *>(&res_data_col);
215 if (!concrete_res_string_offsets)
216 throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
217 ColumnString::Offsets & res_string_offsets = concrete_res_string_offsets->getOffsets();
218
219 const ColumnString::Chars & src_data_vec = src_data_concrete->getChars();
220
221 auto concrete_res_data = typeid_cast<ColumnString *>(&res_data_col);
222 if (!concrete_res_data)
223 throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
224 ColumnString::Chars & res_data = concrete_res_data->getChars();
225
226 size_t size = src_array_offsets.size();
227 res_array_offsets.resize(size);
228 res_string_offsets.reserve(src_string_offsets.size());
229 res_data.reserve(src_data_vec.size());
230
231 if (nullable)
232 res_null_map->reserve(src_null_map->size());
233
234 ColumnArray::Offset src_array_prev_offset = 0;
235 ColumnArray::Offset res_array_prev_offset = 0;
236
237 ColumnString::Offset src_string_prev_offset = 0;
238 ColumnString::Offset res_string_prev_offset = 0;
239
240 for (size_t i = 0; i < size; ++i)
241 {
242 if (src_array_offsets[i] != src_array_prev_offset)
243 {
244 size_t array_size = src_array_offsets[i] - src_array_prev_offset;
245
246 size_t bytes_to_copy = 0;
247 size_t from_string_prev_offset_local = src_string_prev_offset;
248 for (size_t j = 0; j < array_size; ++j)
249 {
250 size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local;
251
252 res_string_prev_offset += string_size;
253 res_string_offsets.push_back(res_string_prev_offset);
254
255 from_string_prev_offset_local += string_size;
256 bytes_to_copy += string_size;
257 }
258
259 size_t res_data_old_size = res_data.size();
260 res_data.resize(res_data_old_size + bytes_to_copy);
261 memcpy(&res_data[res_data_old_size], &src_data_vec[src_string_prev_offset], bytes_to_copy);
262
263 if (nullable)
264 {
265 res_null_map->resize(res_array_prev_offset + array_size);
266 memcpy(&(*res_null_map)[res_array_prev_offset], &(*src_null_map)[src_array_prev_offset], array_size);
267 }
268
269 res_array_prev_offset += array_size;
270 res_array_offsets[i] = res_array_prev_offset;
271 }
272 else
273 {
274 res_data.push_back(0); /// An empty string, including zero at the end.
275
276 if (nullable)
277 res_null_map->push_back(1);
278
279 ++res_string_prev_offset;
280 res_string_offsets.push_back(res_string_prev_offset);
281
282 ++res_array_prev_offset;
283 res_array_offsets[i] = res_array_prev_offset;
284 }
285
286 src_array_prev_offset = src_array_offsets[i];
287
288 if (src_array_prev_offset)
289 src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1];
290 }
291
292 return true;
293 }
294 else
295 return false;
296 }
297
298
299 template <bool nullable>
300 void executeGeneric(
301 const IColumn & src_data, const ColumnArray::Offsets & src_offsets,
302 IColumn & res_data, ColumnArray::Offsets & res_offsets,
303 const NullMap * src_null_map,
304 NullMap * res_null_map)
305 {
306 size_t size = src_offsets.size();
307 res_offsets.resize(size);
308 res_data.reserve(src_data.size());
309
310 if (nullable)
311 res_null_map->reserve(src_null_map->size());
312
313 ColumnArray::Offset src_prev_offset = 0;
314 ColumnArray::Offset res_prev_offset = 0;
315
316 for (size_t i = 0; i < size; ++i)
317 {
318 if (src_offsets[i] != src_prev_offset)
319 {
320 size_t size_to_write = src_offsets[i] - src_prev_offset;
321 res_data.insertRangeFrom(src_data, src_prev_offset, size_to_write);
322
323 if (nullable)
324 {
325 res_null_map->resize(res_prev_offset + size_to_write);
326 memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write);
327 }
328
329 res_prev_offset += size_to_write;
330 res_offsets[i] = res_prev_offset;
331 }
332 else
333 {
334 res_data.insertDefault();
335 ++res_prev_offset;
336 res_offsets[i] = res_prev_offset;
337
338 if (nullable)
339 res_null_map->push_back(1);
340 }
341
342 src_prev_offset = src_offsets[i];
343 }
344 }
345
346
347 template <bool nullable>
348 void executeDispatch(
349 const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets,
350 IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets,
351 const NullMap * src_null_map,
352 NullMap * res_null_map)
353 {
354 if (!(executeNumber<UInt8, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
355 || executeNumber<UInt16, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
356 || executeNumber<UInt32, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
357 || executeNumber<UInt64, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
358 || executeNumber<Int8, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
359 || executeNumber<Int16, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
360 || executeNumber<Int32, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
361 || executeNumber<Int64, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
362 || executeNumber<Float32, nullable>(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
363 || executeNumber<Float64, nullable>(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
364 || executeString<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)
365 || executeFixedString<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map)))
366 executeGeneric<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map);
367 }
368 }
369}
370
371
372void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
373{
374 if (FunctionEmptyArrayToSingleImpl::executeConst(block, arguments, result, input_rows_count))
375 return;
376
377 const ColumnArray * array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get());
378 if (!array)
379 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
380 ErrorCodes::ILLEGAL_COLUMN);
381
382 MutableColumnPtr res_ptr = array->cloneEmpty();
383 ColumnArray & res = assert_cast<ColumnArray &>(*res_ptr);
384
385 const IColumn & src_data = array->getData();
386 const ColumnArray::Offsets & src_offsets = array->getOffsets();
387 IColumn & res_data = res.getData();
388 ColumnArray::Offsets & res_offsets = res.getOffsets();
389
390 const NullMap * src_null_map = nullptr;
391 NullMap * res_null_map = nullptr;
392
393 const IColumn * inner_col;
394 IColumn * inner_res_col;
395
396 auto nullable_col = checkAndGetColumn<ColumnNullable>(src_data);
397 if (nullable_col)
398 {
399 inner_col = &nullable_col->getNestedColumn();
400 src_null_map = &nullable_col->getNullMapData();
401
402 auto & nullable_res_col = assert_cast<ColumnNullable &>(res_data);
403 inner_res_col = &nullable_res_col.getNestedColumn();
404 res_null_map = &nullable_res_col.getNullMapData();
405 }
406 else
407 {
408 inner_col = &src_data;
409 inner_res_col = &res_data;
410 }
411
412 if (nullable_col)
413 FunctionEmptyArrayToSingleImpl::executeDispatch<true>(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map);
414 else
415 FunctionEmptyArrayToSingleImpl::executeDispatch<false>(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map);
416
417 block.getByPosition(result).column = std::move(res_ptr);
418}
419
420
421void registerFunctionEmptyArrayToSingle(FunctionFactory & factory)
422{
423 factory.registerFunction<FunctionEmptyArrayToSingle>();
424}
425
426}
427