1 | #include <Functions/IFunctionImpl.h> |
2 | #include <Functions/FunctionFactory.h> |
3 | #include <Functions/FunctionHelpers.h> |
4 | #include <DataTypes/DataTypeArray.h> |
5 | #include <Columns/ColumnArray.h> |
6 | #include <Columns/ColumnNullable.h> |
7 | #include <Columns/ColumnString.h> |
8 | #include <Columns/ColumnFixedString.h> |
9 | #include <Common/typeid_cast.h> |
10 | #include <Common/assert_cast.h> |
11 | |
12 | |
13 | namespace DB |
14 | { |
15 | |
/// Error codes thrown by this translation unit; only declared here, defined elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;            /// Thrown when the argument column is not an array column.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;  /// Thrown when the argument type is not an array type.
    extern const int LOGICAL_ERROR;             /// Thrown on an unexpected result column type ("Internal error").
}
22 | |
23 | |
24 | /** emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value. |
25 | */ |
26 | class FunctionEmptyArrayToSingle : public IFunction |
27 | { |
28 | public: |
29 | static constexpr auto name = "emptyArrayToSingle" ; |
30 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionEmptyArrayToSingle>(); } |
31 | |
32 | String getName() const override { return name; } |
33 | |
34 | size_t getNumberOfArguments() const override { return 1; } |
35 | bool useDefaultImplementationForConstants() const override { return true; } |
36 | bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } |
37 | |
38 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
39 | { |
40 | const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()); |
41 | if (!array_type) |
42 | throw Exception("Argument for function " + getName() + " must be array." , |
43 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
44 | |
45 | return arguments[0]; |
46 | } |
47 | |
48 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; |
49 | }; |
50 | |
51 | |
52 | namespace |
53 | { |
54 | namespace FunctionEmptyArrayToSingleImpl |
55 | { |
56 | bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) |
57 | { |
58 | if (const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[0]).column.get())) |
59 | { |
60 | if (const_array->getValue<Array>().empty()) |
61 | { |
62 | auto nested_type = typeid_cast<const DataTypeArray &>(*block.getByPosition(arguments[0]).type).getNestedType(); |
63 | |
64 | block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( |
65 | input_rows_count, |
66 | Array{nested_type->getDefault()}); |
67 | } |
68 | else |
69 | block.getByPosition(result).column = block.getByPosition(arguments[0]).column; |
70 | |
71 | return true; |
72 | } |
73 | else |
74 | return false; |
75 | } |
76 | |
77 | template <typename T, bool nullable> |
78 | bool executeNumber( |
79 | const IColumn & src_data, const ColumnArray::Offsets & src_offsets, |
80 | IColumn & res_data_col, ColumnArray::Offsets & res_offsets, |
81 | const NullMap * src_null_map, |
82 | NullMap * res_null_map) |
83 | { |
84 | if (const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data)) |
85 | { |
86 | const PaddedPODArray<T> & src_data_vec = src_data_concrete->getData(); |
87 | PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(res_data_col).getData(); |
88 | |
89 | size_t size = src_offsets.size(); |
90 | res_offsets.resize(size); |
91 | res_data.reserve(src_data_vec.size()); |
92 | |
93 | if (nullable) |
94 | res_null_map->reserve(src_null_map->size()); |
95 | |
96 | ColumnArray::Offset src_prev_offset = 0; |
97 | ColumnArray::Offset res_prev_offset = 0; |
98 | |
99 | for (size_t i = 0; i < size; ++i) |
100 | { |
101 | if (src_offsets[i] != src_prev_offset) |
102 | { |
103 | size_t size_to_write = src_offsets[i] - src_prev_offset; |
104 | res_data.resize(res_prev_offset + size_to_write); |
105 | memcpy(&res_data[res_prev_offset], &src_data_vec[src_prev_offset], size_to_write * sizeof(T)); |
106 | |
107 | if (nullable) |
108 | { |
109 | res_null_map->resize(res_prev_offset + size_to_write); |
110 | memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); |
111 | } |
112 | |
113 | res_prev_offset += size_to_write; |
114 | res_offsets[i] = res_prev_offset; |
115 | } |
116 | else |
117 | { |
118 | res_data.push_back(T()); |
119 | ++res_prev_offset; |
120 | res_offsets[i] = res_prev_offset; |
121 | |
122 | if (nullable) |
123 | res_null_map->push_back(1); /// Push NULL. |
124 | } |
125 | |
126 | src_prev_offset = src_offsets[i]; |
127 | } |
128 | |
129 | return true; |
130 | } |
131 | else |
132 | return false; |
133 | } |
134 | |
135 | |
136 | template <bool nullable> |
137 | bool executeFixedString( |
138 | const IColumn & src_data, const ColumnArray::Offsets & src_offsets, |
139 | IColumn & res_data_col, ColumnArray::Offsets & res_offsets, |
140 | const NullMap * src_null_map, |
141 | NullMap * res_null_map) |
142 | { |
143 | if (const ColumnFixedString * src_data_concrete = checkAndGetColumn<ColumnFixedString>(&src_data)) |
144 | { |
145 | const size_t n = src_data_concrete->getN(); |
146 | const ColumnFixedString::Chars & src_data_vec = src_data_concrete->getChars(); |
147 | |
148 | auto concrete_res_data = typeid_cast<ColumnFixedString *>(&res_data_col); |
149 | if (!concrete_res_data) |
150 | throw Exception{"Internal error" , ErrorCodes::LOGICAL_ERROR}; |
151 | |
152 | ColumnFixedString::Chars & res_data = concrete_res_data->getChars(); |
153 | size_t size = src_offsets.size(); |
154 | res_offsets.resize(size); |
155 | res_data.reserve(src_data_vec.size()); |
156 | |
157 | if (nullable) |
158 | res_null_map->reserve(src_null_map->size()); |
159 | |
160 | ColumnArray::Offset src_prev_offset = 0; |
161 | ColumnArray::Offset res_prev_offset = 0; |
162 | |
163 | for (size_t i = 0; i < size; ++i) |
164 | { |
165 | if (src_offsets[i] != src_prev_offset) |
166 | { |
167 | size_t size_to_write = src_offsets[i] - src_prev_offset; |
168 | size_t prev_res_data_size = res_data.size(); |
169 | res_data.resize(prev_res_data_size + size_to_write * n); |
170 | memcpy(&res_data[prev_res_data_size], &src_data_vec[src_prev_offset * n], size_to_write * n); |
171 | |
172 | if (nullable) |
173 | { |
174 | res_null_map->resize(res_prev_offset + size_to_write); |
175 | memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); |
176 | } |
177 | |
178 | res_prev_offset += size_to_write; |
179 | res_offsets[i] = res_prev_offset; |
180 | } |
181 | else |
182 | { |
183 | size_t prev_res_data_size = res_data.size(); |
184 | res_data.resize(prev_res_data_size + n); |
185 | memset(&res_data[prev_res_data_size], 0, n); |
186 | ++res_prev_offset; |
187 | res_offsets[i] = res_prev_offset; |
188 | |
189 | if (nullable) |
190 | res_null_map->push_back(1); |
191 | } |
192 | |
193 | src_prev_offset = src_offsets[i]; |
194 | } |
195 | |
196 | return true; |
197 | } |
198 | else |
199 | return false; |
200 | } |
201 | |
202 | |
203 | template <bool nullable> |
204 | bool executeString( |
205 | const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, |
206 | IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets, |
207 | const NullMap * src_null_map, |
208 | NullMap * res_null_map) |
209 | { |
210 | if (const ColumnString * src_data_concrete = checkAndGetColumn<ColumnString>(&src_data)) |
211 | { |
212 | const ColumnString::Offsets & src_string_offsets = src_data_concrete->getOffsets(); |
213 | |
214 | auto concrete_res_string_offsets = typeid_cast<ColumnString *>(&res_data_col); |
215 | if (!concrete_res_string_offsets) |
216 | throw Exception{"Internal error" , ErrorCodes::LOGICAL_ERROR}; |
217 | ColumnString::Offsets & res_string_offsets = concrete_res_string_offsets->getOffsets(); |
218 | |
219 | const ColumnString::Chars & src_data_vec = src_data_concrete->getChars(); |
220 | |
221 | auto concrete_res_data = typeid_cast<ColumnString *>(&res_data_col); |
222 | if (!concrete_res_data) |
223 | throw Exception{"Internal error" , ErrorCodes::LOGICAL_ERROR}; |
224 | ColumnString::Chars & res_data = concrete_res_data->getChars(); |
225 | |
226 | size_t size = src_array_offsets.size(); |
227 | res_array_offsets.resize(size); |
228 | res_string_offsets.reserve(src_string_offsets.size()); |
229 | res_data.reserve(src_data_vec.size()); |
230 | |
231 | if (nullable) |
232 | res_null_map->reserve(src_null_map->size()); |
233 | |
234 | ColumnArray::Offset src_array_prev_offset = 0; |
235 | ColumnArray::Offset res_array_prev_offset = 0; |
236 | |
237 | ColumnString::Offset src_string_prev_offset = 0; |
238 | ColumnString::Offset res_string_prev_offset = 0; |
239 | |
240 | for (size_t i = 0; i < size; ++i) |
241 | { |
242 | if (src_array_offsets[i] != src_array_prev_offset) |
243 | { |
244 | size_t array_size = src_array_offsets[i] - src_array_prev_offset; |
245 | |
246 | size_t bytes_to_copy = 0; |
247 | size_t from_string_prev_offset_local = src_string_prev_offset; |
248 | for (size_t j = 0; j < array_size; ++j) |
249 | { |
250 | size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local; |
251 | |
252 | res_string_prev_offset += string_size; |
253 | res_string_offsets.push_back(res_string_prev_offset); |
254 | |
255 | from_string_prev_offset_local += string_size; |
256 | bytes_to_copy += string_size; |
257 | } |
258 | |
259 | size_t res_data_old_size = res_data.size(); |
260 | res_data.resize(res_data_old_size + bytes_to_copy); |
261 | memcpy(&res_data[res_data_old_size], &src_data_vec[src_string_prev_offset], bytes_to_copy); |
262 | |
263 | if (nullable) |
264 | { |
265 | res_null_map->resize(res_array_prev_offset + array_size); |
266 | memcpy(&(*res_null_map)[res_array_prev_offset], &(*src_null_map)[src_array_prev_offset], array_size); |
267 | } |
268 | |
269 | res_array_prev_offset += array_size; |
270 | res_array_offsets[i] = res_array_prev_offset; |
271 | } |
272 | else |
273 | { |
274 | res_data.push_back(0); /// An empty string, including zero at the end. |
275 | |
276 | if (nullable) |
277 | res_null_map->push_back(1); |
278 | |
279 | ++res_string_prev_offset; |
280 | res_string_offsets.push_back(res_string_prev_offset); |
281 | |
282 | ++res_array_prev_offset; |
283 | res_array_offsets[i] = res_array_prev_offset; |
284 | } |
285 | |
286 | src_array_prev_offset = src_array_offsets[i]; |
287 | |
288 | if (src_array_prev_offset) |
289 | src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1]; |
290 | } |
291 | |
292 | return true; |
293 | } |
294 | else |
295 | return false; |
296 | } |
297 | |
298 | |
299 | template <bool nullable> |
300 | void executeGeneric( |
301 | const IColumn & src_data, const ColumnArray::Offsets & src_offsets, |
302 | IColumn & res_data, ColumnArray::Offsets & res_offsets, |
303 | const NullMap * src_null_map, |
304 | NullMap * res_null_map) |
305 | { |
306 | size_t size = src_offsets.size(); |
307 | res_offsets.resize(size); |
308 | res_data.reserve(src_data.size()); |
309 | |
310 | if (nullable) |
311 | res_null_map->reserve(src_null_map->size()); |
312 | |
313 | ColumnArray::Offset src_prev_offset = 0; |
314 | ColumnArray::Offset res_prev_offset = 0; |
315 | |
316 | for (size_t i = 0; i < size; ++i) |
317 | { |
318 | if (src_offsets[i] != src_prev_offset) |
319 | { |
320 | size_t size_to_write = src_offsets[i] - src_prev_offset; |
321 | res_data.insertRangeFrom(src_data, src_prev_offset, size_to_write); |
322 | |
323 | if (nullable) |
324 | { |
325 | res_null_map->resize(res_prev_offset + size_to_write); |
326 | memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); |
327 | } |
328 | |
329 | res_prev_offset += size_to_write; |
330 | res_offsets[i] = res_prev_offset; |
331 | } |
332 | else |
333 | { |
334 | res_data.insertDefault(); |
335 | ++res_prev_offset; |
336 | res_offsets[i] = res_prev_offset; |
337 | |
338 | if (nullable) |
339 | res_null_map->push_back(1); |
340 | } |
341 | |
342 | src_prev_offset = src_offsets[i]; |
343 | } |
344 | } |
345 | |
346 | |
347 | template <bool nullable> |
348 | void executeDispatch( |
349 | const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, |
350 | IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets, |
351 | const NullMap * src_null_map, |
352 | NullMap * res_null_map) |
353 | { |
354 | if (!(executeNumber<UInt8, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
355 | || executeNumber<UInt16, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
356 | || executeNumber<UInt32, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
357 | || executeNumber<UInt64, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
358 | || executeNumber<Int8, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
359 | || executeNumber<Int16, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
360 | || executeNumber<Int32, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
361 | || executeNumber<Int64, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
362 | || executeNumber<Float32, nullable>(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
363 | || executeNumber<Float64, nullable>(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
364 | || executeString<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
365 | || executeFixedString<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map))) |
366 | executeGeneric<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map); |
367 | } |
368 | } |
369 | } |
370 | |
371 | |
372 | void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) |
373 | { |
374 | if (FunctionEmptyArrayToSingleImpl::executeConst(block, arguments, result, input_rows_count)) |
375 | return; |
376 | |
377 | const ColumnArray * array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
378 | if (!array) |
379 | throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), |
380 | ErrorCodes::ILLEGAL_COLUMN); |
381 | |
382 | MutableColumnPtr res_ptr = array->cloneEmpty(); |
383 | ColumnArray & res = assert_cast<ColumnArray &>(*res_ptr); |
384 | |
385 | const IColumn & src_data = array->getData(); |
386 | const ColumnArray::Offsets & src_offsets = array->getOffsets(); |
387 | IColumn & res_data = res.getData(); |
388 | ColumnArray::Offsets & res_offsets = res.getOffsets(); |
389 | |
390 | const NullMap * src_null_map = nullptr; |
391 | NullMap * res_null_map = nullptr; |
392 | |
393 | const IColumn * inner_col; |
394 | IColumn * inner_res_col; |
395 | |
396 | auto nullable_col = checkAndGetColumn<ColumnNullable>(src_data); |
397 | if (nullable_col) |
398 | { |
399 | inner_col = &nullable_col->getNestedColumn(); |
400 | src_null_map = &nullable_col->getNullMapData(); |
401 | |
402 | auto & nullable_res_col = assert_cast<ColumnNullable &>(res_data); |
403 | inner_res_col = &nullable_res_col.getNestedColumn(); |
404 | res_null_map = &nullable_res_col.getNullMapData(); |
405 | } |
406 | else |
407 | { |
408 | inner_col = &src_data; |
409 | inner_res_col = &res_data; |
410 | } |
411 | |
412 | if (nullable_col) |
413 | FunctionEmptyArrayToSingleImpl::executeDispatch<true>(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map); |
414 | else |
415 | FunctionEmptyArrayToSingleImpl::executeDispatch<false>(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map); |
416 | |
417 | block.getByPosition(result).column = std::move(res_ptr); |
418 | } |
419 | |
420 | |
421 | void registerFunctionEmptyArrayToSingle(FunctionFactory & factory) |
422 | { |
423 | factory.registerFunction<FunctionEmptyArrayToSingle>(); |
424 | } |
425 | |
426 | } |
427 | |