1 | #include <Functions/IFunctionImpl.h> |
2 | #include <Functions/FunctionFactory.h> |
3 | #include <Functions/FunctionHelpers.h> |
4 | #include <DataTypes/DataTypeArray.h> |
5 | #include <DataTypes/DataTypeNullable.h> |
6 | #include <DataTypes/DataTypesNumber.h> |
7 | #include <Columns/ColumnArray.h> |
8 | #include <Columns/ColumnString.h> |
9 | #include <Columns/ColumnFixedString.h> |
10 | #include <Columns/ColumnsNumber.h> |
11 | #include <Columns/ColumnNullable.h> |
12 | #include <Common/FieldVisitors.h> |
13 | #include <Common/memcmpSmall.h> |
14 | #include <Common/assert_cast.h> |
15 | |
16 | |
17 | namespace DB |
18 | { |
19 | |
/// Error codes used by the exceptions thrown in this file.
/// They are only declared here (`extern`); their definitions are not part of this file.
namespace ErrorCodes
{
    /// Thrown by the `Null` specialization of ArrayIndexNumImpl, which must never be called.
    extern const int LOGICAL_ERROR;
    /// Thrown when a constant string argument wraps neither a String nor a FixedString column.
    extern const int ILLEGAL_COLUMN;
    /// Thrown from getReturnTypeImpl when the argument types are not acceptable.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
26 | |
/// Result converter for `has`: the per-array result is a 0/1 flag.
struct IndexToOne
{
    using ResultType = UInt8;

    /// Records that a match was found; the position of the match is irrelevant.
    /// Returns false, which the scanning loops interpret as "stop looking at this array".
    static bool apply(size_t /*position*/, ResultType & current)
    {
        current = 1;
        return false;
    }
};
33 | |
/// Result converter for `indexOf`: the per-array result is the position of the first match.
struct IndexIdentity
{
    using ResultType = UInt64;

    /// Stores the 1-based position of the match (the scanning loops pass a 0-based one).
    /// Returns false, which the scanning loops interpret as "stop looking at this array".
    static bool apply(size_t position, ResultType & current)
    {
        current = position + 1;
        return false;
    }
};
41 | |
/// Result converter for `countEqual`: the per-array result is the number of matches.
struct IndexCount
{
    using ResultType = UInt64;

    /// Counts one more match; the position of the match is irrelevant.
    /// Returns true, which the scanning loops interpret as "keep looking at this array".
    static bool apply(size_t /*position*/, ResultType & current)
    {
        current += 1;
        return true;
    }
};
48 | |
49 | |
50 | template <typename T, typename U, typename IndexConv> |
51 | struct ArrayIndexNumImpl |
52 | { |
53 | private: |
54 | |
55 | #pragma GCC diagnostic push |
56 | #pragma GCC diagnostic ignored "-Wsign-compare" |
57 | |
58 | /// compares `lhs` against `i`-th element of `rhs` |
59 | static bool compare(const T & lhs, const PaddedPODArray<U> & rhs, const size_t i) { return lhs == rhs[i]; } |
60 | /// compares `lhs against `rhs`, third argument unused |
61 | static bool compare(const T & lhs, const U & rhs, size_t) { return lhs == rhs; } |
62 | |
63 | #pragma GCC diagnostic pop |
64 | |
65 | static bool hasNull(const PaddedPODArray<UInt8> & null_map, size_t i) |
66 | { |
67 | return null_map[i]; |
68 | } |
69 | |
70 | /// Both function arguments are ordinary. |
71 | template <typename ScalarOrVector> |
72 | static void vectorCase1( |
73 | const PaddedPODArray<T> & data, const ColumnArray::Offsets & offsets, |
74 | const ScalarOrVector & value, |
75 | PaddedPODArray<typename IndexConv::ResultType> & result) |
76 | { |
77 | size_t size = offsets.size(); |
78 | result.resize(size); |
79 | |
80 | ColumnArray::Offset current_offset = 0; |
81 | for (size_t i = 0; i < size; ++i) |
82 | { |
83 | size_t array_size = offsets[i] - current_offset; |
84 | typename IndexConv::ResultType current = 0; |
85 | |
86 | for (size_t j = 0; j < array_size; ++j) |
87 | { |
88 | if (compare(data[current_offset + j], value, i)) |
89 | { |
90 | if (!IndexConv::apply(j, current)) |
91 | break; |
92 | } |
93 | } |
94 | |
95 | result[i] = current; |
96 | current_offset = offsets[i]; |
97 | } |
98 | } |
99 | |
100 | /// The 2nd function argument is nullable. |
101 | template <typename ScalarOrVector> |
102 | static void vectorCase2( |
103 | const PaddedPODArray<T> & data, const ColumnArray::Offsets & offsets, |
104 | const ScalarOrVector & value, |
105 | PaddedPODArray<typename IndexConv::ResultType> & result, |
106 | const PaddedPODArray<UInt8> & null_map_item) |
107 | { |
108 | size_t size = offsets.size(); |
109 | result.resize(size); |
110 | |
111 | ColumnArray::Offset current_offset = 0; |
112 | for (size_t i = 0; i < size; ++i) |
113 | { |
114 | size_t array_size = offsets[i] - current_offset; |
115 | typename IndexConv::ResultType current = 0; |
116 | |
117 | for (size_t j = 0; j < array_size; ++j) |
118 | { |
119 | if (!hasNull(null_map_item, i) && compare(data[current_offset + j], value, i)) |
120 | { |
121 | if (!IndexConv::apply(j, current)) |
122 | break; |
123 | } |
124 | } |
125 | |
126 | result[i] = current; |
127 | current_offset = offsets[i]; |
128 | } |
129 | } |
130 | |
131 | /// The 1st function argument is a non-constant array of nullable values. |
132 | template <typename ScalarOrVector> |
133 | static void vectorCase3( |
134 | const PaddedPODArray<T> & data, const ColumnArray::Offsets & offsets, |
135 | const ScalarOrVector & value, |
136 | PaddedPODArray<typename IndexConv::ResultType> & result, |
137 | const PaddedPODArray<UInt8> & null_map_data) |
138 | { |
139 | size_t size = offsets.size(); |
140 | result.resize(size); |
141 | |
142 | ColumnArray::Offset current_offset = 0; |
143 | for (size_t i = 0; i < size; ++i) |
144 | { |
145 | size_t array_size = offsets[i] - current_offset; |
146 | typename IndexConv::ResultType current = 0; |
147 | |
148 | for (size_t j = 0; j < array_size; ++j) |
149 | { |
150 | if (null_map_data[current_offset + j]) |
151 | { |
152 | } |
153 | else if (compare(data[current_offset + j], value, i)) |
154 | { |
155 | if (!IndexConv::apply(j, current)) |
156 | break; |
157 | } |
158 | } |
159 | |
160 | result[i] = current; |
161 | current_offset = offsets[i]; |
162 | } |
163 | } |
164 | |
165 | /// The 1st function argument is a non-constant array of nullable values. |
166 | /// The 2nd function argument is nullable. |
167 | template <typename ScalarOrVector> |
168 | static void vectorCase4( |
169 | const PaddedPODArray<T> & data, const ColumnArray::Offsets & offsets, |
170 | const ScalarOrVector & value, |
171 | PaddedPODArray<typename IndexConv::ResultType> & result, |
172 | const PaddedPODArray<UInt8> & null_map_data, |
173 | const PaddedPODArray<UInt8> & null_map_item) |
174 | { |
175 | size_t size = offsets.size(); |
176 | result.resize(size); |
177 | |
178 | ColumnArray::Offset current_offset = 0; |
179 | for (size_t i = 0; i < size; ++i) |
180 | { |
181 | size_t array_size = offsets[i] - current_offset; |
182 | typename IndexConv::ResultType current = 0; |
183 | |
184 | for (size_t j = 0; j < array_size; ++j) |
185 | { |
186 | bool hit = false; |
187 | if (null_map_data[current_offset + j]) |
188 | { |
189 | if (hasNull(null_map_item, i)) |
190 | hit = true; |
191 | } |
192 | else if (compare(data[current_offset + j], value, i)) |
193 | hit = true; |
194 | |
195 | if (hit) |
196 | { |
197 | if (!IndexConv::apply(j, current)) |
198 | break; |
199 | } |
200 | } |
201 | |
202 | result[i] = current; |
203 | current_offset = offsets[i]; |
204 | } |
205 | } |
206 | |
207 | public: |
208 | template <typename ScalarOrVector> |
209 | static void vector( |
210 | const PaddedPODArray<T> & data, const ColumnArray::Offsets & offsets, |
211 | const ScalarOrVector & value, |
212 | PaddedPODArray<typename IndexConv::ResultType> & result, |
213 | const PaddedPODArray<UInt8> * null_map_data, |
214 | const PaddedPODArray<UInt8> * null_map_item) |
215 | { |
216 | /// Processing is split into 4 cases. |
217 | if (!null_map_data && !null_map_item) |
218 | vectorCase1(data, offsets, value, result); |
219 | else if (!null_map_data && null_map_item) |
220 | vectorCase2(data, offsets, value, result, *null_map_item); |
221 | else if (null_map_data && !null_map_item) |
222 | vectorCase3(data, offsets, value, result, *null_map_data); |
223 | else |
224 | vectorCase4(data, offsets, value, result, *null_map_data, *null_map_item); |
225 | } |
226 | }; |
227 | |
228 | /// Specialization that catches internal errors. |
229 | template <typename T, typename IndexConv> |
230 | struct ArrayIndexNumImpl<T, Null, IndexConv> |
231 | { |
232 | template <typename ScalarOrVector> |
233 | static void vector( |
234 | const PaddedPODArray<T> &, const ColumnArray::Offsets &, |
235 | const ScalarOrVector &, |
236 | PaddedPODArray<typename IndexConv::ResultType> &, |
237 | const PaddedPODArray<UInt8> *, |
238 | const PaddedPODArray<UInt8> *) |
239 | { |
240 | throw Exception{"Logical error in implementation of a function that returns array index" , ErrorCodes::LOGICAL_ERROR}; |
241 | } |
242 | }; |
243 | |
244 | /// Implementation for arrays of numbers when the 2nd function argument |
245 | /// is a NULL value. |
246 | template <typename T, typename IndexConv> |
247 | struct ArrayIndexNumNullImpl |
248 | { |
249 | static void vector( |
250 | const PaddedPODArray<T> & /*data*/, const ColumnArray::Offsets & offsets, |
251 | PaddedPODArray<typename IndexConv::ResultType> & result, |
252 | const PaddedPODArray<UInt8> * null_map_data) |
253 | { |
254 | size_t size = offsets.size(); |
255 | result.resize(size); |
256 | |
257 | ColumnArray::Offset current_offset = 0; |
258 | for (size_t i = 0; i < size; ++i) |
259 | { |
260 | size_t array_size = offsets[i] - current_offset; |
261 | typename IndexConv::ResultType current = 0; |
262 | |
263 | for (size_t j = 0; j < array_size; ++j) |
264 | { |
265 | if (null_map_data && (*null_map_data)[current_offset + j]) |
266 | { |
267 | if (!IndexConv::apply(j, current)) |
268 | break; |
269 | } |
270 | } |
271 | |
272 | result[i] = current; |
273 | current_offset = offsets[i]; |
274 | } |
275 | } |
276 | }; |
277 | |
278 | /// Implementation for arrays of strings when the 2nd function argument is a NULL value. |
279 | template <typename IndexConv> |
280 | struct ArrayIndexStringNullImpl |
281 | { |
282 | static void vector_const( |
283 | const ColumnString::Chars & /*data*/, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & /*string_offsets*/, |
284 | PaddedPODArray<typename IndexConv::ResultType> & result, |
285 | const PaddedPODArray<UInt8> * null_map_data) |
286 | { |
287 | const auto size = offsets.size(); |
288 | result.resize(size); |
289 | |
290 | ColumnArray::Offset current_offset = 0; |
291 | for (size_t i = 0; i < size; ++i) |
292 | { |
293 | const auto array_size = offsets[i] - current_offset; |
294 | typename IndexConv::ResultType current = 0; |
295 | |
296 | for (size_t j = 0; j < array_size; ++j) |
297 | { |
298 | if (null_map_data && (*null_map_data)[current_offset + j]) |
299 | { |
300 | if (!IndexConv::apply(j, current)) |
301 | break; |
302 | } |
303 | } |
304 | |
305 | result[i] = current; |
306 | current_offset = offsets[i]; |
307 | } |
308 | } |
309 | }; |
310 | |
311 | template <typename IndexConv> |
312 | struct ArrayIndexStringImpl |
313 | { |
314 | static void vector_const( |
315 | const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, |
316 | const ColumnString::Chars & value, ColumnString::Offset value_size, |
317 | PaddedPODArray<typename IndexConv::ResultType> & result, |
318 | const PaddedPODArray<UInt8> * null_map_data) |
319 | { |
320 | const auto size = offsets.size(); |
321 | result.resize(size); |
322 | |
323 | ColumnArray::Offset current_offset = 0; |
324 | for (size_t i = 0; i < size; ++i) |
325 | { |
326 | const auto array_size = offsets[i] - current_offset; |
327 | typename IndexConv::ResultType current = 0; |
328 | |
329 | for (size_t j = 0; j < array_size; ++j) |
330 | { |
331 | ColumnArray::Offset string_pos = current_offset == 0 && j == 0 |
332 | ? 0 |
333 | : string_offsets[current_offset + j - 1]; |
334 | |
335 | ColumnArray::Offset string_size = string_offsets[current_offset + j] - string_pos - 1; |
336 | |
337 | if (null_map_data && (*null_map_data)[current_offset + j]) |
338 | { |
339 | } |
340 | else if (memequalSmallAllowOverflow15(value.data(), value_size, &data[string_pos], string_size)) |
341 | { |
342 | if (!IndexConv::apply(j, current)) |
343 | break; |
344 | } |
345 | } |
346 | |
347 | result[i] = current; |
348 | current_offset = offsets[i]; |
349 | } |
350 | } |
351 | |
352 | static void vector_vector( |
353 | const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, |
354 | const ColumnString::Chars & item_values, const ColumnString::Offsets & item_offsets, |
355 | PaddedPODArray<typename IndexConv::ResultType> & result, |
356 | const PaddedPODArray<UInt8> * null_map_data, |
357 | const PaddedPODArray<UInt8> * null_map_item) |
358 | { |
359 | const auto size = offsets.size(); |
360 | result.resize(size); |
361 | |
362 | ColumnArray::Offset current_offset = 0; |
363 | for (size_t i = 0; i < size; ++i) |
364 | { |
365 | const auto array_size = offsets[i] - current_offset; |
366 | typename IndexConv::ResultType current = 0; |
367 | const auto value_pos = 0 == i ? 0 : item_offsets[i - 1]; |
368 | const auto value_size = item_offsets[i] - value_pos; |
369 | |
370 | for (size_t j = 0; j < array_size; ++j) |
371 | { |
372 | ColumnArray::Offset string_pos = current_offset == 0 && j == 0 |
373 | ? 0 |
374 | : string_offsets[current_offset + j - 1]; |
375 | |
376 | ColumnArray::Offset string_size = string_offsets[current_offset + j] - string_pos; |
377 | |
378 | bool hit = false; |
379 | |
380 | if (null_map_data && (*null_map_data)[current_offset + j]) |
381 | { |
382 | if (null_map_item && (*null_map_item)[i]) |
383 | hit = true; |
384 | } |
385 | else if (memequalSmallAllowOverflow15(&item_values[value_pos], value_size, &data[string_pos], string_size)) |
386 | hit = true; |
387 | |
388 | if (hit) |
389 | { |
390 | if (!IndexConv::apply(j, current)) |
391 | break; |
392 | } |
393 | } |
394 | |
395 | result[i] = current; |
396 | current_offset = offsets[i]; |
397 | } |
398 | } |
399 | }; |
400 | |
401 | /// Catch-all implementation for arrays of arbitrary type. |
402 | /// To compare with constant value, create non-constant column with single element, |
403 | /// and pass is_value_has_single_element_to_compare = true. |
404 | template <typename IndexConv, bool is_value_has_single_element_to_compare> |
405 | struct ArrayIndexGenericImpl |
406 | { |
407 | private: |
408 | /// Both function arguments are ordinary. |
409 | static void vectorCase1( |
410 | const IColumn & data, const ColumnArray::Offsets & offsets, |
411 | const IColumn & value, |
412 | PaddedPODArray<typename IndexConv::ResultType> & result) |
413 | { |
414 | size_t size = offsets.size(); |
415 | result.resize(size); |
416 | |
417 | ColumnArray::Offset current_offset = 0; |
418 | for (size_t i = 0; i < size; ++i) |
419 | { |
420 | size_t array_size = offsets[i] - current_offset; |
421 | typename IndexConv::ResultType current = 0; |
422 | |
423 | for (size_t j = 0; j < array_size; ++j) |
424 | { |
425 | if (0 == data.compareAt(current_offset + j, is_value_has_single_element_to_compare ? 0 : i, value, 1)) |
426 | { |
427 | if (!IndexConv::apply(j, current)) |
428 | break; |
429 | } |
430 | } |
431 | |
432 | result[i] = current; |
433 | current_offset = offsets[i]; |
434 | } |
435 | } |
436 | |
437 | /// The 2nd function argument is nullable. |
438 | static void vectorCase2( |
439 | const IColumn & data, const ColumnArray::Offsets & offsets, |
440 | const IColumn & value, |
441 | PaddedPODArray<typename IndexConv::ResultType> & result, |
442 | const PaddedPODArray<UInt8> & null_map_item) |
443 | { |
444 | size_t size = offsets.size(); |
445 | result.resize(size); |
446 | |
447 | ColumnArray::Offset current_offset = 0; |
448 | for (size_t i = 0; i < size; ++i) |
449 | { |
450 | size_t array_size = offsets[i] - current_offset; |
451 | typename IndexConv::ResultType current = 0; |
452 | |
453 | for (size_t j = 0; j < array_size; ++j) |
454 | { |
455 | if ((null_map_item[i] == 0) && |
456 | (0 == data.compareAt(current_offset + j, is_value_has_single_element_to_compare ? 0 : i, value, 1))) |
457 | { |
458 | if (!IndexConv::apply(j, current)) |
459 | break; |
460 | } |
461 | } |
462 | |
463 | result[i] = current; |
464 | current_offset = offsets[i]; |
465 | } |
466 | } |
467 | |
468 | /// The 1st function argument is a non-constant array of nullable values. |
469 | static void vectorCase3( |
470 | const IColumn & data, const ColumnArray::Offsets & offsets, |
471 | const IColumn & value, |
472 | PaddedPODArray<typename IndexConv::ResultType> & result, |
473 | const PaddedPODArray<UInt8> & null_map_data) |
474 | { |
475 | size_t size = offsets.size(); |
476 | result.resize(size); |
477 | |
478 | ColumnArray::Offset current_offset = 0; |
479 | for (size_t i = 0; i < size; ++i) |
480 | { |
481 | size_t array_size = offsets[i] - current_offset; |
482 | typename IndexConv::ResultType current = 0; |
483 | |
484 | for (size_t j = 0; j < array_size; ++j) |
485 | { |
486 | if (null_map_data[current_offset + j]) |
487 | { |
488 | } |
489 | else if (0 == data.compareAt(current_offset + j, is_value_has_single_element_to_compare ? 0 : i, value, 1)) |
490 | { |
491 | if (!IndexConv::apply(j, current)) |
492 | break; |
493 | } |
494 | } |
495 | |
496 | result[i] = current; |
497 | current_offset = offsets[i]; |
498 | } |
499 | } |
500 | |
501 | /// The 1st function argument is a non-constant array of nullable values. |
502 | /// The 2nd function argument is nullable. |
503 | static void vectorCase4( |
504 | const IColumn & data, const ColumnArray::Offsets & offsets, |
505 | const IColumn & value, |
506 | PaddedPODArray<typename IndexConv::ResultType> & result, |
507 | const PaddedPODArray<UInt8> & null_map_data, |
508 | const PaddedPODArray<UInt8> & null_map_item) |
509 | { |
510 | size_t size = offsets.size(); |
511 | result.resize(size); |
512 | |
513 | ColumnArray::Offset current_offset = 0; |
514 | for (size_t i = 0; i < size; ++i) |
515 | { |
516 | size_t array_size = offsets[i] - current_offset; |
517 | typename IndexConv::ResultType current = 0; |
518 | |
519 | for (size_t j = 0; j < array_size; ++j) |
520 | { |
521 | bool hit = false; |
522 | if (null_map_data[current_offset + j]) |
523 | { |
524 | if (null_map_item[i]) |
525 | hit = true; |
526 | } |
527 | else if (0 == data.compareAt(current_offset + j, is_value_has_single_element_to_compare ? 0 : i, value, 1)) |
528 | hit = true; |
529 | |
530 | if (hit) |
531 | { |
532 | if (!IndexConv::apply(j, current)) |
533 | break; |
534 | } |
535 | } |
536 | } |
537 | } |
538 | |
539 | public: |
540 | static void vector( |
541 | const IColumn & data, const ColumnArray::Offsets & offsets, |
542 | const IColumn & value, |
543 | PaddedPODArray<typename IndexConv::ResultType> & result, |
544 | const PaddedPODArray<UInt8> * null_map_data, |
545 | const PaddedPODArray<UInt8> * null_map_item) |
546 | { |
547 | /// Processing is split into 4 cases. |
548 | if (!null_map_data && !null_map_item) |
549 | vectorCase1(data, offsets, value, result); |
550 | else if (!null_map_data && null_map_item) |
551 | vectorCase2(data, offsets, value, result, *null_map_item); |
552 | else if (null_map_data && !null_map_item) |
553 | vectorCase3(data, offsets, value, result, *null_map_data); |
554 | else |
555 | vectorCase4(data, offsets, value, result, *null_map_data, *null_map_item); |
556 | } |
557 | }; |
558 | |
559 | /// Catch-all implementation for arrays of arbitrary type |
560 | /// when the 2nd function argument is a NULL value. |
561 | template <typename IndexConv> |
562 | struct ArrayIndexGenericNullImpl |
563 | { |
564 | static void vector( |
565 | const IColumn & /*data*/, const ColumnArray::Offsets & offsets, |
566 | PaddedPODArray<typename IndexConv::ResultType> & result, |
567 | const PaddedPODArray<UInt8> * null_map_data) |
568 | { |
569 | size_t size = offsets.size(); |
570 | result.resize(size); |
571 | |
572 | ColumnArray::Offset current_offset = 0; |
573 | for (size_t i = 0; i < size; ++i) |
574 | { |
575 | size_t array_size = offsets[i] - current_offset; |
576 | typename IndexConv::ResultType current = 0; |
577 | |
578 | for (size_t j = 0; j < array_size; ++j) |
579 | { |
580 | if (null_map_data && (*null_map_data)[current_offset + j]) |
581 | { |
582 | if (!IndexConv::apply(j, current)) |
583 | break; |
584 | } |
585 | } |
586 | |
587 | result[i] = current; |
588 | current_offset = offsets[i]; |
589 | } |
590 | } |
591 | }; |
592 | |
593 | |
594 | inline bool allowArrayIndex(const DataTypePtr & type0, const DataTypePtr & type1) |
595 | { |
596 | DataTypePtr data_type0 = removeNullable(type0); |
597 | DataTypePtr data_type1 = removeNullable(type1); |
598 | |
599 | return ((isNativeNumber(data_type0) || isEnum(data_type0)) && isNativeNumber(data_type1)) |
600 | || data_type0->equals(*data_type1); |
601 | } |
602 | |
603 | |
604 | template <typename IndexConv, typename Name> |
605 | class FunctionArrayIndex : public IFunction |
606 | { |
607 | public: |
608 | static constexpr auto name = Name::name; |
609 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionArrayIndex>(); } |
610 | |
611 | private: |
612 | using ResultColumnType = ColumnVector<typename IndexConv::ResultType>; |
613 | |
614 | template <typename T> |
615 | bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result) |
616 | { |
617 | return executeNumberNumber<T, UInt8>(block, arguments, result) |
618 | || executeNumberNumber<T, UInt16>(block, arguments, result) |
619 | || executeNumberNumber<T, UInt32>(block, arguments, result) |
620 | || executeNumberNumber<T, UInt64>(block, arguments, result) |
621 | || executeNumberNumber<T, Int8>(block, arguments, result) |
622 | || executeNumberNumber<T, Int16>(block, arguments, result) |
623 | || executeNumberNumber<T, Int32>(block, arguments, result) |
624 | || executeNumberNumber<T, Int64>(block, arguments, result) |
625 | || executeNumberNumber<T, Float32>(block, arguments, result) |
626 | || executeNumberNumber<T, Float64>(block, arguments, result) |
627 | || executeNumberNumber<T, Null>(block, arguments, result); |
628 | } |
629 | |
630 | template <typename T, typename U> |
631 | bool executeNumberNumber(Block & block, const ColumnNumbers & arguments, size_t result) |
632 | { |
633 | const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
634 | |
635 | if (!col_array) |
636 | return false; |
637 | |
638 | const ColumnVector<T> * col_nested = checkAndGetColumn<ColumnVector<T>>(&col_array->getData()); |
639 | |
640 | if (!col_nested) |
641 | return false; |
642 | |
643 | auto col_res = ResultColumnType::create(); |
644 | |
645 | /// Null maps of the 1st and second function arguments, |
646 | /// if it applies. |
647 | const PaddedPODArray<UInt8> * null_map_data = nullptr; |
648 | const PaddedPODArray<UInt8> * null_map_item = nullptr; |
649 | |
650 | if (arguments.size() > 2) |
651 | { |
652 | const auto & null_map1 = block.getByPosition(arguments[2]).column; |
653 | if (null_map1) |
654 | null_map_data = &assert_cast<const ColumnUInt8 &>(*null_map1).getData(); |
655 | |
656 | const auto & null_map2 = block.getByPosition(arguments[3]).column; |
657 | if (null_map2) |
658 | null_map_item = &assert_cast<const ColumnUInt8 &>(*null_map2).getData(); |
659 | } |
660 | |
661 | const auto item_arg = block.getByPosition(arguments[1]).column.get(); |
662 | |
663 | if (item_arg->onlyNull()) |
664 | ArrayIndexNumNullImpl<T, IndexConv>::vector(col_nested->getData(), col_array->getOffsets(), |
665 | col_res->getData(), null_map_data); |
666 | else if (const auto item_arg_const = checkAndGetColumnConst<ColumnVector<U>>(item_arg)) |
667 | ArrayIndexNumImpl<T, U, IndexConv>::vector(col_nested->getData(), col_array->getOffsets(), |
668 | item_arg_const->template getValue<U>(), col_res->getData(), null_map_data, nullptr); |
669 | else if (const auto item_arg_vector = checkAndGetColumn<ColumnVector<U>>(item_arg)) |
670 | ArrayIndexNumImpl<T, U, IndexConv>::vector(col_nested->getData(), col_array->getOffsets(), |
671 | item_arg_vector->getData(), col_res->getData(), null_map_data, null_map_item); |
672 | else |
673 | return false; |
674 | |
675 | block.getByPosition(result).column = std::move(col_res); |
676 | return true; |
677 | } |
678 | |
679 | bool executeString(Block & block, const ColumnNumbers & arguments, size_t result) |
680 | { |
681 | const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
682 | |
683 | if (!col_array) |
684 | return false; |
685 | |
686 | const ColumnString * col_nested = checkAndGetColumn<ColumnString>(&col_array->getData()); |
687 | |
688 | if (!col_nested) |
689 | return false; |
690 | |
691 | auto col_res = ResultColumnType::create(); |
692 | |
693 | /// Null maps of the 1st and second function arguments, |
694 | /// if it applies. |
695 | const PaddedPODArray<UInt8> * null_map_data = nullptr; |
696 | const PaddedPODArray<UInt8> * null_map_item = nullptr; |
697 | |
698 | if (arguments.size() > 2) |
699 | { |
700 | const auto & col1 = block.getByPosition(arguments[2]).column; |
701 | if (col1) |
702 | null_map_data = &assert_cast<const ColumnUInt8 &>(*col1).getData(); |
703 | |
704 | const auto & col2 = block.getByPosition(arguments[3]).column; |
705 | if (col2) |
706 | null_map_item = &assert_cast<const ColumnUInt8 &>(*col2).getData(); |
707 | } |
708 | |
709 | const auto item_arg = block.getByPosition(arguments[1]).column.get(); |
710 | |
711 | if (item_arg->onlyNull()) |
712 | { |
713 | ArrayIndexStringNullImpl<IndexConv>::vector_const(col_nested->getChars(), col_array->getOffsets(), |
714 | col_nested->getOffsets(), col_res->getData(), null_map_data); |
715 | } |
716 | else if (const auto item_arg_const = checkAndGetColumnConstStringOrFixedString(item_arg)) |
717 | { |
718 | const ColumnString * item_const_string = checkAndGetColumn<ColumnString>(&item_arg_const->getDataColumn()); |
719 | const ColumnFixedString * item_const_fixedstring = checkAndGetColumn<ColumnFixedString>(&item_arg_const->getDataColumn()); |
720 | |
721 | if (item_const_string) |
722 | ArrayIndexStringImpl<IndexConv>::vector_const(col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), |
723 | item_const_string->getChars(), item_const_string->getDataAt(0).size, |
724 | col_res->getData(), null_map_data); |
725 | else if (item_const_fixedstring) |
726 | ArrayIndexStringImpl<IndexConv>::vector_const(col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), |
727 | item_const_fixedstring->getChars(), item_const_fixedstring->getN(), |
728 | col_res->getData(), null_map_data); |
729 | else |
730 | throw Exception("Logical error: ColumnConst contains not String nor FixedString column" , ErrorCodes::ILLEGAL_COLUMN); |
731 | } |
732 | else if (const auto item_arg_vector = checkAndGetColumn<ColumnString>(item_arg)) |
733 | { |
734 | ArrayIndexStringImpl<IndexConv>::vector_vector(col_nested->getChars(), col_array->getOffsets(), |
735 | col_nested->getOffsets(), item_arg_vector->getChars(), item_arg_vector->getOffsets(), |
736 | col_res->getData(), null_map_data, null_map_item); |
737 | } |
738 | else |
739 | return false; |
740 | |
741 | block.getByPosition(result).column = std::move(col_res); |
742 | return true; |
743 | } |
744 | |
745 | bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) |
746 | { |
747 | const ColumnConst * col_array = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
748 | |
749 | if (!col_array) |
750 | return false; |
751 | |
752 | Array arr = col_array->getValue<Array>(); |
753 | |
754 | const auto item_arg = block.getByPosition(arguments[1]).column.get(); |
755 | if (isColumnConst(*item_arg)) |
756 | { |
757 | typename IndexConv::ResultType current = 0; |
758 | const auto & value = (*item_arg)[0]; |
759 | |
760 | for (size_t i = 0, size = arr.size(); i < size; ++i) |
761 | { |
762 | if (applyVisitor(FieldVisitorAccurateEquals(), arr[i], value)) |
763 | { |
764 | if (!IndexConv::apply(i, current)) |
765 | break; |
766 | } |
767 | } |
768 | |
769 | block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( |
770 | item_arg->size(), |
771 | static_cast<typename IndexConv::ResultType>(current)); |
772 | } |
773 | else |
774 | { |
775 | /// Null map of the 2nd function argument, if it applies. |
776 | const PaddedPODArray<UInt8> * null_map = nullptr; |
777 | |
778 | if (arguments.size() > 2) |
779 | { |
780 | const auto & col = block.getByPosition(arguments[3]).column; |
781 | if (col) |
782 | null_map = &assert_cast<const ColumnUInt8 &>(*col).getData(); |
783 | } |
784 | |
785 | const auto size = item_arg->size(); |
786 | auto col_res = ResultColumnType::create(size); |
787 | |
788 | auto & data = col_res->getData(); |
789 | |
790 | for (size_t row = 0; row < size; ++row) |
791 | { |
792 | const auto & value = (*item_arg)[row]; |
793 | |
794 | data[row] = 0; |
795 | for (size_t i = 0, arr_size = arr.size(); i < arr_size; ++i) |
796 | { |
797 | bool hit = false; |
798 | |
799 | if (arr[i].isNull()) |
800 | { |
801 | if (null_map && (*null_map)[row]) |
802 | hit = true; |
803 | } |
804 | else if (applyVisitor(FieldVisitorAccurateEquals(), arr[i], value)) |
805 | hit = true; |
806 | |
807 | if (hit) |
808 | { |
809 | if (!IndexConv::apply(i, data[row])) |
810 | break; |
811 | } |
812 | } |
813 | } |
814 | |
815 | block.getByPosition(result).column = std::move(col_res); |
816 | } |
817 | |
818 | return true; |
819 | } |
820 | |
821 | bool executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result) |
822 | { |
823 | const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
824 | |
825 | if (!col_array) |
826 | return false; |
827 | |
828 | const IColumn & col_nested = col_array->getData(); |
829 | const IColumn & item_arg = *block.getByPosition(arguments[1]).column; |
830 | |
831 | auto col_res = ResultColumnType::create(); |
832 | |
833 | /// Null maps of the 1st and second function arguments, |
834 | /// if it applies. |
835 | const PaddedPODArray<UInt8> * null_map_data = nullptr; |
836 | const PaddedPODArray<UInt8> * null_map_item = nullptr; |
837 | |
838 | if (arguments.size() > 2) |
839 | { |
840 | const auto & null_map1 = block.getByPosition(arguments[2]).column; |
841 | if (null_map1) |
842 | null_map_data = &assert_cast<const ColumnUInt8 &>(*null_map1).getData(); |
843 | |
844 | const auto & null_map2 = block.getByPosition(arguments[3]).column; |
845 | if (null_map2) |
846 | null_map_item = &assert_cast<const ColumnUInt8 &>(*null_map2).getData(); |
847 | } |
848 | |
849 | if (item_arg.onlyNull()) |
850 | ArrayIndexGenericNullImpl<IndexConv>::vector(col_nested, col_array->getOffsets(), |
851 | col_res->getData(), null_map_data); |
852 | else if (isColumnConst(item_arg)) |
853 | ArrayIndexGenericImpl<IndexConv, true>::vector(col_nested, col_array->getOffsets(), |
854 | assert_cast<const ColumnConst &>(item_arg).getDataColumn(), col_res->getData(), /// TODO This is wrong. |
855 | null_map_data, nullptr); |
856 | else |
857 | { |
858 | ArrayIndexGenericImpl<IndexConv, false>::vector( |
859 | col_nested, col_array->getOffsets(), *item_arg.convertToFullColumnIfConst(), col_res->getData(), |
860 | null_map_data, null_map_item); |
861 | } |
862 | |
863 | block.getByPosition(result).column = std::move(col_res); |
864 | return true; |
865 | } |
866 | |
867 | |
868 | public: |
869 | /// Get function name. |
870 | String getName() const override |
871 | { |
872 | return name; |
873 | } |
874 | |
875 | bool useDefaultImplementationForNulls() const override { return false; } |
876 | |
877 | size_t getNumberOfArguments() const override { return 2; } |
878 | |
879 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
880 | { |
881 | const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()); |
882 | if (!array_type) |
883 | throw Exception("First argument for function " + getName() + " must be an array." , |
884 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
885 | |
886 | if (!arguments[1]->onlyNull()) |
887 | { |
888 | if (!allowArrayIndex(array_type->getNestedType(), arguments[1])) |
889 | throw Exception("Types of array and 2nd argument of function " |
890 | + getName() + " must be identical up to nullability or numeric types or Enum and numeric type. Passed: " |
891 | + arguments[0]->getName() + " and " + arguments[1]->getName() + "." , |
892 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
893 | } |
894 | |
895 | return std::make_shared<DataTypeNumber<typename IndexConv::ResultType>>(); |
896 | } |
897 | |
898 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override |
899 | { |
900 | /// If one or both arguments passed to this function are nullable, |
901 | /// we create a new block that contains non-nullable arguments: |
902 | /// - if the 1st argument is a non-constant array of nullable values, |
903 | /// it is turned into a non-constant array of ordinary values + a null |
904 | /// byte map; |
905 | /// - if the 2nd argument is a nullable value, it is turned into an |
906 | /// ordinary value + a null byte map. |
907 | /// Note that since constant arrays have quite a specific structure |
908 | /// (they are vectors of Fields, which may represent the NULL value), |
909 | /// they do not require any preprocessing |
910 | |
911 | const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
912 | |
913 | const ColumnNullable * nullable = nullptr; |
914 | if (col_array) |
915 | nullable = checkAndGetColumn<ColumnNullable>(col_array->getData()); |
916 | |
917 | auto & arg_column = block.getByPosition(arguments[1]).column; |
918 | |
919 | const ColumnNullable * arg_nullable = nullptr; |
920 | arg_nullable = checkAndGetColumn<ColumnNullable>(*arg_column); |
921 | |
922 | if (!nullable && !arg_nullable) |
923 | { |
924 | /// Simple case: no nullable values passeded. |
925 | perform(block, arguments, result); |
926 | } |
927 | else |
928 | { |
929 | /// Template of the block on which we will actually apply the function. |
930 | /// Its elements will be filled later. |
931 | Block source_block = |
932 | { |
933 | /// 1st function argument (data) |
934 | { |
935 | }, |
936 | |
937 | /// 2nd function argument |
938 | { |
939 | }, |
940 | |
941 | /// 1st argument null map |
942 | { |
943 | }, |
944 | |
945 | /// 2nd argument null map |
946 | { |
947 | }, |
948 | |
949 | /// Function result. |
950 | { |
951 | nullptr, |
952 | block.getByPosition(result).type, |
953 | "" |
954 | } |
955 | }; |
956 | |
957 | if (nullable) |
958 | { |
959 | const auto & nested_col = nullable->getNestedColumnPtr(); |
960 | |
961 | auto & data = source_block.getByPosition(0); |
962 | data.column = ColumnArray::create(nested_col, col_array->getOffsetsPtr()); |
963 | data.type = std::make_shared<DataTypeArray>( |
964 | static_cast<const DataTypeNullable &>( |
965 | *static_cast<const DataTypeArray &>(*block.getByPosition(arguments[0]).type).getNestedType()).getNestedType()); |
966 | |
967 | auto & null_map = source_block.getByPosition(2); |
968 | null_map.column = nullable->getNullMapColumnPtr(); |
969 | null_map.type = std::make_shared<DataTypeUInt8>(); |
970 | } |
971 | else |
972 | { |
973 | auto & data = source_block.getByPosition(0); |
974 | data = block.getByPosition(arguments[0]); |
975 | } |
976 | |
977 | if (arg_nullable) |
978 | { |
979 | auto & arg = source_block.getByPosition(1); |
980 | arg.column = arg_nullable->getNestedColumnPtr(); |
981 | arg.type = static_cast<const DataTypeNullable &>(*block.getByPosition(arguments[1]).type).getNestedType(); |
982 | |
983 | auto & null_map = source_block.getByPosition(3); |
984 | null_map.column = arg_nullable->getNullMapColumnPtr(); |
985 | null_map.type = std::make_shared<DataTypeUInt8>(); |
986 | } |
987 | else |
988 | { |
989 | auto & arg = source_block.getByPosition(1); |
990 | arg = block.getByPosition(arguments[1]); |
991 | } |
992 | |
993 | /// Now perform the function. |
994 | perform(source_block, {0, 1, 2, 3}, 4); |
995 | |
996 | /// Move the result to its final position. |
997 | const ColumnWithTypeAndName & source_col = source_block.getByPosition(4); |
998 | ColumnWithTypeAndName & dest_col = block.getByPosition(result); |
999 | dest_col.column = std::move(source_col.column); |
1000 | } |
1001 | } |
1002 | |
1003 | private: |
1004 | /// Perform function on the given block. Internal version. |
1005 | void perform(Block & block, const ColumnNumbers & arguments, size_t result) |
1006 | { |
1007 | if (!(executeNumber<UInt8>(block, arguments, result) |
1008 | || executeNumber<UInt16>(block, arguments, result) |
1009 | || executeNumber<UInt32>(block, arguments, result) |
1010 | || executeNumber<UInt64>(block, arguments, result) |
1011 | || executeNumber<Int8>(block, arguments, result) |
1012 | || executeNumber<Int16>(block, arguments, result) |
1013 | || executeNumber<Int32>(block, arguments, result) |
1014 | || executeNumber<Int64>(block, arguments, result) |
1015 | || executeNumber<Float32>(block, arguments, result) |
1016 | || executeNumber<Float64>(block, arguments, result) |
1017 | || executeConst(block, arguments, result) |
1018 | || executeString(block, arguments, result) |
1019 | || executeGeneric(block, arguments, result))) |
1020 | throw Exception{"Illegal column " + block.getByPosition(arguments[0]).column->getName() |
1021 | + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; |
1022 | } |
1023 | }; |
1024 | |
1025 | } |
1026 | |