1#include <mutex>
2#include <Common/FieldVisitors.h>
3#include <DataTypes/DataTypesNumber.h>
4#include <DataTypes/DataTypeString.h>
5#include <DataTypes/DataTypeArray.h>
6#include <Columns/ColumnString.h>
7#include <Columns/ColumnArray.h>
8#include <Columns/ColumnConst.h>
9#include <Columns/ColumnsNumber.h>
10#include <Common/Arena.h>
11#include <Common/HashTable/HashMap.h>
12#include <Common/typeid_cast.h>
13#include <common/StringRef.h>
14#include <Functions/IFunctionImpl.h>
15#include <Functions/FunctionHelpers.h>
16#include <Functions/FunctionFactory.h>
17#include <DataTypes/getLeastSupertype.h>
18
19
20namespace DB
21{
22
/// Error codes thrown from this file. Each code referenced below is declared here explicitly.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;   /// Thrown by the argument type checks in getReturnTypeImpl.
    extern const int ILLEGAL_COLUMN;
}
29
30
31/** transform(x, from_array, to_array[, default]) - convert x according to an explicitly passed match.
32 */
33
34/** transform(x, [from...], [to...], default)
35 * - converts the values according to the explicitly specified mapping.
36 *
37 * x - what to transform.
38 * from - a constant array of values for the transformation.
39 * to - a constant array of values into which values from `from` must be transformed.
40 * default - what value to use if x is not equal to any of the values in `from`.
41 * `from` and `to` - arrays of the same size.
42 *
43 * Types:
44 * transform(T, Array(T), Array(U), U) -> U
45 *
46 * transform(x, [from...], [to...])
47 * - if `default` is not specified, then for values of `x` for which there is no corresponding element in `from`, the unchanged value of `x` is returned.
48 *
49 * Types:
50 * transform(T, Array(T), Array(T)) -> T
51 *
52 * Note: the implementation is rather cumbersome.
53 */
54class FunctionTransform : public IFunction
55{
56public:
57 static constexpr auto name = "transform";
58 static FunctionPtr create(const Context &) { return std::make_shared<FunctionTransform>(); }
59
60 String getName() const override
61 {
62 return name;
63 }
64
65 bool isVariadic() const override { return true; }
66 size_t getNumberOfArguments() const override { return 0; }
67 bool useDefaultImplementationForConstants() const override { return true; }
68 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
69
70 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
71 {
72 const auto args_size = arguments.size();
73 if (args_size != 3 && args_size != 4)
74 throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " + toString(args_size) + ", should be 3 or 4",
75 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
76
77 const DataTypePtr & type_x = arguments[0];
78
79 if (!type_x->isValueRepresentedByNumber() && !isString(type_x))
80 throw Exception{"Unsupported type " + type_x->getName()
81 + " of first argument of function " + getName()
82 + ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
83
84 const DataTypeArray * type_arr_from = checkAndGetDataType<DataTypeArray>(arguments[1].get());
85
86 if (!type_arr_from)
87 throw Exception{"Second argument of function " + getName()
88 + ", must be array of source values to transform from.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
89
90 const auto type_arr_from_nested = type_arr_from->getNestedType();
91
92 if ((type_x->isValueRepresentedByNumber() != type_arr_from_nested->isValueRepresentedByNumber())
93 || (isString(type_x) != isString(type_arr_from_nested)))
94 {
95 throw Exception{"First argument and elements of array of second argument of function " + getName()
96 + " must have compatible types: both numeric or both strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
97 }
98
99 const DataTypeArray * type_arr_to = checkAndGetDataType<DataTypeArray>(arguments[2].get());
100
101 if (!type_arr_to)
102 throw Exception{"Third argument of function " + getName()
103 + ", must be array of destination values to transform to.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
104
105 const DataTypePtr & type_arr_to_nested = type_arr_to->getNestedType();
106
107 if (args_size == 3)
108 {
109 if ((type_x->isValueRepresentedByNumber() != type_arr_to_nested->isValueRepresentedByNumber())
110 || (isString(type_x) != isString(type_arr_to_nested)))
111 throw Exception{"Function " + getName()
112 + " has signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.",
113 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
114
115 return type_x;
116 }
117 else
118 {
119 const DataTypePtr & type_default = arguments[3];
120
121 if (!type_default->isValueRepresentedByNumber() && !isString(type_default))
122 throw Exception{"Unsupported type " + type_default->getName()
123 + " of fourth argument (default value) of function " + getName()
124 + ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
125
126 bool default_is_string = WhichDataType(type_default).isString();
127 bool nested_is_string = WhichDataType(type_arr_to_nested).isString();
128
129 if ((type_default->isValueRepresentedByNumber() != type_arr_to_nested->isValueRepresentedByNumber())
130 || (default_is_string != nested_is_string))
131 throw Exception{"Function " + getName()
132 + " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.",
133 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
134
135 if (type_arr_to_nested->isValueRepresentedByNumber() && type_default->isValueRepresentedByNumber())
136 {
137 /// We take the smallest common type for the elements of the array of values `to` and for `default`.
138 return getLeastSupertype({type_arr_to_nested, type_default});
139 }
140
141 /// TODO More checks.
142 return type_arr_to_nested;
143 }
144 }
145
146 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
147 {
148 const ColumnConst * array_from = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[1]).column.get());
149 const ColumnConst * array_to = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[2]).column.get());
150
151 if (!array_from || !array_to)
152 throw Exception{"Second and third arguments of function " + getName() + " must be constant arrays.", ErrorCodes::ILLEGAL_COLUMN};
153
154 initialize(array_from->getValue<Array>(), array_to->getValue<Array>(), block, arguments);
155
156 const auto in = block.getByPosition(arguments.front()).column.get();
157
158 if (isColumnConst(*in))
159 {
160 executeConst(block, arguments, result, input_rows_count);
161 return;
162 }
163
164 const IColumn * default_column = nullptr;
165 if (arguments.size() == 4)
166 default_column = block.getByPosition(arguments[3]).column.get();
167
168 auto column_result = block.getByPosition(result).type->createColumn();
169 auto out = column_result.get();
170
171 if (!executeNum<UInt8>(in, out, default_column)
172 && !executeNum<UInt16>(in, out, default_column)
173 && !executeNum<UInt32>(in, out, default_column)
174 && !executeNum<UInt64>(in, out, default_column)
175 && !executeNum<Int8>(in, out, default_column)
176 && !executeNum<Int16>(in, out, default_column)
177 && !executeNum<Int32>(in, out, default_column)
178 && !executeNum<Int64>(in, out, default_column)
179 && !executeNum<Float32>(in, out, default_column)
180 && !executeNum<Float64>(in, out, default_column)
181 && !executeString(in, out, default_column))
182 {
183 throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
184 }
185
186 block.getByPosition(result).column = std::move(column_result);
187 }
188
189private:
190 void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
191 {
192 /// Materialize the input column and compute the function as usual.
193
194 Block tmp_block;
195 ColumnNumbers tmp_arguments;
196
197 tmp_block.insert(block.getByPosition(arguments[0]));
198 tmp_block.getByPosition(0).column = tmp_block.getByPosition(0).column->cloneResized(input_rows_count)->convertToFullColumnIfConst();
199 tmp_arguments.push_back(0);
200
201 for (size_t i = 1; i < arguments.size(); ++i)
202 {
203 tmp_block.insert(block.getByPosition(arguments[i]));
204 tmp_arguments.push_back(i);
205 }
206
207 auto impl = FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(std::make_shared<FunctionTransform>()))
208 .build(tmp_block.getColumnsWithTypeAndName());
209
210 tmp_block.insert(block.getByPosition(result));
211 size_t tmp_result = arguments.size();
212
213 impl->execute(tmp_block, tmp_arguments, tmp_result, input_rows_count);
214
215 block.getByPosition(result).column = tmp_block.getByPosition(tmp_result).column;
216 }
217
218 template <typename T>
219 bool executeNum(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * default_untyped)
220 {
221 if (const auto in = checkAndGetColumn<ColumnVector<T>>(in_untyped))
222 {
223 if (!default_untyped)
224 {
225 auto out = typeid_cast<ColumnVector<T> *>(out_untyped);
226 if (!out)
227 {
228 throw Exception{"Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName()
229 + ", must be " + in->getName(), ErrorCodes::ILLEGAL_COLUMN};
230 }
231
232 executeImplNumToNum<T>(in->getData(), out->getData());
233 }
234 else if (isColumnConst(*default_untyped))
235 {
236 if (!executeNumToNumWithConstDefault<T, UInt8>(in, out_untyped)
237 && !executeNumToNumWithConstDefault<T, UInt16>(in, out_untyped)
238 && !executeNumToNumWithConstDefault<T, UInt32>(in, out_untyped)
239 && !executeNumToNumWithConstDefault<T, UInt64>(in, out_untyped)
240 && !executeNumToNumWithConstDefault<T, Int8>(in, out_untyped)
241 && !executeNumToNumWithConstDefault<T, Int16>(in, out_untyped)
242 && !executeNumToNumWithConstDefault<T, Int32>(in, out_untyped)
243 && !executeNumToNumWithConstDefault<T, Int64>(in, out_untyped)
244 && !executeNumToNumWithConstDefault<T, Float32>(in, out_untyped)
245 && !executeNumToNumWithConstDefault<T, Float64>(in, out_untyped)
246 && !executeNumToStringWithConstDefault<T>(in, out_untyped))
247 {
248 throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
249 ErrorCodes::ILLEGAL_COLUMN};
250 }
251 }
252 else
253 {
254 if (!executeNumToNumWithNonConstDefault<T, UInt8>(in, out_untyped, default_untyped)
255 && !executeNumToNumWithNonConstDefault<T, UInt16>(in, out_untyped, default_untyped)
256 && !executeNumToNumWithNonConstDefault<T, UInt32>(in, out_untyped, default_untyped)
257 && !executeNumToNumWithNonConstDefault<T, UInt64>(in, out_untyped, default_untyped)
258 && !executeNumToNumWithNonConstDefault<T, Int8>(in, out_untyped, default_untyped)
259 && !executeNumToNumWithNonConstDefault<T, Int16>(in, out_untyped, default_untyped)
260 && !executeNumToNumWithNonConstDefault<T, Int32>(in, out_untyped, default_untyped)
261 && !executeNumToNumWithNonConstDefault<T, Int64>(in, out_untyped, default_untyped)
262 && !executeNumToNumWithNonConstDefault<T, Float32>(in, out_untyped, default_untyped)
263 && !executeNumToNumWithNonConstDefault<T, Float64>(in, out_untyped, default_untyped)
264 && !executeNumToStringWithNonConstDefault<T>(in, out_untyped, default_untyped))
265 {
266 throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
267 ErrorCodes::ILLEGAL_COLUMN};
268 }
269 }
270
271 return true;
272 }
273
274 return false;
275 }
276
277 bool executeString(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * default_untyped)
278 {
279 if (const auto in = checkAndGetColumn<ColumnString>(in_untyped))
280 {
281 if (!default_untyped)
282 {
283 if (!executeStringToString(in, out_untyped))
284 throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
285 ErrorCodes::ILLEGAL_COLUMN};
286 }
287 else if (isColumnConst(*default_untyped))
288 {
289 if (!executeStringToNumWithConstDefault<UInt8>(in, out_untyped)
290 && !executeStringToNumWithConstDefault<UInt16>(in, out_untyped)
291 && !executeStringToNumWithConstDefault<UInt32>(in, out_untyped)
292 && !executeStringToNumWithConstDefault<UInt64>(in, out_untyped)
293 && !executeStringToNumWithConstDefault<Int8>(in, out_untyped)
294 && !executeStringToNumWithConstDefault<Int16>(in, out_untyped)
295 && !executeStringToNumWithConstDefault<Int32>(in, out_untyped)
296 && !executeStringToNumWithConstDefault<Int64>(in, out_untyped)
297 && !executeStringToNumWithConstDefault<Float32>(in, out_untyped)
298 && !executeStringToNumWithConstDefault<Float64>(in, out_untyped)
299 && !executeStringToStringWithConstDefault(in, out_untyped))
300 {
301 throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
302 ErrorCodes::ILLEGAL_COLUMN};
303 }
304 }
305 else
306 {
307 if (!executeStringToNumWithNonConstDefault<UInt8>(in, out_untyped, default_untyped)
308 && !executeStringToNumWithNonConstDefault<UInt16>(in, out_untyped, default_untyped)
309 && !executeStringToNumWithNonConstDefault<UInt32>(in, out_untyped, default_untyped)
310 && !executeStringToNumWithNonConstDefault<UInt64>(in, out_untyped, default_untyped)
311 && !executeStringToNumWithNonConstDefault<Int8>(in, out_untyped, default_untyped)
312 && !executeStringToNumWithNonConstDefault<Int16>(in, out_untyped, default_untyped)
313 && !executeStringToNumWithNonConstDefault<Int32>(in, out_untyped, default_untyped)
314 && !executeStringToNumWithNonConstDefault<Int64>(in, out_untyped, default_untyped)
315 && !executeStringToNumWithNonConstDefault<Float32>(in, out_untyped, default_untyped)
316 && !executeStringToNumWithNonConstDefault<Float64>(in, out_untyped, default_untyped)
317 && !executeStringToStringWithNonConstDefault(in, out_untyped, default_untyped))
318 {
319 throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
320 ErrorCodes::ILLEGAL_COLUMN};
321 }
322 }
323
324 return true;
325 }
326
327 return false;
328 }
329
330 template <typename T, typename U>
331 bool executeNumToNumWithConstDefault(const ColumnVector<T> * in, IColumn * out_untyped)
332 {
333 auto out = typeid_cast<ColumnVector<U> *>(out_untyped);
334 if (!out)
335 return false;
336
337 executeImplNumToNumWithConstDefault<T, U>(in->getData(), out->getData(), const_default_value.get<U>());
338 return true;
339 }
340
341 template <typename T, typename U>
342 bool executeNumToNumWithNonConstDefault(const ColumnVector<T> * in, IColumn * out_untyped, const IColumn * default_untyped)
343 {
344 auto out = typeid_cast<ColumnVector<U> *>(out_untyped);
345 if (!out)
346 return false;
347
348 if (!executeNumToNumWithNonConstDefault2<T, U, UInt8>(in, out, default_untyped)
349 && !executeNumToNumWithNonConstDefault2<T, U, UInt16>(in, out, default_untyped)
350 && !executeNumToNumWithNonConstDefault2<T, U, UInt32>(in, out, default_untyped)
351 && !executeNumToNumWithNonConstDefault2<T, U, UInt64>(in, out, default_untyped)
352 && !executeNumToNumWithNonConstDefault2<T, U, Int8>(in, out, default_untyped)
353 && !executeNumToNumWithNonConstDefault2<T, U, Int16>(in, out, default_untyped)
354 && !executeNumToNumWithNonConstDefault2<T, U, Int32>(in, out, default_untyped)
355 && !executeNumToNumWithNonConstDefault2<T, U, Int64>(in, out, default_untyped)
356 && !executeNumToNumWithNonConstDefault2<T, U, Float32>(in, out, default_untyped)
357 && !executeNumToNumWithNonConstDefault2<T, U, Float64>(in, out, default_untyped))
358 {
359 throw Exception(
360 "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(),
361 ErrorCodes::ILLEGAL_COLUMN);
362 }
363
364 return true;
365 }
366
367 template <typename T, typename U, typename V>
368 bool executeNumToNumWithNonConstDefault2(const ColumnVector<T> * in, ColumnVector<U> * out, const IColumn * default_untyped)
369 {
370 auto col_default = checkAndGetColumn<ColumnVector<V>>(default_untyped);
371 if (!col_default)
372 return false;
373
374 executeImplNumToNumWithNonConstDefault<T, U, V>(in->getData(), out->getData(), col_default->getData());
375 return true;
376 }
377
378 template <typename T>
379 bool executeNumToStringWithConstDefault(const ColumnVector<T> * in, IColumn * out_untyped)
380 {
381 auto out = typeid_cast<ColumnString *>(out_untyped);
382 if (!out)
383 return false;
384
385 const String & default_str = const_default_value.get<const String &>();
386 StringRef default_string_ref{default_str.data(), default_str.size() + 1};
387 executeImplNumToStringWithConstDefault<T>(in->getData(), out->getChars(), out->getOffsets(), default_string_ref);
388 return true;
389 }
390
391 template <typename T>
392 bool executeNumToStringWithNonConstDefault(const ColumnVector<T> * in, IColumn * out_untyped, const IColumn * default_untyped)
393 {
394 auto out = typeid_cast<ColumnString *>(out_untyped);
395 if (!out)
396 return false;
397
398 auto default_col = checkAndGetColumn<ColumnString>(default_untyped);
399 if (!default_col)
400 {
401 throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(),
402 ErrorCodes::ILLEGAL_COLUMN};
403 }
404
405 executeImplNumToStringWithNonConstDefault<T>(
406 in->getData(),
407 out->getChars(), out->getOffsets(),
408 default_col->getChars(), default_col->getOffsets());
409
410 return true;
411 }
412
413 template <typename U>
414 bool executeStringToNumWithConstDefault(const ColumnString * in, IColumn * out_untyped)
415 {
416 auto out = typeid_cast<ColumnVector<U> *>(out_untyped);
417 if (!out)
418 return false;
419
420 executeImplStringToNumWithConstDefault<U>(in->getChars(), in->getOffsets(), out->getData(), const_default_value.get<U>());
421 return true;
422 }
423
424 template <typename U>
425 bool executeStringToNumWithNonConstDefault(const ColumnString * in, IColumn * out_untyped, const IColumn * default_untyped)
426 {
427 auto out = typeid_cast<ColumnVector<U> *>(out_untyped);
428 if (!out)
429 return false;
430
431 if (!executeStringToNumWithNonConstDefault2<U, UInt8>(in, out, default_untyped)
432 && !executeStringToNumWithNonConstDefault2<U, UInt16>(in, out, default_untyped)
433 && !executeStringToNumWithNonConstDefault2<U, UInt32>(in, out, default_untyped)
434 && !executeStringToNumWithNonConstDefault2<U, UInt64>(in, out, default_untyped)
435 && !executeStringToNumWithNonConstDefault2<U, Int8>(in, out, default_untyped)
436 && !executeStringToNumWithNonConstDefault2<U, Int16>(in, out, default_untyped)
437 && !executeStringToNumWithNonConstDefault2<U, Int32>(in, out, default_untyped)
438 && !executeStringToNumWithNonConstDefault2<U, Int64>(in, out, default_untyped)
439 && !executeStringToNumWithNonConstDefault2<U, Float32>(in, out, default_untyped)
440 && !executeStringToNumWithNonConstDefault2<U, Float64>(in, out, default_untyped))
441 {
442 throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(),
443 ErrorCodes::ILLEGAL_COLUMN};
444 }
445
446 return true;
447 }
448
449 template <typename U, typename V>
450 bool executeStringToNumWithNonConstDefault2(const ColumnString * in, ColumnVector<U> * out, const IColumn * default_untyped)
451 {
452 auto col_default = checkAndGetColumn<ColumnVector<V>>(default_untyped);
453 if (!col_default)
454 return false;
455
456 executeImplStringToNumWithNonConstDefault<U, V>(in->getChars(), in->getOffsets(), out->getData(), col_default->getData());
457 return true;
458 }
459
460 bool executeStringToString(const ColumnString * in, IColumn * out_untyped)
461 {
462 auto out = typeid_cast<ColumnString *>(out_untyped);
463 if (!out)
464 return false;
465
466 executeImplStringToString(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets());
467 return true;
468 }
469
470 bool executeStringToStringWithConstDefault(const ColumnString * in, IColumn * out_untyped)
471 {
472 auto out = typeid_cast<ColumnString *>(out_untyped);
473 if (!out)
474 return false;
475
476 const String & default_str = const_default_value.get<const String &>();
477 StringRef default_string_ref{default_str.data(), default_str.size() + 1};
478 executeImplStringToStringWithConstDefault(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets(), default_string_ref);
479 return true;
480 }
481
482 bool executeStringToStringWithNonConstDefault(const ColumnString * in, IColumn * out_untyped, const IColumn * default_untyped)
483 {
484 auto out = typeid_cast<ColumnString *>(out_untyped);
485 if (!out)
486 return false;
487
488 auto default_col = checkAndGetColumn<ColumnString>(default_untyped);
489 if (!default_col)
490 {
491 throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(),
492 ErrorCodes::ILLEGAL_COLUMN};
493 }
494
495 executeImplStringToStringWithNonConstDefault(
496 in->getChars(), in->getOffsets(),
497 out->getChars(), out->getOffsets(),
498 default_col->getChars(), default_col->getOffsets());
499
500 return true;
501 }
502
503
504 template <typename T, typename U>
505 void executeImplNumToNumWithConstDefault(const PaddedPODArray<T> & src, PaddedPODArray<U> & dst, U dst_default)
506 {
507 const auto & table = *table_num_to_num;
508 size_t size = src.size();
509 dst.resize(size);
510 for (size_t i = 0; i < size; ++i)
511 {
512 auto it = table.find(src[i]);
513 if (it)
514 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian.
515 else
516 dst[i] = dst_default;
517 }
518 }
519
520 template <typename T, typename U, typename V>
521 void executeImplNumToNumWithNonConstDefault(const PaddedPODArray<T> & src, PaddedPODArray<U> & dst, const PaddedPODArray<V> & dst_default)
522 {
523 const auto & table = *table_num_to_num;
524 size_t size = src.size();
525 dst.resize(size);
526 for (size_t i = 0; i < size; ++i)
527 {
528 auto it = table.find(src[i]);
529 if (it)
530 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian.
531 else
532 dst[i] = dst_default[i];
533 }
534 }
535
536 template <typename T>
537 void executeImplNumToNum(const PaddedPODArray<T> & src, PaddedPODArray<T> & dst)
538 {
539 const auto & table = *table_num_to_num;
540 size_t size = src.size();
541 dst.resize(size);
542 for (size_t i = 0; i < size; ++i)
543 {
544 auto it = table.find(src[i]);
545 if (it)
546 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
547 else
548 dst[i] = src[i];
549 }
550 }
551
552 template <typename T>
553 void executeImplNumToStringWithConstDefault(const PaddedPODArray<T> & src,
554 ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, StringRef dst_default)
555 {
556 const auto & table = *table_num_to_string;
557 size_t size = src.size();
558 dst_offsets.resize(size);
559 ColumnString::Offset current_dst_offset = 0;
560 for (size_t i = 0; i < size; ++i)
561 {
562 auto it = table.find(src[i]);
563 StringRef ref = it ? it->getMapped() : dst_default;
564 dst_data.resize(current_dst_offset + ref.size);
565 memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
566 current_dst_offset += ref.size;
567 dst_offsets[i] = current_dst_offset;
568 }
569 }
570
571 template <typename T>
572 void executeImplNumToStringWithNonConstDefault(const PaddedPODArray<T> & src,
573 ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets,
574 const ColumnString::Chars & dst_default_data, const ColumnString::Offsets & dst_default_offsets)
575 {
576 const auto & table = *table_num_to_string;
577 size_t size = src.size();
578 dst_offsets.resize(size);
579 ColumnString::Offset current_dst_offset = 0;
580 ColumnString::Offset current_dst_default_offset = 0;
581 for (size_t i = 0; i < size; ++i)
582 {
583 auto it = table.find(src[i]);
584 StringRef ref;
585
586 if (it)
587 ref = it->getMapped();
588 else
589 {
590 ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
591 ref.size = dst_default_offsets[i] - current_dst_default_offset;
592 }
593
594 dst_data.resize(current_dst_offset + ref.size);
595 memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
596 current_dst_offset += ref.size;
597 current_dst_default_offset = dst_default_offsets[i];
598 dst_offsets[i] = current_dst_offset;
599 }
600 }
601
602 template <typename U>
603 void executeImplStringToNumWithConstDefault(
604 const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets,
605 PaddedPODArray<U> & dst, U dst_default)
606 {
607 const auto & table = *table_string_to_num;
608 size_t size = src_offsets.size();
609 dst.resize(size);
610 ColumnString::Offset current_src_offset = 0;
611 for (size_t i = 0; i < size; ++i)
612 {
613 StringRef ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset};
614 current_src_offset = src_offsets[i];
615 auto it = table.find(ref);
616 if (it)
617 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
618 else
619 dst[i] = dst_default;
620 }
621 }
622
623 template <typename U, typename V>
624 void executeImplStringToNumWithNonConstDefault(
625 const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets,
626 PaddedPODArray<U> & dst, const PaddedPODArray<V> & dst_default)
627 {
628 const auto & table = *table_string_to_num;
629 size_t size = src_offsets.size();
630 dst.resize(size);
631 ColumnString::Offset current_src_offset = 0;
632 for (size_t i = 0; i < size; ++i)
633 {
634 StringRef ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset};
635 current_src_offset = src_offsets[i];
636 auto it = table.find(ref);
637 if (it)
638 memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
639 else
640 dst[i] = dst_default[i];
641 }
642 }
643
644 template <bool with_default>
645 void executeImplStringToStringWithOrWithoutConstDefault(
646 const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets,
647 ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, StringRef dst_default)
648 {
649 const auto & table = *table_string_to_string;
650 size_t size = src_offsets.size();
651 dst_offsets.resize(size);
652 ColumnString::Offset current_src_offset = 0;
653 ColumnString::Offset current_dst_offset = 0;
654 for (size_t i = 0; i < size; ++i)
655 {
656 StringRef src_ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset};
657 current_src_offset = src_offsets[i];
658
659 auto it = table.find(src_ref);
660
661 StringRef dst_ref = it ? it->getMapped() : (with_default ? dst_default : src_ref);
662 dst_data.resize(current_dst_offset + dst_ref.size);
663 memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
664 current_dst_offset += dst_ref.size;
665 dst_offsets[i] = current_dst_offset;
666 }
667 }
668
669 void executeImplStringToString(
670 const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets,
671 ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets)
672 {
673 executeImplStringToStringWithOrWithoutConstDefault<false>(src_data, src_offsets, dst_data, dst_offsets, {});
674 }
675
676 void executeImplStringToStringWithConstDefault(
677 const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets,
678 ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, StringRef dst_default)
679 {
680 executeImplStringToStringWithOrWithoutConstDefault<true>(src_data, src_offsets, dst_data, dst_offsets, dst_default);
681 }
682
683 void executeImplStringToStringWithNonConstDefault(
684 const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets,
685 ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets,
686 const ColumnString::Chars & dst_default_data, const ColumnString::Offsets & dst_default_offsets)
687 {
688 const auto & table = *table_string_to_string;
689 size_t size = src_offsets.size();
690 dst_offsets.resize(size);
691 ColumnString::Offset current_src_offset = 0;
692 ColumnString::Offset current_dst_offset = 0;
693 ColumnString::Offset current_dst_default_offset = 0;
694 for (size_t i = 0; i < size; ++i)
695 {
696 StringRef src_ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset};
697 current_src_offset = src_offsets[i];
698
699 auto it = table.find(src_ref);
700 StringRef dst_ref;
701
702 if (it)
703 dst_ref = it->getMapped();
704 else
705 {
706 dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
707 dst_ref.size = dst_default_offsets[i] - current_dst_default_offset;
708 }
709
710 dst_data.resize(current_dst_offset + dst_ref.size);
711 memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
712 current_dst_offset += dst_ref.size;
713 current_dst_default_offset = dst_default_offsets[i];
714 dst_offsets[i] = current_dst_offset;
715 }
716 }
717
718
719 /// Different versions of the hash tables to implement the mapping.
720
721 using NumToNum = HashMap<UInt64, UInt64, HashCRC32<UInt64>>;
722 using NumToString = HashMap <UInt64, StringRef, HashCRC32<UInt64>>; /// Everywhere StringRef's with trailing zero.
723 using StringToNum = HashMap<StringRef, UInt64, StringRefHash>;
724 using StringToString = HashMap<StringRef, StringRef, StringRefHash>;
725
726 std::unique_ptr<NumToNum> table_num_to_num;
727 std::unique_ptr<NumToString> table_num_to_string;
728 std::unique_ptr<StringToNum> table_string_to_num;
729 std::unique_ptr<StringToString> table_string_to_string;
730
731 Arena string_pool;
732
733 Field const_default_value; /// Null, if not specified.
734
735 std::atomic<bool> initialized {false};
736 std::mutex mutex;
737
738 /// Can be called from different threads. It works only on the first call.
739 void initialize(const Array & from, const Array & to, Block & block, const ColumnNumbers & arguments)
740 {
741 if (initialized)
742 return;
743
744 const size_t size = from.size();
745 if (0 == size)
746 throw Exception{"Empty arrays are illegal in function " + getName(), ErrorCodes::BAD_ARGUMENTS};
747
748 std::lock_guard lock(mutex);
749
750 if (initialized)
751 return;
752
753 if (size != to.size())
754 throw Exception{"Second and third arguments of function " + getName() + " must be arrays of same size", ErrorCodes::BAD_ARGUMENTS};
755
756 Array converted_to;
757 const Array * used_to = &to;
758
759 /// Whether the default value is set.
760
761 if (arguments.size() == 4)
762 {
763 const IColumn * default_col = block.getByPosition(arguments[3]).column.get();
764 const ColumnConst * const_default_col = typeid_cast<const ColumnConst *>(default_col);
765
766 if (const_default_col)
767 const_default_value = (*const_default_col)[0];
768
769 /// Do we need to convert the elements `to` and `default_value` to the smallest common type that is Float64?
770 bool default_col_is_float =
771 checkColumn<ColumnFloat32>(default_col)
772 || checkColumn<ColumnFloat64>(default_col)
773 || checkColumnConst<ColumnFloat32>(default_col)
774 || checkColumnConst<ColumnFloat64>(default_col);
775
776 bool to_is_float = to[0].getType() == Field::Types::Float64;
777
778 if (default_col_is_float && !to_is_float)
779 {
780 converted_to.resize(size);
781 for (size_t i = 0; i < size; ++i)
782 converted_to[i] = applyVisitor(FieldVisitorConvertToNumber<Float64>(), to[i]);
783 used_to = &converted_to;
784 }
785 else if (!default_col_is_float && to_is_float)
786 {
787 if (const_default_col)
788 const_default_value = applyVisitor(FieldVisitorConvertToNumber<Float64>(), const_default_value);
789 }
790 }
791
792 /// Note: Doesn't check the duplicates in the `from` array.
793
794 if (from[0].getType() != Field::Types::String && to[0].getType() != Field::Types::String)
795 {
796 table_num_to_num = std::make_unique<NumToNum>();
797 auto & table = *table_num_to_num;
798 for (size_t i = 0; i < size; ++i)
799 {
800 // Field may be of Float type, but for the purpose of bitwise
801 // equality we can treat them as UInt64, hence the reinterpret().
802 table[from[i].reinterpret<UInt64>()] = (*used_to)[i].reinterpret<UInt64>();
803 }
804 }
805 else if (from[0].getType() != Field::Types::String && to[0].getType() == Field::Types::String)
806 {
807 table_num_to_string = std::make_unique<NumToString>();
808 auto & table = *table_num_to_string;
809 for (size_t i = 0; i < size; ++i)
810 {
811 const String & str_to = to[i].get<const String &>();
812 StringRef ref{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
813 table[from[i].reinterpret<UInt64>()] = ref;
814 }
815 }
816 else if (from[0].getType() == Field::Types::String && to[0].getType() != Field::Types::String)
817 {
818 table_string_to_num = std::make_unique<StringToNum>();
819 auto & table = *table_string_to_num;
820 for (size_t i = 0; i < size; ++i)
821 {
822 const String & str_from = from[i].get<const String &>();
823 StringRef ref{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
824 table[ref] = (*used_to)[i].reinterpret<UInt64>();
825 }
826 }
827 else if (from[0].getType() == Field::Types::String && to[0].getType() == Field::Types::String)
828 {
829 table_string_to_string = std::make_unique<StringToString>();
830 auto & table = *table_string_to_string;
831 for (size_t i = 0; i < size; ++i)
832 {
833 const String & str_from = from[i].get<const String &>();
834 const String & str_to = to[i].get<const String &>();
835 StringRef ref_from{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
836 StringRef ref_to{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
837 table[ref_from] = ref_to;
838 }
839 }
840
841 initialized = true;
842 }
843};
844
845void registerFunctionTransform(FunctionFactory & factory)
846{
847 factory.registerFunction<FunctionTransform>();
848}
849
850}
851