1 | #include <mutex> |
2 | #include <Common/FieldVisitors.h> |
3 | #include <DataTypes/DataTypesNumber.h> |
4 | #include <DataTypes/DataTypeString.h> |
5 | #include <DataTypes/DataTypeArray.h> |
6 | #include <Columns/ColumnString.h> |
7 | #include <Columns/ColumnArray.h> |
8 | #include <Columns/ColumnConst.h> |
9 | #include <Columns/ColumnsNumber.h> |
10 | #include <Common/Arena.h> |
11 | #include <Common/HashTable/HashMap.h> |
12 | #include <Common/typeid_cast.h> |
13 | #include <common/StringRef.h> |
14 | #include <Functions/IFunctionImpl.h> |
15 | #include <Functions/FunctionHelpers.h> |
16 | #include <Functions/FunctionFactory.h> |
17 | #include <DataTypes/getLeastSupertype.h> |
18 | |
19 | |
20 | namespace DB |
21 | { |
22 | |
/// Error codes thrown by this file. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT; /// Used by the argument type checks in getReturnTypeImpl.
    extern const int ILLEGAL_COLUMN;
}
29 | |
30 | |
/** transform(x, from_array, to_array[, default]) - convert x according to an explicitly specified mapping.
  */
33 | |
34 | /** transform(x, [from...], [to...], default) |
35 | * - converts the values according to the explicitly specified mapping. |
36 | * |
37 | * x - what to transform. |
38 | * from - a constant array of values for the transformation. |
39 | * to - a constant array of values into which values from `from` must be transformed. |
40 | * default - what value to use if x is not equal to any of the values in `from`. |
41 | * `from` and `to` - arrays of the same size. |
42 | * |
43 | * Types: |
44 | * transform(T, Array(T), Array(U), U) -> U |
45 | * |
46 | * transform(x, [from...], [to...]) |
47 | * - if `default` is not specified, then for values of `x` for which there is no corresponding element in `from`, the unchanged value of `x` is returned. |
48 | * |
49 | * Types: |
50 | * transform(T, Array(T), Array(T)) -> T |
51 | * |
52 | * Note: the implementation is rather cumbersome. |
53 | */ |
54 | class FunctionTransform : public IFunction |
55 | { |
56 | public: |
57 | static constexpr auto name = "transform" ; |
58 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionTransform>(); } |
59 | |
60 | String getName() const override |
61 | { |
62 | return name; |
63 | } |
64 | |
65 | bool isVariadic() const override { return true; } |
66 | size_t getNumberOfArguments() const override { return 0; } |
67 | bool useDefaultImplementationForConstants() const override { return true; } |
68 | ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } |
69 | |
70 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
71 | { |
72 | const auto args_size = arguments.size(); |
73 | if (args_size != 3 && args_size != 4) |
74 | throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " + toString(args_size) + ", should be 3 or 4" , |
75 | ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; |
76 | |
77 | const DataTypePtr & type_x = arguments[0]; |
78 | |
79 | if (!type_x->isValueRepresentedByNumber() && !isString(type_x)) |
80 | throw Exception{"Unsupported type " + type_x->getName() |
81 | + " of first argument of function " + getName() |
82 | + ", must be numeric type or Date/DateTime or String" , ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
83 | |
84 | const DataTypeArray * type_arr_from = checkAndGetDataType<DataTypeArray>(arguments[1].get()); |
85 | |
86 | if (!type_arr_from) |
87 | throw Exception{"Second argument of function " + getName() |
88 | + ", must be array of source values to transform from." , ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
89 | |
90 | const auto type_arr_from_nested = type_arr_from->getNestedType(); |
91 | |
92 | if ((type_x->isValueRepresentedByNumber() != type_arr_from_nested->isValueRepresentedByNumber()) |
93 | || (isString(type_x) != isString(type_arr_from_nested))) |
94 | { |
95 | throw Exception{"First argument and elements of array of second argument of function " + getName() |
96 | + " must have compatible types: both numeric or both strings." , ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
97 | } |
98 | |
99 | const DataTypeArray * type_arr_to = checkAndGetDataType<DataTypeArray>(arguments[2].get()); |
100 | |
101 | if (!type_arr_to) |
102 | throw Exception{"Third argument of function " + getName() |
103 | + ", must be array of destination values to transform to." , ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
104 | |
105 | const DataTypePtr & type_arr_to_nested = type_arr_to->getNestedType(); |
106 | |
107 | if (args_size == 3) |
108 | { |
109 | if ((type_x->isValueRepresentedByNumber() != type_arr_to_nested->isValueRepresentedByNumber()) |
110 | || (isString(type_x) != isString(type_arr_to_nested))) |
111 | throw Exception{"Function " + getName() |
112 | + " has signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types." , |
113 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
114 | |
115 | return type_x; |
116 | } |
117 | else |
118 | { |
119 | const DataTypePtr & type_default = arguments[3]; |
120 | |
121 | if (!type_default->isValueRepresentedByNumber() && !isString(type_default)) |
122 | throw Exception{"Unsupported type " + type_default->getName() |
123 | + " of fourth argument (default value) of function " + getName() |
124 | + ", must be numeric type or Date/DateTime or String" , ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
125 | |
126 | bool default_is_string = WhichDataType(type_default).isString(); |
127 | bool nested_is_string = WhichDataType(type_arr_to_nested).isString(); |
128 | |
129 | if ((type_default->isValueRepresentedByNumber() != type_arr_to_nested->isValueRepresentedByNumber()) |
130 | || (default_is_string != nested_is_string)) |
131 | throw Exception{"Function " + getName() |
132 | + " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types." , |
133 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
134 | |
135 | if (type_arr_to_nested->isValueRepresentedByNumber() && type_default->isValueRepresentedByNumber()) |
136 | { |
137 | /// We take the smallest common type for the elements of the array of values `to` and for `default`. |
138 | return getLeastSupertype({type_arr_to_nested, type_default}); |
139 | } |
140 | |
141 | /// TODO More checks. |
142 | return type_arr_to_nested; |
143 | } |
144 | } |
145 | |
146 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override |
147 | { |
148 | const ColumnConst * array_from = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[1]).column.get()); |
149 | const ColumnConst * array_to = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[2]).column.get()); |
150 | |
151 | if (!array_from || !array_to) |
152 | throw Exception{"Second and third arguments of function " + getName() + " must be constant arrays." , ErrorCodes::ILLEGAL_COLUMN}; |
153 | |
154 | initialize(array_from->getValue<Array>(), array_to->getValue<Array>(), block, arguments); |
155 | |
156 | const auto in = block.getByPosition(arguments.front()).column.get(); |
157 | |
158 | if (isColumnConst(*in)) |
159 | { |
160 | executeConst(block, arguments, result, input_rows_count); |
161 | return; |
162 | } |
163 | |
164 | const IColumn * default_column = nullptr; |
165 | if (arguments.size() == 4) |
166 | default_column = block.getByPosition(arguments[3]).column.get(); |
167 | |
168 | auto column_result = block.getByPosition(result).type->createColumn(); |
169 | auto out = column_result.get(); |
170 | |
171 | if (!executeNum<UInt8>(in, out, default_column) |
172 | && !executeNum<UInt16>(in, out, default_column) |
173 | && !executeNum<UInt32>(in, out, default_column) |
174 | && !executeNum<UInt64>(in, out, default_column) |
175 | && !executeNum<Int8>(in, out, default_column) |
176 | && !executeNum<Int16>(in, out, default_column) |
177 | && !executeNum<Int32>(in, out, default_column) |
178 | && !executeNum<Int64>(in, out, default_column) |
179 | && !executeNum<Float32>(in, out, default_column) |
180 | && !executeNum<Float64>(in, out, default_column) |
181 | && !executeString(in, out, default_column)) |
182 | { |
183 | throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; |
184 | } |
185 | |
186 | block.getByPosition(result).column = std::move(column_result); |
187 | } |
188 | |
189 | private: |
190 | void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) |
191 | { |
192 | /// Materialize the input column and compute the function as usual. |
193 | |
194 | Block tmp_block; |
195 | ColumnNumbers tmp_arguments; |
196 | |
197 | tmp_block.insert(block.getByPosition(arguments[0])); |
198 | tmp_block.getByPosition(0).column = tmp_block.getByPosition(0).column->cloneResized(input_rows_count)->convertToFullColumnIfConst(); |
199 | tmp_arguments.push_back(0); |
200 | |
201 | for (size_t i = 1; i < arguments.size(); ++i) |
202 | { |
203 | tmp_block.insert(block.getByPosition(arguments[i])); |
204 | tmp_arguments.push_back(i); |
205 | } |
206 | |
207 | auto impl = FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(std::make_shared<FunctionTransform>())) |
208 | .build(tmp_block.getColumnsWithTypeAndName()); |
209 | |
210 | tmp_block.insert(block.getByPosition(result)); |
211 | size_t tmp_result = arguments.size(); |
212 | |
213 | impl->execute(tmp_block, tmp_arguments, tmp_result, input_rows_count); |
214 | |
215 | block.getByPosition(result).column = tmp_block.getByPosition(tmp_result).column; |
216 | } |
217 | |
218 | template <typename T> |
219 | bool executeNum(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * default_untyped) |
220 | { |
221 | if (const auto in = checkAndGetColumn<ColumnVector<T>>(in_untyped)) |
222 | { |
223 | if (!default_untyped) |
224 | { |
225 | auto out = typeid_cast<ColumnVector<T> *>(out_untyped); |
226 | if (!out) |
227 | { |
228 | throw Exception{"Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName() |
229 | + ", must be " + in->getName(), ErrorCodes::ILLEGAL_COLUMN}; |
230 | } |
231 | |
232 | executeImplNumToNum<T>(in->getData(), out->getData()); |
233 | } |
234 | else if (isColumnConst(*default_untyped)) |
235 | { |
236 | if (!executeNumToNumWithConstDefault<T, UInt8>(in, out_untyped) |
237 | && !executeNumToNumWithConstDefault<T, UInt16>(in, out_untyped) |
238 | && !executeNumToNumWithConstDefault<T, UInt32>(in, out_untyped) |
239 | && !executeNumToNumWithConstDefault<T, UInt64>(in, out_untyped) |
240 | && !executeNumToNumWithConstDefault<T, Int8>(in, out_untyped) |
241 | && !executeNumToNumWithConstDefault<T, Int16>(in, out_untyped) |
242 | && !executeNumToNumWithConstDefault<T, Int32>(in, out_untyped) |
243 | && !executeNumToNumWithConstDefault<T, Int64>(in, out_untyped) |
244 | && !executeNumToNumWithConstDefault<T, Float32>(in, out_untyped) |
245 | && !executeNumToNumWithConstDefault<T, Float64>(in, out_untyped) |
246 | && !executeNumToStringWithConstDefault<T>(in, out_untyped)) |
247 | { |
248 | throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), |
249 | ErrorCodes::ILLEGAL_COLUMN}; |
250 | } |
251 | } |
252 | else |
253 | { |
254 | if (!executeNumToNumWithNonConstDefault<T, UInt8>(in, out_untyped, default_untyped) |
255 | && !executeNumToNumWithNonConstDefault<T, UInt16>(in, out_untyped, default_untyped) |
256 | && !executeNumToNumWithNonConstDefault<T, UInt32>(in, out_untyped, default_untyped) |
257 | && !executeNumToNumWithNonConstDefault<T, UInt64>(in, out_untyped, default_untyped) |
258 | && !executeNumToNumWithNonConstDefault<T, Int8>(in, out_untyped, default_untyped) |
259 | && !executeNumToNumWithNonConstDefault<T, Int16>(in, out_untyped, default_untyped) |
260 | && !executeNumToNumWithNonConstDefault<T, Int32>(in, out_untyped, default_untyped) |
261 | && !executeNumToNumWithNonConstDefault<T, Int64>(in, out_untyped, default_untyped) |
262 | && !executeNumToNumWithNonConstDefault<T, Float32>(in, out_untyped, default_untyped) |
263 | && !executeNumToNumWithNonConstDefault<T, Float64>(in, out_untyped, default_untyped) |
264 | && !executeNumToStringWithNonConstDefault<T>(in, out_untyped, default_untyped)) |
265 | { |
266 | throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), |
267 | ErrorCodes::ILLEGAL_COLUMN}; |
268 | } |
269 | } |
270 | |
271 | return true; |
272 | } |
273 | |
274 | return false; |
275 | } |
276 | |
277 | bool executeString(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * default_untyped) |
278 | { |
279 | if (const auto in = checkAndGetColumn<ColumnString>(in_untyped)) |
280 | { |
281 | if (!default_untyped) |
282 | { |
283 | if (!executeStringToString(in, out_untyped)) |
284 | throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), |
285 | ErrorCodes::ILLEGAL_COLUMN}; |
286 | } |
287 | else if (isColumnConst(*default_untyped)) |
288 | { |
289 | if (!executeStringToNumWithConstDefault<UInt8>(in, out_untyped) |
290 | && !executeStringToNumWithConstDefault<UInt16>(in, out_untyped) |
291 | && !executeStringToNumWithConstDefault<UInt32>(in, out_untyped) |
292 | && !executeStringToNumWithConstDefault<UInt64>(in, out_untyped) |
293 | && !executeStringToNumWithConstDefault<Int8>(in, out_untyped) |
294 | && !executeStringToNumWithConstDefault<Int16>(in, out_untyped) |
295 | && !executeStringToNumWithConstDefault<Int32>(in, out_untyped) |
296 | && !executeStringToNumWithConstDefault<Int64>(in, out_untyped) |
297 | && !executeStringToNumWithConstDefault<Float32>(in, out_untyped) |
298 | && !executeStringToNumWithConstDefault<Float64>(in, out_untyped) |
299 | && !executeStringToStringWithConstDefault(in, out_untyped)) |
300 | { |
301 | throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), |
302 | ErrorCodes::ILLEGAL_COLUMN}; |
303 | } |
304 | } |
305 | else |
306 | { |
307 | if (!executeStringToNumWithNonConstDefault<UInt8>(in, out_untyped, default_untyped) |
308 | && !executeStringToNumWithNonConstDefault<UInt16>(in, out_untyped, default_untyped) |
309 | && !executeStringToNumWithNonConstDefault<UInt32>(in, out_untyped, default_untyped) |
310 | && !executeStringToNumWithNonConstDefault<UInt64>(in, out_untyped, default_untyped) |
311 | && !executeStringToNumWithNonConstDefault<Int8>(in, out_untyped, default_untyped) |
312 | && !executeStringToNumWithNonConstDefault<Int16>(in, out_untyped, default_untyped) |
313 | && !executeStringToNumWithNonConstDefault<Int32>(in, out_untyped, default_untyped) |
314 | && !executeStringToNumWithNonConstDefault<Int64>(in, out_untyped, default_untyped) |
315 | && !executeStringToNumWithNonConstDefault<Float32>(in, out_untyped, default_untyped) |
316 | && !executeStringToNumWithNonConstDefault<Float64>(in, out_untyped, default_untyped) |
317 | && !executeStringToStringWithNonConstDefault(in, out_untyped, default_untyped)) |
318 | { |
319 | throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), |
320 | ErrorCodes::ILLEGAL_COLUMN}; |
321 | } |
322 | } |
323 | |
324 | return true; |
325 | } |
326 | |
327 | return false; |
328 | } |
329 | |
330 | template <typename T, typename U> |
331 | bool executeNumToNumWithConstDefault(const ColumnVector<T> * in, IColumn * out_untyped) |
332 | { |
333 | auto out = typeid_cast<ColumnVector<U> *>(out_untyped); |
334 | if (!out) |
335 | return false; |
336 | |
337 | executeImplNumToNumWithConstDefault<T, U>(in->getData(), out->getData(), const_default_value.get<U>()); |
338 | return true; |
339 | } |
340 | |
341 | template <typename T, typename U> |
342 | bool executeNumToNumWithNonConstDefault(const ColumnVector<T> * in, IColumn * out_untyped, const IColumn * default_untyped) |
343 | { |
344 | auto out = typeid_cast<ColumnVector<U> *>(out_untyped); |
345 | if (!out) |
346 | return false; |
347 | |
348 | if (!executeNumToNumWithNonConstDefault2<T, U, UInt8>(in, out, default_untyped) |
349 | && !executeNumToNumWithNonConstDefault2<T, U, UInt16>(in, out, default_untyped) |
350 | && !executeNumToNumWithNonConstDefault2<T, U, UInt32>(in, out, default_untyped) |
351 | && !executeNumToNumWithNonConstDefault2<T, U, UInt64>(in, out, default_untyped) |
352 | && !executeNumToNumWithNonConstDefault2<T, U, Int8>(in, out, default_untyped) |
353 | && !executeNumToNumWithNonConstDefault2<T, U, Int16>(in, out, default_untyped) |
354 | && !executeNumToNumWithNonConstDefault2<T, U, Int32>(in, out, default_untyped) |
355 | && !executeNumToNumWithNonConstDefault2<T, U, Int64>(in, out, default_untyped) |
356 | && !executeNumToNumWithNonConstDefault2<T, U, Float32>(in, out, default_untyped) |
357 | && !executeNumToNumWithNonConstDefault2<T, U, Float64>(in, out, default_untyped)) |
358 | { |
359 | throw Exception( |
360 | "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), |
361 | ErrorCodes::ILLEGAL_COLUMN); |
362 | } |
363 | |
364 | return true; |
365 | } |
366 | |
367 | template <typename T, typename U, typename V> |
368 | bool executeNumToNumWithNonConstDefault2(const ColumnVector<T> * in, ColumnVector<U> * out, const IColumn * default_untyped) |
369 | { |
370 | auto col_default = checkAndGetColumn<ColumnVector<V>>(default_untyped); |
371 | if (!col_default) |
372 | return false; |
373 | |
374 | executeImplNumToNumWithNonConstDefault<T, U, V>(in->getData(), out->getData(), col_default->getData()); |
375 | return true; |
376 | } |
377 | |
378 | template <typename T> |
379 | bool executeNumToStringWithConstDefault(const ColumnVector<T> * in, IColumn * out_untyped) |
380 | { |
381 | auto out = typeid_cast<ColumnString *>(out_untyped); |
382 | if (!out) |
383 | return false; |
384 | |
385 | const String & default_str = const_default_value.get<const String &>(); |
386 | StringRef default_string_ref{default_str.data(), default_str.size() + 1}; |
387 | executeImplNumToStringWithConstDefault<T>(in->getData(), out->getChars(), out->getOffsets(), default_string_ref); |
388 | return true; |
389 | } |
390 | |
391 | template <typename T> |
392 | bool executeNumToStringWithNonConstDefault(const ColumnVector<T> * in, IColumn * out_untyped, const IColumn * default_untyped) |
393 | { |
394 | auto out = typeid_cast<ColumnString *>(out_untyped); |
395 | if (!out) |
396 | return false; |
397 | |
398 | auto default_col = checkAndGetColumn<ColumnString>(default_untyped); |
399 | if (!default_col) |
400 | { |
401 | throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), |
402 | ErrorCodes::ILLEGAL_COLUMN}; |
403 | } |
404 | |
405 | executeImplNumToStringWithNonConstDefault<T>( |
406 | in->getData(), |
407 | out->getChars(), out->getOffsets(), |
408 | default_col->getChars(), default_col->getOffsets()); |
409 | |
410 | return true; |
411 | } |
412 | |
413 | template <typename U> |
414 | bool executeStringToNumWithConstDefault(const ColumnString * in, IColumn * out_untyped) |
415 | { |
416 | auto out = typeid_cast<ColumnVector<U> *>(out_untyped); |
417 | if (!out) |
418 | return false; |
419 | |
420 | executeImplStringToNumWithConstDefault<U>(in->getChars(), in->getOffsets(), out->getData(), const_default_value.get<U>()); |
421 | return true; |
422 | } |
423 | |
424 | template <typename U> |
425 | bool executeStringToNumWithNonConstDefault(const ColumnString * in, IColumn * out_untyped, const IColumn * default_untyped) |
426 | { |
427 | auto out = typeid_cast<ColumnVector<U> *>(out_untyped); |
428 | if (!out) |
429 | return false; |
430 | |
431 | if (!executeStringToNumWithNonConstDefault2<U, UInt8>(in, out, default_untyped) |
432 | && !executeStringToNumWithNonConstDefault2<U, UInt16>(in, out, default_untyped) |
433 | && !executeStringToNumWithNonConstDefault2<U, UInt32>(in, out, default_untyped) |
434 | && !executeStringToNumWithNonConstDefault2<U, UInt64>(in, out, default_untyped) |
435 | && !executeStringToNumWithNonConstDefault2<U, Int8>(in, out, default_untyped) |
436 | && !executeStringToNumWithNonConstDefault2<U, Int16>(in, out, default_untyped) |
437 | && !executeStringToNumWithNonConstDefault2<U, Int32>(in, out, default_untyped) |
438 | && !executeStringToNumWithNonConstDefault2<U, Int64>(in, out, default_untyped) |
439 | && !executeStringToNumWithNonConstDefault2<U, Float32>(in, out, default_untyped) |
440 | && !executeStringToNumWithNonConstDefault2<U, Float64>(in, out, default_untyped)) |
441 | { |
442 | throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), |
443 | ErrorCodes::ILLEGAL_COLUMN}; |
444 | } |
445 | |
446 | return true; |
447 | } |
448 | |
449 | template <typename U, typename V> |
450 | bool executeStringToNumWithNonConstDefault2(const ColumnString * in, ColumnVector<U> * out, const IColumn * default_untyped) |
451 | { |
452 | auto col_default = checkAndGetColumn<ColumnVector<V>>(default_untyped); |
453 | if (!col_default) |
454 | return false; |
455 | |
456 | executeImplStringToNumWithNonConstDefault<U, V>(in->getChars(), in->getOffsets(), out->getData(), col_default->getData()); |
457 | return true; |
458 | } |
459 | |
460 | bool executeStringToString(const ColumnString * in, IColumn * out_untyped) |
461 | { |
462 | auto out = typeid_cast<ColumnString *>(out_untyped); |
463 | if (!out) |
464 | return false; |
465 | |
466 | executeImplStringToString(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets()); |
467 | return true; |
468 | } |
469 | |
470 | bool executeStringToStringWithConstDefault(const ColumnString * in, IColumn * out_untyped) |
471 | { |
472 | auto out = typeid_cast<ColumnString *>(out_untyped); |
473 | if (!out) |
474 | return false; |
475 | |
476 | const String & default_str = const_default_value.get<const String &>(); |
477 | StringRef default_string_ref{default_str.data(), default_str.size() + 1}; |
478 | executeImplStringToStringWithConstDefault(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets(), default_string_ref); |
479 | return true; |
480 | } |
481 | |
482 | bool executeStringToStringWithNonConstDefault(const ColumnString * in, IColumn * out_untyped, const IColumn * default_untyped) |
483 | { |
484 | auto out = typeid_cast<ColumnString *>(out_untyped); |
485 | if (!out) |
486 | return false; |
487 | |
488 | auto default_col = checkAndGetColumn<ColumnString>(default_untyped); |
489 | if (!default_col) |
490 | { |
491 | throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), |
492 | ErrorCodes::ILLEGAL_COLUMN}; |
493 | } |
494 | |
495 | executeImplStringToStringWithNonConstDefault( |
496 | in->getChars(), in->getOffsets(), |
497 | out->getChars(), out->getOffsets(), |
498 | default_col->getChars(), default_col->getOffsets()); |
499 | |
500 | return true; |
501 | } |
502 | |
503 | |
504 | template <typename T, typename U> |
505 | void executeImplNumToNumWithConstDefault(const PaddedPODArray<T> & src, PaddedPODArray<U> & dst, U dst_default) |
506 | { |
507 | const auto & table = *table_num_to_num; |
508 | size_t size = src.size(); |
509 | dst.resize(size); |
510 | for (size_t i = 0; i < size; ++i) |
511 | { |
512 | auto it = table.find(src[i]); |
513 | if (it) |
514 | memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian. |
515 | else |
516 | dst[i] = dst_default; |
517 | } |
518 | } |
519 | |
520 | template <typename T, typename U, typename V> |
521 | void executeImplNumToNumWithNonConstDefault(const PaddedPODArray<T> & src, PaddedPODArray<U> & dst, const PaddedPODArray<V> & dst_default) |
522 | { |
523 | const auto & table = *table_num_to_num; |
524 | size_t size = src.size(); |
525 | dst.resize(size); |
526 | for (size_t i = 0; i < size; ++i) |
527 | { |
528 | auto it = table.find(src[i]); |
529 | if (it) |
530 | memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian. |
531 | else |
532 | dst[i] = dst_default[i]; |
533 | } |
534 | } |
535 | |
536 | template <typename T> |
537 | void executeImplNumToNum(const PaddedPODArray<T> & src, PaddedPODArray<T> & dst) |
538 | { |
539 | const auto & table = *table_num_to_num; |
540 | size_t size = src.size(); |
541 | dst.resize(size); |
542 | for (size_t i = 0; i < size; ++i) |
543 | { |
544 | auto it = table.find(src[i]); |
545 | if (it) |
546 | memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); |
547 | else |
548 | dst[i] = src[i]; |
549 | } |
550 | } |
551 | |
552 | template <typename T> |
553 | void executeImplNumToStringWithConstDefault(const PaddedPODArray<T> & src, |
554 | ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, StringRef dst_default) |
555 | { |
556 | const auto & table = *table_num_to_string; |
557 | size_t size = src.size(); |
558 | dst_offsets.resize(size); |
559 | ColumnString::Offset current_dst_offset = 0; |
560 | for (size_t i = 0; i < size; ++i) |
561 | { |
562 | auto it = table.find(src[i]); |
563 | StringRef ref = it ? it->getMapped() : dst_default; |
564 | dst_data.resize(current_dst_offset + ref.size); |
565 | memcpy(&dst_data[current_dst_offset], ref.data, ref.size); |
566 | current_dst_offset += ref.size; |
567 | dst_offsets[i] = current_dst_offset; |
568 | } |
569 | } |
570 | |
571 | template <typename T> |
572 | void executeImplNumToStringWithNonConstDefault(const PaddedPODArray<T> & src, |
573 | ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, |
574 | const ColumnString::Chars & dst_default_data, const ColumnString::Offsets & dst_default_offsets) |
575 | { |
576 | const auto & table = *table_num_to_string; |
577 | size_t size = src.size(); |
578 | dst_offsets.resize(size); |
579 | ColumnString::Offset current_dst_offset = 0; |
580 | ColumnString::Offset current_dst_default_offset = 0; |
581 | for (size_t i = 0; i < size; ++i) |
582 | { |
583 | auto it = table.find(src[i]); |
584 | StringRef ref; |
585 | |
586 | if (it) |
587 | ref = it->getMapped(); |
588 | else |
589 | { |
590 | ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]); |
591 | ref.size = dst_default_offsets[i] - current_dst_default_offset; |
592 | } |
593 | |
594 | dst_data.resize(current_dst_offset + ref.size); |
595 | memcpy(&dst_data[current_dst_offset], ref.data, ref.size); |
596 | current_dst_offset += ref.size; |
597 | current_dst_default_offset = dst_default_offsets[i]; |
598 | dst_offsets[i] = current_dst_offset; |
599 | } |
600 | } |
601 | |
602 | template <typename U> |
603 | void executeImplStringToNumWithConstDefault( |
604 | const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets, |
605 | PaddedPODArray<U> & dst, U dst_default) |
606 | { |
607 | const auto & table = *table_string_to_num; |
608 | size_t size = src_offsets.size(); |
609 | dst.resize(size); |
610 | ColumnString::Offset current_src_offset = 0; |
611 | for (size_t i = 0; i < size; ++i) |
612 | { |
613 | StringRef ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset}; |
614 | current_src_offset = src_offsets[i]; |
615 | auto it = table.find(ref); |
616 | if (it) |
617 | memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); |
618 | else |
619 | dst[i] = dst_default; |
620 | } |
621 | } |
622 | |
623 | template <typename U, typename V> |
624 | void executeImplStringToNumWithNonConstDefault( |
625 | const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets, |
626 | PaddedPODArray<U> & dst, const PaddedPODArray<V> & dst_default) |
627 | { |
628 | const auto & table = *table_string_to_num; |
629 | size_t size = src_offsets.size(); |
630 | dst.resize(size); |
631 | ColumnString::Offset current_src_offset = 0; |
632 | for (size_t i = 0; i < size; ++i) |
633 | { |
634 | StringRef ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset}; |
635 | current_src_offset = src_offsets[i]; |
636 | auto it = table.find(ref); |
637 | if (it) |
638 | memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); |
639 | else |
640 | dst[i] = dst_default[i]; |
641 | } |
642 | } |
643 | |
644 | template <bool with_default> |
645 | void executeImplStringToStringWithOrWithoutConstDefault( |
646 | const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets, |
647 | ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, StringRef dst_default) |
648 | { |
649 | const auto & table = *table_string_to_string; |
650 | size_t size = src_offsets.size(); |
651 | dst_offsets.resize(size); |
652 | ColumnString::Offset current_src_offset = 0; |
653 | ColumnString::Offset current_dst_offset = 0; |
654 | for (size_t i = 0; i < size; ++i) |
655 | { |
656 | StringRef src_ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset}; |
657 | current_src_offset = src_offsets[i]; |
658 | |
659 | auto it = table.find(src_ref); |
660 | |
661 | StringRef dst_ref = it ? it->getMapped() : (with_default ? dst_default : src_ref); |
662 | dst_data.resize(current_dst_offset + dst_ref.size); |
663 | memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size); |
664 | current_dst_offset += dst_ref.size; |
665 | dst_offsets[i] = current_dst_offset; |
666 | } |
667 | } |
668 | |
669 | void executeImplStringToString( |
670 | const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets, |
671 | ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets) |
672 | { |
673 | executeImplStringToStringWithOrWithoutConstDefault<false>(src_data, src_offsets, dst_data, dst_offsets, {}); |
674 | } |
675 | |
676 | void executeImplStringToStringWithConstDefault( |
677 | const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets, |
678 | ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, StringRef dst_default) |
679 | { |
680 | executeImplStringToStringWithOrWithoutConstDefault<true>(src_data, src_offsets, dst_data, dst_offsets, dst_default); |
681 | } |
682 | |
683 | void executeImplStringToStringWithNonConstDefault( |
684 | const ColumnString::Chars & src_data, const ColumnString::Offsets & src_offsets, |
685 | ColumnString::Chars & dst_data, ColumnString::Offsets & dst_offsets, |
686 | const ColumnString::Chars & dst_default_data, const ColumnString::Offsets & dst_default_offsets) |
687 | { |
688 | const auto & table = *table_string_to_string; |
689 | size_t size = src_offsets.size(); |
690 | dst_offsets.resize(size); |
691 | ColumnString::Offset current_src_offset = 0; |
692 | ColumnString::Offset current_dst_offset = 0; |
693 | ColumnString::Offset current_dst_default_offset = 0; |
694 | for (size_t i = 0; i < size; ++i) |
695 | { |
696 | StringRef src_ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset}; |
697 | current_src_offset = src_offsets[i]; |
698 | |
699 | auto it = table.find(src_ref); |
700 | StringRef dst_ref; |
701 | |
702 | if (it) |
703 | dst_ref = it->getMapped(); |
704 | else |
705 | { |
706 | dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]); |
707 | dst_ref.size = dst_default_offsets[i] - current_dst_default_offset; |
708 | } |
709 | |
710 | dst_data.resize(current_dst_offset + dst_ref.size); |
711 | memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size); |
712 | current_dst_offset += dst_ref.size; |
713 | current_dst_default_offset = dst_default_offsets[i]; |
714 | dst_offsets[i] = current_dst_offset; |
715 | } |
716 | } |
717 | |
718 | |
/// Different versions of the hash tables to implement the mapping.
/// The variant is chosen in initialize() by whether the first elements of `from` and `to` are strings.
/// Numeric keys and values are stored as the UInt64 obtained from Field::reinterpret<UInt64>().

using NumToNum = HashMap<UInt64, UInt64, HashCRC32<UInt64>>;
using NumToString = HashMap <UInt64, StringRef, HashCRC32<UInt64>>; /// Everywhere StringRef's with trailing zero.
using StringToNum = HashMap<StringRef, UInt64, StringRefHash>;
using StringToString = HashMap<StringRef, StringRef, StringRefHash>;

/// initialize() allocates the one table that matches the argument types.
std::unique_ptr<NumToNum> table_num_to_num;
std::unique_ptr<NumToString> table_num_to_string;
std::unique_ptr<StringToNum> table_string_to_num;
std::unique_ptr<StringToString> table_string_to_string;

/// Holds the copies of the `from` / `to` strings that the StringRef keys and values of the tables point to.
Arena string_pool;

/// Value of the constant default argument, captured in initialize().
Field const_default_value; /// Null, if not specified.

/// Used by initialize() so that the tables are built only once:
/// `initialized` is set after the tables are filled, `mutex` serializes the threads that build them.
std::atomic<bool> initialized {false};
std::mutex mutex;
737 | |
738 | /// Can be called from different threads. It works only on the first call. |
739 | void initialize(const Array & from, const Array & to, Block & block, const ColumnNumbers & arguments) |
740 | { |
741 | if (initialized) |
742 | return; |
743 | |
744 | const size_t size = from.size(); |
745 | if (0 == size) |
746 | throw Exception{"Empty arrays are illegal in function " + getName(), ErrorCodes::BAD_ARGUMENTS}; |
747 | |
748 | std::lock_guard lock(mutex); |
749 | |
750 | if (initialized) |
751 | return; |
752 | |
753 | if (size != to.size()) |
754 | throw Exception{"Second and third arguments of function " + getName() + " must be arrays of same size" , ErrorCodes::BAD_ARGUMENTS}; |
755 | |
756 | Array converted_to; |
757 | const Array * used_to = &to; |
758 | |
759 | /// Whether the default value is set. |
760 | |
761 | if (arguments.size() == 4) |
762 | { |
763 | const IColumn * default_col = block.getByPosition(arguments[3]).column.get(); |
764 | const ColumnConst * const_default_col = typeid_cast<const ColumnConst *>(default_col); |
765 | |
766 | if (const_default_col) |
767 | const_default_value = (*const_default_col)[0]; |
768 | |
769 | /// Do we need to convert the elements `to` and `default_value` to the smallest common type that is Float64? |
770 | bool default_col_is_float = |
771 | checkColumn<ColumnFloat32>(default_col) |
772 | || checkColumn<ColumnFloat64>(default_col) |
773 | || checkColumnConst<ColumnFloat32>(default_col) |
774 | || checkColumnConst<ColumnFloat64>(default_col); |
775 | |
776 | bool to_is_float = to[0].getType() == Field::Types::Float64; |
777 | |
778 | if (default_col_is_float && !to_is_float) |
779 | { |
780 | converted_to.resize(size); |
781 | for (size_t i = 0; i < size; ++i) |
782 | converted_to[i] = applyVisitor(FieldVisitorConvertToNumber<Float64>(), to[i]); |
783 | used_to = &converted_to; |
784 | } |
785 | else if (!default_col_is_float && to_is_float) |
786 | { |
787 | if (const_default_col) |
788 | const_default_value = applyVisitor(FieldVisitorConvertToNumber<Float64>(), const_default_value); |
789 | } |
790 | } |
791 | |
792 | /// Note: Doesn't check the duplicates in the `from` array. |
793 | |
794 | if (from[0].getType() != Field::Types::String && to[0].getType() != Field::Types::String) |
795 | { |
796 | table_num_to_num = std::make_unique<NumToNum>(); |
797 | auto & table = *table_num_to_num; |
798 | for (size_t i = 0; i < size; ++i) |
799 | { |
800 | // Field may be of Float type, but for the purpose of bitwise |
801 | // equality we can treat them as UInt64, hence the reinterpret(). |
802 | table[from[i].reinterpret<UInt64>()] = (*used_to)[i].reinterpret<UInt64>(); |
803 | } |
804 | } |
805 | else if (from[0].getType() != Field::Types::String && to[0].getType() == Field::Types::String) |
806 | { |
807 | table_num_to_string = std::make_unique<NumToString>(); |
808 | auto & table = *table_num_to_string; |
809 | for (size_t i = 0; i < size; ++i) |
810 | { |
811 | const String & str_to = to[i].get<const String &>(); |
812 | StringRef ref{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1}; |
813 | table[from[i].reinterpret<UInt64>()] = ref; |
814 | } |
815 | } |
816 | else if (from[0].getType() == Field::Types::String && to[0].getType() != Field::Types::String) |
817 | { |
818 | table_string_to_num = std::make_unique<StringToNum>(); |
819 | auto & table = *table_string_to_num; |
820 | for (size_t i = 0; i < size; ++i) |
821 | { |
822 | const String & str_from = from[i].get<const String &>(); |
823 | StringRef ref{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1}; |
824 | table[ref] = (*used_to)[i].reinterpret<UInt64>(); |
825 | } |
826 | } |
827 | else if (from[0].getType() == Field::Types::String && to[0].getType() == Field::Types::String) |
828 | { |
829 | table_string_to_string = std::make_unique<StringToString>(); |
830 | auto & table = *table_string_to_string; |
831 | for (size_t i = 0; i < size; ++i) |
832 | { |
833 | const String & str_from = from[i].get<const String &>(); |
834 | const String & str_to = to[i].get<const String &>(); |
835 | StringRef ref_from{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1}; |
836 | StringRef ref_to{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1}; |
837 | table[ref_from] = ref_to; |
838 | } |
839 | } |
840 | |
841 | initialized = true; |
842 | } |
843 | }; |
844 | |
/// Registers FunctionTransform in the given function factory.
void registerFunctionTransform(FunctionFactory & factory)
{
    factory.registerFunction<FunctionTransform>();
}
849 | |
850 | } |
851 | |