1#pragma once
2
3#include <Common/hex.h>
4#include <Common/formatIPv6.h>
5#include <Common/typeid_cast.h>
6#include <IO/WriteHelpers.h>
7#include <DataTypes/DataTypeFactory.h>
8#include <DataTypes/DataTypesNumber.h>
9#include <DataTypes/DataTypeString.h>
10#include <DataTypes/DataTypeFixedString.h>
11#include <DataTypes/DataTypeArray.h>
12#include <DataTypes/DataTypeDate.h>
13#include <DataTypes/DataTypeDateTime.h>
14#include <DataTypes/DataTypeUUID.h>
15#include <DataTypes/DataTypeTuple.h>
16#include <Columns/ColumnsNumber.h>
17#include <Columns/ColumnString.h>
18#include <Columns/ColumnFixedString.h>
19#include <Columns/ColumnArray.h>
20#include <Columns/ColumnConst.h>
21#include <Columns/ColumnTuple.h>
22#include <Columns/ColumnDecimal.h>
23#include <Functions/IFunctionImpl.h>
24#include <Functions/FunctionHelpers.h>
25
26#include <arpa/inet.h>
27#include <ext/range.h>
28#include <type_traits>
29#include <array>
30
31
32namespace DB
33{
34
/// Error codes thrown by the functions in this file. These are declarations only;
/// the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_COLUMN;
    /// Used by the argument type checks below; declared here so that this header
    /// does not depend on another header happening to declare it.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
41
42
/** TODO: This file contains a ridiculous amount of copy-paste.
 */
45
46/** Encoding functions:
47 *
48 * IPv4NumToString (num) - See below.
49 * IPv4StringToNum(string) - Convert, for example, '192.168.0.1' to 3232235521 and vice versa.
50 *
51 * hex(x) - Returns hex; capital letters; there are no prefixes 0x or suffixes h.
52 * For numbers, returns a variable-length string - hex in the "human" (big endian) format, with the leading zeros being cut,
53 * but only by whole bytes. For dates and datetimes - the same as for numbers.
54 * For example, hex(257) = '0101'.
 * unhex(string) - Returns a string whose hex representation equals `string`, ignoring letter case and one discarded leading zero.
 * If no such string exists, an arbitrary implementation-specific value may be returned.
57 *
58 * bitmaskToArray(x) - Returns an array of powers of two in the binary form of x. For example, bitmaskToArray(50) = [2, 16, 32].
59 */
60
61
/// Size of a UUID in binary form, in bytes.
constexpr size_t uuid_bytes_length = 16;
/// Size of a UUID in text form, without a terminating zero:
/// two hex digits per byte plus four separator characters.
constexpr size_t uuid_text_length = 2 * uuid_bytes_length + 4;
64
65
66class FunctionIPv6NumToString : public IFunction
67{
68public:
69 static constexpr auto name = "IPv6NumToString";
70 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6NumToString>(); }
71
72 String getName() const override { return name; }
73
74 size_t getNumberOfArguments() const override { return 1; }
75 bool isInjective(const Block &) override { return true; }
76
77 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
78 {
79 const auto ptr = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
80 if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH)
81 throw Exception("Illegal type " + arguments[0]->getName() +
82 " of argument of function " + getName() +
83 ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")",
84 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
85
86 return std::make_shared<DataTypeString>();
87 }
88
89 bool useDefaultImplementationForConstants() const override { return true; }
90
91 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
92 {
93 const auto & col_type_name = block.getByPosition(arguments[0]);
94 const ColumnPtr & column = col_type_name.column;
95
96 if (const auto col_in = checkAndGetColumn<ColumnFixedString>(column.get()))
97 {
98 if (col_in->getN() != IPV6_BINARY_LENGTH)
99 throw Exception("Illegal type " + col_type_name.type->getName() +
100 " of column " + col_in->getName() +
101 " argument of function " + getName() +
102 ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")",
103 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
104
105 const auto size = col_in->size();
106 const auto & vec_in = col_in->getChars();
107
108 auto col_res = ColumnString::create();
109
110 ColumnString::Chars & vec_res = col_res->getChars();
111 ColumnString::Offsets & offsets_res = col_res->getOffsets();
112 vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
113 offsets_res.resize(size);
114
115 auto begin = reinterpret_cast<char *>(vec_res.data());
116 auto pos = begin;
117
118 for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i)
119 {
120 formatIPv6(&vec_in[offset], pos);
121 offsets_res[i] = pos - begin;
122 }
123
124 vec_res.resize(pos - begin);
125
126 block.getByPosition(result).column = std::move(col_res);
127 }
128 else
129 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
130 + " of argument of function " + getName(),
131 ErrorCodes::ILLEGAL_COLUMN);
132 }
133};
134
135
136class FunctionCutIPv6 : public IFunction
137{
138public:
139 static constexpr auto name = "cutIPv6";
140 static FunctionPtr create(const Context &) { return std::make_shared<FunctionCutIPv6>(); }
141
142 String getName() const override { return name; }
143
144 size_t getNumberOfArguments() const override { return 3; }
145
146 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
147 {
148 const auto ptr = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
149 if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH)
150 throw Exception("Illegal type " + arguments[0]->getName() +
151 " of argument 1 of function " + getName() +
152 ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")",
153 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
154
155 if (!WhichDataType(arguments[1]).isUInt8())
156 throw Exception("Illegal type " + arguments[1]->getName() +
157 " of argument 2 of function " + getName(),
158 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
159
160 if (!WhichDataType(arguments[2]).isUInt8())
161 throw Exception("Illegal type " + arguments[2]->getName() +
162 " of argument 3 of function " + getName(),
163 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
164
165 return std::make_shared<DataTypeString>();
166 }
167
168 bool useDefaultImplementationForConstants() const override { return true; }
169 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
170
171 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
172 {
173 const auto & col_type_name = block.getByPosition(arguments[0]);
174 const ColumnPtr & column = col_type_name.column;
175
176 const auto & col_ipv6_zeroed_tail_bytes_type = block.getByPosition(arguments[1]);
177 const auto & col_ipv6_zeroed_tail_bytes = col_ipv6_zeroed_tail_bytes_type.column;
178 const auto & col_ipv4_zeroed_tail_bytes_type = block.getByPosition(arguments[2]);
179 const auto & col_ipv4_zeroed_tail_bytes = col_ipv4_zeroed_tail_bytes_type.column;
180
181 if (const auto col_in = checkAndGetColumn<ColumnFixedString>(column.get()))
182 {
183 if (col_in->getN() != IPV6_BINARY_LENGTH)
184 throw Exception("Illegal type " + col_type_name.type->getName() +
185 " of column " + col_in->getName() +
186 " argument of function " + getName() +
187 ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")",
188 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
189
190 const auto ipv6_zeroed_tail_bytes = checkAndGetColumnConst<ColumnVector<UInt8>>(col_ipv6_zeroed_tail_bytes.get());
191 if (!ipv6_zeroed_tail_bytes)
192 throw Exception("Illegal type " + col_ipv6_zeroed_tail_bytes_type.type->getName() +
193 " of argument 2 of function " + getName(),
194 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
195
196 UInt8 ipv6_zeroed_tail_bytes_count = ipv6_zeroed_tail_bytes->getValue<UInt8>();
197 if (ipv6_zeroed_tail_bytes_count > IPV6_BINARY_LENGTH)
198 throw Exception("Illegal value for argument 2 " + col_ipv6_zeroed_tail_bytes_type.type->getName() +
199 " of function " + getName(),
200 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
201
202 const auto ipv4_zeroed_tail_bytes = checkAndGetColumnConst<ColumnVector<UInt8>>(col_ipv4_zeroed_tail_bytes.get());
203 if (!ipv4_zeroed_tail_bytes)
204 throw Exception("Illegal type " + col_ipv4_zeroed_tail_bytes_type.type->getName() +
205 " of argument 3 of function " + getName(),
206 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
207
208 UInt8 ipv4_zeroed_tail_bytes_count = ipv4_zeroed_tail_bytes->getValue<UInt8>();
209 if (ipv4_zeroed_tail_bytes_count > IPV6_BINARY_LENGTH)
210 throw Exception("Illegal value for argument 3 " + col_ipv4_zeroed_tail_bytes_type.type->getName() +
211 " of function " + getName(),
212 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
213
214 const auto size = col_in->size();
215 const auto & vec_in = col_in->getChars();
216
217 auto col_res = ColumnString::create();
218
219 ColumnString::Chars & vec_res = col_res->getChars();
220 ColumnString::Offsets & offsets_res = col_res->getOffsets();
221 vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
222 offsets_res.resize(size);
223
224 auto begin = reinterpret_cast<char *>(vec_res.data());
225 auto pos = begin;
226
227 for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i)
228 {
229 const auto address = &vec_in[offset];
230 UInt8 zeroed_tail_bytes_count = isIPv4Mapped(address) ? ipv4_zeroed_tail_bytes_count : ipv6_zeroed_tail_bytes_count;
231 cutAddress(address, pos, zeroed_tail_bytes_count);
232 offsets_res[i] = pos - begin;
233 }
234
235 vec_res.resize(pos - begin);
236
237 block.getByPosition(result).column = std::move(col_res);
238 }
239 else
240 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
241 + " of argument of function " + getName(),
242 ErrorCodes::ILLEGAL_COLUMN);
243 }
244
245private:
246 bool isIPv4Mapped(const unsigned char * address) const
247 {
248 return (*reinterpret_cast<const UInt64 *>(address) == 0) &&
249 ((*reinterpret_cast<const UInt64 *>(address + 8) & 0x00000000FFFFFFFFull) == 0x00000000FFFF0000ull);
250 }
251
252 void cutAddress(const unsigned char * address, char *& dst, UInt8 zeroed_tail_bytes_count)
253 {
254 formatIPv6(address, dst, zeroed_tail_bytes_count);
255 }
256};
257
258
259class FunctionIPv6StringToNum : public IFunction
260{
261public:
262 static constexpr auto name = "IPv6StringToNum";
263 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6StringToNum>(); }
264
265 String getName() const override { return name; }
266
267 size_t getNumberOfArguments() const override { return 1; }
268
269 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
270 {
271 if (!isString(arguments[0]))
272 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
273 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
274
275 return std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH);
276 }
277
278 bool useDefaultImplementationForConstants() const override { return true; }
279
280 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
281 {
282 const ColumnPtr & column = block.getByPosition(arguments[0]).column;
283
284 if (const auto col_in = checkAndGetColumn<ColumnString>(column.get()))
285 {
286 auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH);
287
288 auto & vec_res = col_res->getChars();
289 vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH);
290
291 const ColumnString::Chars & vec_src = col_in->getChars();
292 const ColumnString::Offsets & offsets_src = col_in->getOffsets();
293 size_t src_offset = 0;
294
295 for (size_t out_offset = 0, i = 0;
296 out_offset < vec_res.size();
297 out_offset += IPV6_BINARY_LENGTH, ++i)
298 {
299 //TODO(nemkov): handle failure ?
300 parseIPv6(reinterpret_cast<const char *>(&vec_src[src_offset]), &vec_res[out_offset]);
301 src_offset = offsets_src[i];
302 }
303
304 block.getByPosition(result).column = std::move(col_res);
305 }
306 else
307 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
308 + " of argument of function " + getName(),
309 ErrorCodes::ILLEGAL_COLUMN);
310 }
311};
312
313
314/** If mask_tail_octets > 0, the last specified number of octets will be filled with "xxx".
315 */
316template <size_t mask_tail_octets, typename Name>
317class FunctionIPv4NumToString : public IFunction
318{
319public:
320 static constexpr auto name = Name::name;
321 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv4NumToString<mask_tail_octets, Name>>(); }
322
323 String getName() const override
324 {
325 return name;
326 }
327
328 size_t getNumberOfArguments() const override { return 1; }
329 bool isInjective(const Block &) override { return mask_tail_octets == 0; }
330
331 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
332 {
333 if (!WhichDataType(arguments[0]).isUInt32())
334 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt32",
335 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
336
337 return std::make_shared<DataTypeString>();
338 }
339
340 bool useDefaultImplementationForConstants() const override { return true; }
341
342 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
343 {
344 const ColumnPtr & column = block.getByPosition(arguments[0]).column;
345
346 if (const ColumnUInt32 * col = typeid_cast<const ColumnUInt32 *>(column.get()))
347 {
348 const ColumnUInt32::Container & vec_in = col->getData();
349
350 auto col_res = ColumnString::create();
351
352 ColumnString::Chars & vec_res = col_res->getChars();
353 ColumnString::Offsets & offsets_res = col_res->getOffsets();
354
355 vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
356 offsets_res.resize(vec_in.size());
357 char * begin = reinterpret_cast<char *>(vec_res.data());
358 char * pos = begin;
359
360 for (size_t i = 0; i < vec_in.size(); ++i)
361 {
362 DB::formatIPv4(reinterpret_cast<const unsigned char*>(&vec_in[i]), pos, mask_tail_octets, "xxx");
363 offsets_res[i] = pos - begin;
364 }
365
366 vec_res.resize(pos - begin);
367
368 block.getByPosition(result).column = std::move(col_res);
369 }
370 else
371 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
372 + " of argument of function " + getName(),
373 ErrorCodes::ILLEGAL_COLUMN);
374 }
375};
376
377
378class FunctionIPv4StringToNum : public IFunction
379{
380public:
381 static constexpr auto name = "IPv4StringToNum";
382 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv4StringToNum>(); }
383
384 String getName() const override
385 {
386 return name;
387 }
388
389 size_t getNumberOfArguments() const override { return 1; }
390
391 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
392 {
393 if (!isString(arguments[0]))
394 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
395 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
396
397 return std::make_shared<DataTypeUInt32>();
398 }
399
400 static inline UInt32 parseIPv4(const char * pos)
401 {
402 UInt32 result = 0;
403 DB::parseIPv4(pos, reinterpret_cast<unsigned char*>(&result));
404
405 return result;
406 }
407
408 bool useDefaultImplementationForConstants() const override { return true; }
409
410 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
411 {
412 const ColumnPtr & column = block.getByPosition(arguments[0]).column;
413
414 if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
415 {
416 auto col_res = ColumnUInt32::create();
417
418 ColumnUInt32::Container & vec_res = col_res->getData();
419 vec_res.resize(col->size());
420
421 const ColumnString::Chars & vec_src = col->getChars();
422 const ColumnString::Offsets & offsets_src = col->getOffsets();
423 size_t prev_offset = 0;
424
425 for (size_t i = 0; i < vec_res.size(); ++i)
426 {
427 vec_res[i] = parseIPv4(reinterpret_cast<const char *>(&vec_src[prev_offset]));
428 prev_offset = offsets_src[i];
429 }
430
431 block.getByPosition(result).column = std::move(col_res);
432 }
433 else
434 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
435 + " of argument of function " + getName(),
436 ErrorCodes::ILLEGAL_COLUMN);
437 }
438};
439
440
441class FunctionIPv4ToIPv6 : public IFunction
442{
443public:
444 static constexpr auto name = "IPv4ToIPv6";
445 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv4ToIPv6>(); }
446
447 String getName() const override { return name; }
448
449 size_t getNumberOfArguments() const override { return 1; }
450 bool isInjective(const Block &) override { return true; }
451
452 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
453 {
454 if (!checkAndGetDataType<DataTypeUInt32>(arguments[0].get()))
455 throw Exception("Illegal type " + arguments[0]->getName() +
456 " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
457
458 return std::make_shared<DataTypeFixedString>(16);
459 }
460
461 bool useDefaultImplementationForConstants() const override { return true; }
462
463 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
464 {
465 const auto & col_type_name = block.getByPosition(arguments[0]);
466 const ColumnPtr & column = col_type_name.column;
467
468 if (const auto col_in = typeid_cast<const ColumnUInt32 *>(column.get()))
469 {
470 auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH);
471
472 auto & vec_res = col_res->getChars();
473 vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH);
474
475 const auto & vec_in = col_in->getData();
476
477 for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
478 mapIPv4ToIPv6(vec_in[i], &vec_res[out_offset]);
479
480 block.getByPosition(result).column = std::move(col_res);
481 }
482 else
483 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
484 + " of argument of function " + getName(),
485 ErrorCodes::ILLEGAL_COLUMN);
486 }
487
488private:
489 void mapIPv4ToIPv6(UInt32 in, unsigned char * buf) const
490 {
491 *reinterpret_cast<UInt64 *>(buf) = 0;
492 *reinterpret_cast<UInt64 *>(buf + 8) = 0x00000000FFFF0000ull | (static_cast<UInt64>(ntohl(in)) << 32);
493 }
494};
495
496class FunctionToIPv4 : public FunctionIPv4StringToNum
497{
498public:
499 static constexpr auto name = "toIPv4";
500 static FunctionPtr create(const Context &) { return std::make_shared<FunctionToIPv4>(); }
501
502 String getName() const override
503 {
504 return name;
505 }
506
507 size_t getNumberOfArguments() const override { return 1; }
508
509 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
510 {
511 if (!isString(arguments[0]))
512 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
513 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
514
515 return DataTypeFactory::instance().get("IPv4");
516 }
517};
518
519class FunctionToIPv6 : public FunctionIPv6StringToNum
520{
521public:
522 static constexpr auto name = "toIPv6";
523 static FunctionPtr create(const Context &) { return std::make_shared<FunctionToIPv6>(); }
524
525 String getName() const override { return name; }
526
527 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
528 {
529 if (!isString(arguments[0]))
530 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
531 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
532
533 return DataTypeFactory::instance().get("IPv6");
534 }
535};
536
537class FunctionMACNumToString : public IFunction
538{
539public:
540 static constexpr auto name = "MACNumToString";
541 static FunctionPtr create(const Context &) { return std::make_shared<FunctionMACNumToString>(); }
542
543 String getName() const override
544 {
545 return name;
546 }
547
548 size_t getNumberOfArguments() const override { return 1; }
549 bool isInjective(const Block &) override { return true; }
550
551 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
552 {
553 if (!WhichDataType(arguments[0]).isUInt64())
554 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt64",
555 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
556
557 return std::make_shared<DataTypeString>();
558 }
559
560 static void formatMAC(UInt64 mac, unsigned char * out)
561 {
562 /// MAC address is represented in UInt64 in natural order (so, MAC addresses are compared in same order as UInt64).
563 /// Higher two bytes in UInt64 are just ignored.
564
565 writeHexByteUppercase(mac >> 40, &out[0]);
566 out[2] = ':';
567 writeHexByteUppercase(mac >> 32, &out[3]);
568 out[5] = ':';
569 writeHexByteUppercase(mac >> 24, &out[6]);
570 out[8] = ':';
571 writeHexByteUppercase(mac >> 16, &out[9]);
572 out[11] = ':';
573 writeHexByteUppercase(mac >> 8, &out[12]);
574 out[14] = ':';
575 writeHexByteUppercase(mac, &out[15]);
576 out[17] = '\0';
577 }
578
579 bool useDefaultImplementationForConstants() const override { return true; }
580
581 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
582 {
583 const ColumnPtr & column = block.getByPosition(arguments[0]).column;
584
585 if (const ColumnUInt64 * col = typeid_cast<const ColumnUInt64 *>(column.get()))
586 {
587 const ColumnUInt64::Container & vec_in = col->getData();
588
589 auto col_res = ColumnString::create();
590
591 ColumnString::Chars & vec_res = col_res->getChars();
592 ColumnString::Offsets & offsets_res = col_res->getOffsets();
593
594 vec_res.resize(vec_in.size() * 18); /// the value is: xx:xx:xx:xx:xx:xx\0
595 offsets_res.resize(vec_in.size());
596
597 size_t current_offset = 0;
598 for (size_t i = 0; i < vec_in.size(); ++i)
599 {
600 formatMAC(vec_in[i], &vec_res[current_offset]);
601 current_offset += 18;
602 offsets_res[i] = current_offset;
603 }
604
605 block.getByPosition(result).column = std::move(col_res);
606 }
607 else
608 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
609 + " of argument of function " + getName(),
610 ErrorCodes::ILLEGAL_COLUMN);
611 }
612};
613
614
615struct ParseMACImpl
616{
617 static constexpr size_t min_string_size = 17;
618 static constexpr size_t max_string_size = 17;
619
620 /** Example: 01:02:03:04:05:06.
621 * There could be any separators instead of : and them are just ignored.
622 * The order of resulting integers are correspond to the order of MAC address.
623 * If there are any chars other than valid hex digits for bytes, the behaviour is implementation specific.
624 */
625 static UInt64 parse(const char * pos)
626 {
627 return (UInt64(unhex(pos[0])) << 44)
628 | (UInt64(unhex(pos[1])) << 40)
629 | (UInt64(unhex(pos[3])) << 36)
630 | (UInt64(unhex(pos[4])) << 32)
631 | (UInt64(unhex(pos[6])) << 28)
632 | (UInt64(unhex(pos[7])) << 24)
633 | (UInt64(unhex(pos[9])) << 20)
634 | (UInt64(unhex(pos[10])) << 16)
635 | (UInt64(unhex(pos[12])) << 12)
636 | (UInt64(unhex(pos[13])) << 8)
637 | (UInt64(unhex(pos[15])) << 4)
638 | (UInt64(unhex(pos[16])));
639 }
640
641 static constexpr auto name = "MACStringToNum";
642};
643
644struct ParseOUIImpl
645{
646 static constexpr size_t min_string_size = 8;
647 static constexpr size_t max_string_size = 17;
648
649 /** OUI is the first three bytes of MAC address.
650 * Example: 01:02:03.
651 */
652 static UInt64 parse(const char * pos)
653 {
654 return (UInt64(unhex(pos[0])) << 20)
655 | (UInt64(unhex(pos[1])) << 16)
656 | (UInt64(unhex(pos[3])) << 12)
657 | (UInt64(unhex(pos[4])) << 8)
658 | (UInt64(unhex(pos[6])) << 4)
659 | (UInt64(unhex(pos[7])));
660 }
661
662 static constexpr auto name = "MACStringToOUI";
663};
664
665
666template <typename Impl>
667class FunctionMACStringTo : public IFunction
668{
669public:
670 static constexpr auto name = Impl::name;
671 static FunctionPtr create(const Context &) { return std::make_shared<FunctionMACStringTo<Impl>>(); }
672
673 String getName() const override
674 {
675 return name;
676 }
677
678 size_t getNumberOfArguments() const override { return 1; }
679
680 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
681 {
682 if (!isString(arguments[0]))
683 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
684 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
685
686 return std::make_shared<DataTypeUInt64>();
687 }
688
689 bool useDefaultImplementationForConstants() const override { return true; }
690
691 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
692 {
693 const ColumnPtr & column = block.getByPosition(arguments[0]).column;
694
695 if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
696 {
697 auto col_res = ColumnUInt64::create();
698
699 ColumnUInt64::Container & vec_res = col_res->getData();
700 vec_res.resize(col->size());
701
702 const ColumnString::Chars & vec_src = col->getChars();
703 const ColumnString::Offsets & offsets_src = col->getOffsets();
704 size_t prev_offset = 0;
705
706 for (size_t i = 0; i < vec_res.size(); ++i)
707 {
708 size_t current_offset = offsets_src[i];
709 size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte
710
711 if (string_size >= Impl::min_string_size && string_size <= Impl::max_string_size)
712 vec_res[i] = Impl::parse(reinterpret_cast<const char *>(&vec_src[prev_offset]));
713 else
714 vec_res[i] = 0;
715
716 prev_offset = current_offset;
717 }
718
719 block.getByPosition(result).column = std::move(col_res);
720 }
721 else
722 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
723 + " of argument of function " + getName(),
724 ErrorCodes::ILLEGAL_COLUMN);
725 }
726};
727
728
729class FunctionUUIDNumToString : public IFunction
730{
731
732public:
733 static constexpr auto name = "UUIDNumToString";
734 static FunctionPtr create(const Context &) { return std::make_shared<FunctionUUIDNumToString>(); }
735
736 String getName() const override
737 {
738 return name;
739 }
740
741 size_t getNumberOfArguments() const override { return 1; }
742 bool isInjective(const Block &) override { return true; }
743
744 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
745 {
746 const auto ptr = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
747 if (!ptr || ptr->getN() != uuid_bytes_length)
748 throw Exception("Illegal type " + arguments[0]->getName() +
749 " of argument of function " + getName() +
750 ", expected FixedString(" + toString(uuid_bytes_length) + ")",
751 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
752
753 return std::make_shared<DataTypeString>();
754 }
755
756 bool useDefaultImplementationForConstants() const override { return true; }
757
758 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
759 {
760 const ColumnWithTypeAndName & col_type_name = block.getByPosition(arguments[0]);
761 const ColumnPtr & column = col_type_name.column;
762
763 if (const auto col_in = checkAndGetColumn<ColumnFixedString>(column.get()))
764 {
765 if (col_in->getN() != uuid_bytes_length)
766 throw Exception("Illegal type " + col_type_name.type->getName() +
767 " of column " + col_in->getName() +
768 " argument of function " + getName() +
769 ", expected FixedString(" + toString(uuid_bytes_length) + ")",
770 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
771
772 const auto size = col_in->size();
773 const auto & vec_in = col_in->getChars();
774
775 auto col_res = ColumnString::create();
776
777 ColumnString::Chars & vec_res = col_res->getChars();
778 ColumnString::Offsets & offsets_res = col_res->getOffsets();
779 vec_res.resize(size * (uuid_text_length + 1));
780 offsets_res.resize(size);
781
782 size_t src_offset = 0;
783 size_t dst_offset = 0;
784
785 for (size_t i = 0; i < size; ++i)
786 {
787 formatUUID(&vec_in[src_offset], &vec_res[dst_offset]);
788 src_offset += uuid_bytes_length;
789 dst_offset += uuid_text_length;
790 vec_res[dst_offset] = 0;
791 ++dst_offset;
792 offsets_res[i] = dst_offset;
793 }
794
795 block.getByPosition(result).column = std::move(col_res);
796 }
797 else
798 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
799 + " of argument of function " + getName(),
800 ErrorCodes::ILLEGAL_COLUMN);
801 }
802};
803
804
805class FunctionUUIDStringToNum : public IFunction
806{
807private:
808 static void parseHex(const UInt8 * __restrict src, UInt8 * __restrict dst, const size_t num_bytes)
809 {
810 size_t src_pos = 0;
811 size_t dst_pos = 0;
812 for (; dst_pos < num_bytes; ++dst_pos)
813 {
814 dst[dst_pos] = unhex2(reinterpret_cast<const char *>(&src[src_pos]));
815 src_pos += 2;
816 }
817 }
818
819 static void parseUUID(const UInt8 * src36, UInt8 * dst16)
820 {
821 /// If string is not like UUID - implementation specific behaviour.
822
823 parseHex(&src36[0], &dst16[0], 4);
824 parseHex(&src36[9], &dst16[4], 2);
825 parseHex(&src36[14], &dst16[6], 2);
826 parseHex(&src36[19], &dst16[8], 2);
827 parseHex(&src36[24], &dst16[10], 6);
828 }
829
830public:
831 static constexpr auto name = "UUIDStringToNum";
832 static FunctionPtr create(const Context &) { return std::make_shared<FunctionUUIDStringToNum>(); }
833
834 String getName() const override
835 {
836 return name;
837 }
838
839 size_t getNumberOfArguments() const override { return 1; }
840 bool isInjective(const Block &) override { return true; }
841
842 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
843 {
844 /// String or FixedString(36)
845 if (!isString(arguments[0]))
846 {
847 const auto ptr = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
848 if (!ptr || ptr->getN() != uuid_text_length)
849 throw Exception("Illegal type " + arguments[0]->getName() +
850 " of argument of function " + getName() +
851 ", expected FixedString(" + toString(uuid_text_length) + ")",
852 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
853 }
854
855 return std::make_shared<DataTypeFixedString>(uuid_bytes_length);
856 }
857
858 bool useDefaultImplementationForConstants() const override { return true; }
859
860 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
861 {
862 const ColumnWithTypeAndName & col_type_name = block.getByPosition(arguments[0]);
863 const ColumnPtr & column = col_type_name.column;
864
865 if (const auto col_in = checkAndGetColumn<ColumnString>(column.get()))
866 {
867 const auto & vec_in = col_in->getChars();
868 const auto & offsets_in = col_in->getOffsets();
869 const size_t size = offsets_in.size();
870
871 auto col_res = ColumnFixedString::create(uuid_bytes_length);
872
873 ColumnString::Chars & vec_res = col_res->getChars();
874 vec_res.resize(size * uuid_bytes_length);
875
876 size_t src_offset = 0;
877 size_t dst_offset = 0;
878
879 for (size_t i = 0; i < size; ++i)
880 {
881 /// If string has incorrect length - then return zero UUID.
882 /// If string has correct length but contains something not like UUID - implementation specific behaviour.
883
884 size_t string_size = offsets_in[i] - src_offset;
885 if (string_size == uuid_text_length + 1)
886 parseUUID(&vec_in[src_offset], &vec_res[dst_offset]);
887 else
888 memset(&vec_res[dst_offset], 0, uuid_bytes_length);
889
890 dst_offset += uuid_bytes_length;
891 src_offset += string_size;
892 }
893
894 block.getByPosition(result).column = std::move(col_res);
895 }
896 else if (const auto col_in_fixed = checkAndGetColumn<ColumnFixedString>(column.get()))
897 {
898 if (col_in_fixed->getN() != uuid_text_length)
899 throw Exception("Illegal type " + col_type_name.type->getName() +
900 " of column " + col_in_fixed->getName() +
901 " argument of function " + getName() +
902 ", expected FixedString(" + toString(uuid_text_length) + ")",
903 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
904
905 const auto size = col_in_fixed->size();
906 const auto & vec_in = col_in_fixed->getChars();
907
908 auto col_res = ColumnFixedString::create(uuid_bytes_length);
909
910 ColumnString::Chars & vec_res = col_res->getChars();
911 vec_res.resize(size * uuid_bytes_length);
912
913 size_t src_offset = 0;
914 size_t dst_offset = 0;
915
916 for (size_t i = 0; i < size; ++i)
917 {
918 parseUUID(&vec_in[src_offset], &vec_res[dst_offset]);
919 src_offset += uuid_text_length;
920 dst_offset += uuid_bytes_length;
921 }
922
923 block.getByPosition(result).column = std::move(col_res);
924 }
925 else
926 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
927 + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
928 }
929};
930
931
/** Function hex(x): returns the uppercase hexadecimal representation of the argument as a String.
  *
  * Accepted argument types (see getReturnTypeImpl): String, FixedString, Date, DateTime,
  * unsigned integers, floats and Decimals.
  *  - Unsigned integer columns: most significant byte first, leading zero bytes are skipped,
  *    but the least significant byte is always printed (so zero gives "00").
  *  - String / FixedString: every byte of the value is printed in order.
  *  - Float / Decimal: the bytes of the in-memory value are printed in memory order, nothing is trimmed.
  *
  * Every result value is written into the ColumnString character buffer followed by a zero byte,
  * and the corresponding offset points just past that zero byte.
  */
class FunctionHex : public IFunction
{
public:
    static constexpr auto name = "hex";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionHex>(); }

    String getName() const override
    {
        return name;
    }

    size_t getNumberOfArguments() const override { return 1; }
    bool isInjective(const Block &) override { return true; }

    /// Validates the argument type; the result type is always String.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT for anything other than the types listed in the condition below.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        WhichDataType which(arguments[0]);

        if (!which.isStringOrFixedString() &&
            !which.isDateOrDateTime() &&
            !which.isUInt() &&
            !which.isFloat() &&
            !which.isDecimal())
            throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return std::make_shared<DataTypeString>();
    }

    /// Writes the hex form of one unsigned integer at `out`, followed by a zero byte,
    /// and advances `out` past everything written.
    /// The caller must provide room for up to sizeof(T) * 2 + 1 bytes.
    template <typename T>
    void executeOneUInt(T x, char *& out)
    {
        bool was_nonzero = false;
        /// Walk the value from the most significant byte down to the least significant one.
        for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8)
        {
            UInt8 byte = x >> offset;

            /// Leading zeros. The lowest byte (offset == 0) is never skipped, so at least one byte is printed.
            if (byte == 0 && !was_nonzero && offset)
                continue;

            was_nonzero = true;

            /// Each byte takes two characters in the output (out is advanced by 2 right after).
            writeHexByteUppercase(byte, out);
            out += 2;
        }
        *out = '\0';
        ++out;
    }

    /// If `col` is a ColumnVector<T>, converts it row by row with executeOneUInt,
    /// stores the resulting ColumnString into `col_res` and returns true.
    /// Returns false (leaving `col_res` untouched) for any other column type.
    template <typename T>
    bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res)
    {
        const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);

        static constexpr size_t MAX_UINT_HEX_LENGTH = sizeof(T) * 2 + 1;    /// Including trailing zero byte.

        if (col_vec)
        {
            auto col_str = ColumnString::create();
            ColumnString::Chars & out_vec = col_str->getChars();
            ColumnString::Offsets & out_offsets = col_str->getOffsets();

            const typename ColumnVector<T>::Container & in_vec = col_vec->getData();

            size_t size = in_vec.size();
            out_offsets.resize(size);
            out_vec.resize(size * 3 + MAX_UINT_HEX_LENGTH);    /// 3 is length of one byte in hex plus zero byte.

            size_t pos = 0;
            for (size_t i = 0; i < size; ++i)
            {
                /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it).
                if (pos + MAX_UINT_HEX_LENGTH > out_vec.size())
                    out_vec.resize(out_vec.size() * 2 + MAX_UINT_HEX_LENGTH);

                /// Pointers are recomputed on every iteration because the resize above may have changed the buffer.
                char * begin = reinterpret_cast<char *>(&out_vec[pos]);
                char * end = begin;
                executeOneUInt<T>(in_vec[i], end);

                pos += end - begin;
                out_offsets[i] = pos;
            }

            /// Drop the unused tail of the over-allocated buffer.
            out_vec.resize(pos);

            col_res = std::move(col_str);
            return true;
        }
        else
        {
            return false;
        }
    }

    /// Shared implementation for float and decimal containers: prints the `type_size_in_bytes` raw bytes
    /// of every element in memory order. Each row therefore takes exactly `hex_length` output bytes,
    /// so the buffer is sized once up front and never grown or trimmed.
    template <typename T>
    void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes)
    {
        const size_t hex_length = type_size_in_bytes * 2 + 1; /// Including trailing zero byte.
        auto col_str = ColumnString::create();

        ColumnString::Chars & out_vec = col_str->getChars();
        ColumnString::Offsets & out_offsets = col_str->getOffsets();

        size_t size = in_vec.size();
        out_offsets.resize(size);
        out_vec.resize(size * hex_length);

        size_t pos = 0;
        char * out = reinterpret_cast<char *>(&out_vec[0]);
        for (size_t i = 0; i < size; ++i)
        {
            const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
            /// executeOneString advances `out` by reference; `pos` only tracks the offset to record.
            executeOneString(in_pos, in_pos + type_size_in_bytes, out);

            pos += hex_length;
            out_offsets[i] = pos;
        }
        col_res = std::move(col_str);
    }

    /// Handles ColumnVector<T> for floating point T; returns false if `col` is not such a column.
    template <typename T>
    bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res)
    {
        const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
        if (col_vec)
        {
            const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
            executeFloatAndDecimal<typename ColumnVector<T>::Container>(in_vec, col_res, sizeof(T));
            return true;
        }
        else
        {
            return false;
        }
    }

    /// Handles ColumnDecimal<T>; returns false if `col` is not such a column.
    template <typename T>
    bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res)
    {
        const ColumnDecimal<T> * col_dec = checkAndGetColumn<ColumnDecimal<T>>(col);
        if (col_dec)
        {
            const typename ColumnDecimal<T>::Container & in_vec = col_dec->getData();
            executeFloatAndDecimal<typename ColumnDecimal<T>::Container>(in_vec, col_res, sizeof(T));
            return true;
        }
        else
        {
            return false;
        }
    }


    /// Writes two hex characters for every byte in [pos, end), then a zero byte,
    /// and advances `out` past everything written (2 * (end - pos) + 1 bytes).
    void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
    {
        while (pos < end)
        {
            writeHexByteUppercase(*pos, out);
            ++pos;
            out += 2;
        }
        *out = '\0';
        ++out;
    }

    /// Handles ColumnString; returns false if `col` is not a ColumnString.
    /// Throws LOGICAL_ERROR if the amount of data written does not match the precomputed buffer size.
    bool tryExecuteString(const IColumn * col, ColumnPtr & col_res)
    {
        const ColumnString * col_str_in = checkAndGetColumn<ColumnString>(col);

        if (col_str_in)
        {
            auto col_str = ColumnString::create();
            ColumnString::Chars & out_vec = col_str->getChars();
            ColumnString::Offsets & out_offsets = col_str->getOffsets();

            const ColumnString::Chars & in_vec = col_str_in->getChars();
            const ColumnString::Offsets & in_offsets = col_str_in->getOffsets();

            size_t size = in_offsets.size();
            out_offsets.resize(size);
            /// A row stored as len + 1 input bytes (the last one is excluded below via `new_offset - 1`)
            /// becomes 2 * len + 1 output bytes, i.e. 2 * (len + 1) - 1; summed over all rows this is the size below.
            out_vec.resize(in_vec.size() * 2 - size);

            char * begin = reinterpret_cast<char *>(out_vec.data());
            char * pos = begin;
            size_t prev_offset = 0;

            for (size_t i = 0; i < size; ++i)
            {
                size_t new_offset = in_offsets[i];

                /// The end pointer excludes the last stored byte of the row.
                executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos);

                out_offsets[i] = pos - begin;

                prev_offset = new_offset;
            }

            /// Sanity check: the exact-size preallocation must have been filled completely.
            if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
                throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);

            col_res = std::move(col_str);
            return true;
        }
        else
        {
            return false;
        }
    }

    /// Handles ColumnFixedString; returns false if `col` is not a ColumnFixedString.
    /// Throws LOGICAL_ERROR if the amount of data written does not match the precomputed buffer size.
    bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res)
    {
        const ColumnFixedString * col_fstr_in = checkAndGetColumn<ColumnFixedString>(col);

        if (col_fstr_in)
        {
            auto col_str = ColumnString::create();
            ColumnString::Chars & out_vec = col_str->getChars();
            ColumnString::Offsets & out_offsets = col_str->getOffsets();

            const ColumnString::Chars & in_vec = col_fstr_in->getChars();

            size_t size = col_fstr_in->size();

            out_offsets.resize(size);
            /// All n bytes of each row are printed (2 * n characters) plus one zero byte per row.
            out_vec.resize(in_vec.size() * 2 + size);

            char * begin = reinterpret_cast<char *>(out_vec.data());
            char * pos = begin;

            size_t n = col_fstr_in->getN();

            size_t prev_offset = 0;

            for (size_t i = 0; i < size; ++i)
            {
                size_t new_offset = prev_offset + n;

                executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos);

                out_offsets[i] = pos - begin;
                prev_offset = new_offset;
            }

            /// Sanity check: the exact-size preallocation must have been filled completely.
            if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
                throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);

            col_res = std::move(col_str);
            return true;
        }
        else
        {
            return false;
        }
    }

    bool useDefaultImplementationForConstants() const override { return true; }

    /// Tries each supported column type in turn; the first handler that recognises the column
    /// writes the result. Throws ILLEGAL_COLUMN when no handler matches.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
    {
        const IColumn * column = block.getByPosition(arguments[0]).column.get();
        ColumnPtr & res_column = block.getByPosition(result).column;

        if (tryExecuteUInt<UInt8>(column, res_column) ||
            tryExecuteUInt<UInt16>(column, res_column) ||
            tryExecuteUInt<UInt32>(column, res_column) ||
            tryExecuteUInt<UInt64>(column, res_column) ||
            tryExecuteString(column, res_column) ||
            tryExecuteFixedString(column, res_column) ||
            tryExecuteFloat<Float32>(column, res_column) ||
            tryExecuteFloat<Float64>(column, res_column) ||
            tryExecuteDecimal<Decimal32>(column, res_column) ||
            tryExecuteDecimal<Decimal64>(column, res_column) ||
            tryExecuteDecimal<Decimal128>(column, res_column))
            return;

        throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
                        + " of argument of function " + getName(),
                        ErrorCodes::ILLEGAL_COLUMN);
    }
};
1213
1214
1215class FunctionUnhex : public IFunction
1216{
1217public:
1218 static constexpr auto name = "unhex";
1219 static FunctionPtr create(const Context &) { return std::make_shared<FunctionUnhex>(); }
1220
1221 String getName() const override
1222 {
1223 return name;
1224 }
1225
1226 size_t getNumberOfArguments() const override { return 1; }
1227 bool isInjective(const Block &) override { return true; }
1228
1229 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
1230 {
1231 if (!isString(arguments[0]))
1232 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
1233 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1234
1235 return std::make_shared<DataTypeString>();
1236 }
1237
1238 void unhexOne(const char * pos, const char * end, char *& out)
1239 {
1240 if ((end - pos) & 1)
1241 {
1242 *out = unhex(*pos);
1243 ++out;
1244 ++pos;
1245 }
1246 while (pos < end)
1247 {
1248 *out = unhex2(pos);
1249 pos += 2;
1250 ++out;
1251 }
1252 *out = '\0';
1253 ++out;
1254 }
1255
1256 bool useDefaultImplementationForConstants() const override { return true; }
1257
1258 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
1259 {
1260 const ColumnPtr & column = block.getByPosition(arguments[0]).column;
1261
1262 if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
1263 {
1264 auto col_res = ColumnString::create();
1265
1266 ColumnString::Chars & out_vec = col_res->getChars();
1267 ColumnString::Offsets & out_offsets = col_res->getOffsets();
1268
1269 const ColumnString::Chars & in_vec = col->getChars();
1270 const ColumnString::Offsets & in_offsets = col->getOffsets();
1271
1272 size_t size = in_offsets.size();
1273 out_offsets.resize(size);
1274 out_vec.resize(in_vec.size() / 2 + size);
1275
1276 char * begin = reinterpret_cast<char *>(out_vec.data());
1277 char * pos = begin;
1278 size_t prev_offset = 0;
1279
1280 for (size_t i = 0; i < size; ++i)
1281 {
1282 size_t new_offset = in_offsets[i];
1283
1284 unhexOne(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
1285
1286 out_offsets[i] = pos - begin;
1287
1288 prev_offset = new_offset;
1289 }
1290
1291 out_vec.resize(pos - begin);
1292
1293 block.getByPosition(result).column = std::move(col_res);
1294 }
1295 else
1296 {
1297 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
1298 + " of argument of function " + getName(),
1299 ErrorCodes::ILLEGAL_COLUMN);
1300 }
1301 }
1302};
1303
1304class FunctionChar : public IFunction
1305{
1306public:
1307 static constexpr auto name = "char";
1308 static FunctionPtr create(const Context &) { return std::make_shared<FunctionChar>(); }
1309
1310 String getName() const override
1311 {
1312 return name;
1313 }
1314
1315 bool isVariadic() const override { return true; }
1316 bool isInjective(const Block &) override { return true; }
1317 size_t getNumberOfArguments() const override { return 0; }
1318
1319 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
1320 {
1321 if (arguments.empty())
1322 throw Exception("Number of arguments for function " + getName() + " can't be " + toString(arguments.size())
1323 + ", should be at least 1", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1324
1325 for (const auto & arg : arguments)
1326 {
1327 WhichDataType which(arg);
1328 if (!(which.isInt() || which.isUInt() || which.isFloat()))
1329 throw Exception("Illegal type " + arg->getName() + " of argument of function " + getName()
1330 + ", must be Int, UInt or Float number",
1331 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1332 }
1333 return std::make_shared<DataTypeString>();
1334 }
1335
1336 bool useDefaultImplementationForConstants() const override { return true; }
1337
1338 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
1339 {
1340 auto col_str = ColumnString::create();
1341 ColumnString::Chars & out_vec = col_str->getChars();
1342 ColumnString::Offsets & out_offsets = col_str->getOffsets();
1343
1344 const auto size_per_row = arguments.size() + 1;
1345 out_vec.resize(size_per_row * input_rows_count);
1346 out_offsets.resize(input_rows_count);
1347
1348 for (size_t row = 0; row < input_rows_count; ++row)
1349 {
1350 out_offsets[row] = size_per_row + out_offsets[row - 1];
1351 out_vec[row * size_per_row + size_per_row - 1] = '\0';
1352 }
1353
1354 Columns columns_holder(arguments.size());
1355 for (size_t idx = 0; idx < arguments.size(); ++idx)
1356 {
1357 //partial const column
1358 columns_holder[idx] = block.getByPosition(arguments[idx]).column->convertToFullColumnIfConst();
1359 const IColumn * column = columns_holder[idx].get();
1360
1361 if (!(executeNumber<UInt8>(*column, out_vec, idx, input_rows_count, size_per_row)
1362 || executeNumber<UInt16>(*column, out_vec, idx, input_rows_count, size_per_row)
1363 || executeNumber<UInt32>(*column, out_vec, idx, input_rows_count, size_per_row)
1364 || executeNumber<UInt64>(*column, out_vec, idx, input_rows_count, size_per_row)
1365 || executeNumber<Int8>(*column, out_vec, idx, input_rows_count, size_per_row)
1366 || executeNumber<Int16>(*column, out_vec, idx, input_rows_count, size_per_row)
1367 || executeNumber<Int32>(*column, out_vec, idx, input_rows_count, size_per_row)
1368 || executeNumber<Int64>(*column, out_vec, idx, input_rows_count, size_per_row)
1369 || executeNumber<Float32>(*column, out_vec, idx, input_rows_count, size_per_row)
1370 || executeNumber<Float64>(*column, out_vec, idx, input_rows_count, size_per_row)))
1371 {
1372 throw Exception{"Illegal column " + block.getByPosition(arguments[idx]).column->getName()
1373 + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
1374 }
1375 }
1376
1377 block.getByPosition(result).column = std::move(col_str);
1378 }
1379
1380private:
1381 template <typename T>
1382 bool executeNumber(const IColumn & src_data, ColumnString::Chars & out_vec, const size_t & column_idx, const size_t & rows, const size_t & size_per_row)
1383 {
1384 const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
1385
1386 if (!src_data_concrete)
1387 {
1388 return false;
1389 }
1390
1391 for (size_t row = 0; row < rows; ++row)
1392 {
1393 out_vec[row * size_per_row + column_idx] = static_cast<char>(src_data_concrete->getInt(row));
1394 }
1395 return true;
1396 }
1397};
1398
1399class FunctionBitmaskToArray : public IFunction
1400{
1401public:
1402 static constexpr auto name = "bitmaskToArray";
1403 static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmaskToArray>(); }
1404
1405 String getName() const override
1406 {
1407 return name;
1408 }
1409
1410 size_t getNumberOfArguments() const override { return 1; }
1411 bool isInjective(const Block &) override { return true; }
1412
1413 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
1414 {
1415 if (!isInteger(arguments[0]))
1416 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
1417 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1418
1419 return std::make_shared<DataTypeArray>(arguments[0]);
1420 }
1421
1422 bool useDefaultImplementationForConstants() const override { return true; }
1423
1424 template <typename T>
1425 bool tryExecute(const IColumn * column, ColumnPtr & out_column)
1426 {
1427 using UnsignedT = std::make_unsigned_t<T>;
1428
1429 if (const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(column))
1430 {
1431 auto col_values = ColumnVector<T>::create();
1432 auto col_offsets = ColumnArray::ColumnOffsets::create();
1433
1434 typename ColumnVector<T>::Container & res_values = col_values->getData();
1435 ColumnArray::Offsets & res_offsets = col_offsets->getData();
1436
1437 const typename ColumnVector<T>::Container & vec_from = col_from->getData();
1438 size_t size = vec_from.size();
1439 res_offsets.resize(size);
1440 res_values.reserve(size * 2);
1441
1442 for (size_t row = 0; row < size; ++row)
1443 {
1444 UnsignedT x = vec_from[row];
1445 while (x)
1446 {
1447 UnsignedT y = x & (x - 1);
1448 UnsignedT bit = x ^ y;
1449 x = y;
1450 res_values.push_back(bit);
1451 }
1452 res_offsets[row] = res_values.size();
1453 }
1454
1455 out_column = ColumnArray::create(std::move(col_values), std::move(col_offsets));
1456 return true;
1457 }
1458 else
1459 {
1460 return false;
1461 }
1462 }
1463
1464 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
1465 {
1466 const IColumn * in_column = block.getByPosition(arguments[0]).column.get();
1467 ColumnPtr & out_column = block.getByPosition(result).column;
1468
1469 if (tryExecute<UInt8>(in_column, out_column) ||
1470 tryExecute<UInt16>(in_column, out_column) ||
1471 tryExecute<UInt32>(in_column, out_column) ||
1472 tryExecute<UInt64>(in_column, out_column) ||
1473 tryExecute<Int8>(in_column, out_column) ||
1474 tryExecute<Int16>(in_column, out_column) ||
1475 tryExecute<Int32>(in_column, out_column) ||
1476 tryExecute<Int64>(in_column, out_column))
1477 return;
1478
1479 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
1480 + " of first argument of function " + getName(),
1481 ErrorCodes::ILLEGAL_COLUMN);
1482 }
1483};
1484
1485class FunctionToStringCutToZero : public IFunction
1486{
1487public:
1488 static constexpr auto name = "toStringCutToZero";
1489 static FunctionPtr create(const Context &) { return std::make_shared<FunctionToStringCutToZero>(); }
1490
1491 String getName() const override
1492 {
1493 return name;
1494 }
1495
1496 size_t getNumberOfArguments() const override { return 1; }
1497
1498 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
1499 {
1500 if (!isStringOrFixedString(arguments[0]))
1501 throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
1502 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1503
1504 return std::make_shared<DataTypeString>();
1505 }
1506
1507 bool useDefaultImplementationForConstants() const override { return true; }
1508
1509 bool tryExecuteString(const IColumn * col, ColumnPtr & col_res)
1510 {
1511 const ColumnString * col_str_in = checkAndGetColumn<ColumnString>(col);
1512
1513 if (col_str_in)
1514 {
1515 auto col_str = ColumnString::create();
1516 ColumnString::Chars & out_vec = col_str->getChars();
1517 ColumnString::Offsets & out_offsets = col_str->getOffsets();
1518
1519 const ColumnString::Chars & in_vec = col_str_in->getChars();
1520 const ColumnString::Offsets & in_offsets = col_str_in->getOffsets();
1521
1522 size_t size = in_offsets.size();
1523 out_offsets.resize(size);
1524 out_vec.resize(in_vec.size());
1525
1526 char * begin = reinterpret_cast<char *>(out_vec.data());
1527 char * pos = begin;
1528
1529 ColumnString::Offset current_in_offset = 0;
1530
1531 for (size_t i = 0; i < size; ++i)
1532 {
1533 const char * pos_in = reinterpret_cast<const char *>(&in_vec[current_in_offset]);
1534 size_t current_size = strlen(pos_in);
1535 memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size);
1536 pos += current_size;
1537 *pos = '\0';
1538 ++pos;
1539 out_offsets[i] = pos - begin;
1540 current_in_offset = in_offsets[i];
1541 }
1542 out_vec.resize(pos - begin);
1543
1544 if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
1545 throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
1546
1547 col_res = std::move(col_str);
1548 return true;
1549 }
1550 else
1551 {
1552 return false;
1553 }
1554 }
1555
1556 bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res)
1557 {
1558 const ColumnFixedString * col_fstr_in = checkAndGetColumn<ColumnFixedString>(col);
1559
1560 if (col_fstr_in)
1561 {
1562 auto col_str = ColumnString::create();
1563 ColumnString::Chars & out_vec = col_str->getChars();
1564 ColumnString::Offsets & out_offsets = col_str->getOffsets();
1565
1566 const ColumnString::Chars & in_vec = col_fstr_in->getChars();
1567
1568 size_t size = col_fstr_in->size();
1569
1570 out_offsets.resize(size);
1571 out_vec.resize(in_vec.size() + size);
1572
1573 char * begin = reinterpret_cast<char *>(out_vec.data());
1574 char * pos = begin;
1575 const char * pos_in = reinterpret_cast<const char *>(in_vec.data());
1576
1577 size_t n = col_fstr_in->getN();
1578
1579 for (size_t i = 0; i < size; ++i)
1580 {
1581 size_t current_size = strnlen(pos_in, n);
1582 memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size);
1583 pos += current_size;
1584 *pos = '\0';
1585 out_offsets[i] = ++pos - begin;
1586 pos_in += n;
1587 }
1588 out_vec.resize(pos - begin);
1589
1590 if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
1591 throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
1592
1593 col_res = std::move(col_str);
1594 return true;
1595 }
1596 else
1597 {
1598 return false;
1599 }
1600 }
1601
1602 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
1603 {
1604 const IColumn * column = block.getByPosition(arguments[0]).column.get();
1605 ColumnPtr & res_column = block.getByPosition(result).column;
1606
1607 if (tryExecuteFixedString(column, res_column) || tryExecuteString(column, res_column))
1608 return;
1609
1610 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
1611 + " of argument of function " + getName(),
1612 ErrorCodes::ILLEGAL_COLUMN);
1613 }
1614};
1615
1616
1617class FunctionIPv6CIDRToRange : public IFunction
1618{
1619private:
1620 /// TODO Inefficient.
1621 /// NOTE IPv6 is stored in memory in big endian format that makes some difficulties.
1622 static void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep)
1623 {
1624 UInt8 mask[16]{};
1625
1626 UInt8 bytes_to_keep = bits_to_keep / 8;
1627 UInt8 bits_to_keep_in_last_byte = bits_to_keep % 8;
1628
1629 for (size_t i = 0; i < bits_to_keep / 8; ++i)
1630 mask[i] = 0xFFU;
1631
1632 if (bits_to_keep_in_last_byte)
1633 mask[bytes_to_keep] = 0xFFU << (8 - bits_to_keep_in_last_byte);
1634
1635 for (size_t i = 0; i < 16; ++i)
1636 {
1637 dst_lower[i] = src[i] & mask[i];
1638 dst_upper[i] = dst_lower[i] | ~mask[i];
1639 }
1640 }
1641
1642public:
1643 static constexpr auto name = "IPv6CIDRToRange";
1644 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6CIDRToRange>(); }
1645
1646 String getName() const override { return name; }
1647 size_t getNumberOfArguments() const override { return 2; }
1648
1649 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
1650 {
1651 const auto first_argument = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
1652 if (!first_argument || first_argument->getN() != IPV6_BINARY_LENGTH)
1653 throw Exception("Illegal type " + arguments[0]->getName() +
1654 " of first argument of function " + getName() +
1655 ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")",
1656 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1657
1658 const DataTypePtr & second_argument = arguments[1];
1659 if (!isUInt8(second_argument))
1660 throw Exception{"Illegal type " + second_argument->getName()
1661 + " of second argument of function " + getName()
1662 + ", expected numeric type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
1663
1664 DataTypePtr element = DataTypeFactory::instance().get("IPv6");
1665 return std::make_shared<DataTypeTuple>(DataTypes{element, element});
1666 }
1667
1668 bool useDefaultImplementationForConstants() const override { return true; }
1669
1670
1671 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
1672 {
1673 const auto & col_type_name_ip = block.getByPosition(arguments[0]);
1674 const ColumnPtr & column_ip = col_type_name_ip.column;
1675
1676 const auto col_ip_in = checkAndGetColumn<ColumnFixedString>(column_ip.get());
1677
1678 if (!col_ip_in)
1679 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
1680 + " of argument of function " + getName(),
1681 ErrorCodes::ILLEGAL_COLUMN);
1682
1683 if (col_ip_in->getN() != IPV6_BINARY_LENGTH)
1684 throw Exception("Illegal type " + col_type_name_ip.type->getName() +
1685 " of column " + col_ip_in->getName() +
1686 " argument of function " + getName() +
1687 ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")",
1688 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1689
1690 const auto & col_type_name_cidr = block.getByPosition(arguments[1]);
1691 const ColumnPtr & column_cidr = col_type_name_cidr.column;
1692
1693 const auto col_const_cidr_in = checkAndGetColumnConst<ColumnUInt8>(column_cidr.get());
1694 const auto col_cidr_in = checkAndGetColumn<ColumnUInt8>(column_cidr.get());
1695
1696 if (!col_const_cidr_in && !col_cidr_in)
1697 throw Exception("Illegal column " + block.getByPosition(arguments[1]).column->getName()
1698 + " of argument of function " + getName(),
1699 ErrorCodes::ILLEGAL_COLUMN);
1700
1701 const auto & vec_in = col_ip_in->getChars();
1702
1703 auto col_res_lower_range = ColumnFixedString::create(IPV6_BINARY_LENGTH);
1704 auto col_res_upper_range = ColumnFixedString::create(IPV6_BINARY_LENGTH);
1705
1706 ColumnString::Chars & vec_res_lower_range = col_res_lower_range->getChars();
1707 vec_res_lower_range.resize(input_rows_count * IPV6_BINARY_LENGTH);
1708
1709 ColumnString::Chars & vec_res_upper_range = col_res_upper_range->getChars();
1710 vec_res_upper_range.resize(input_rows_count * IPV6_BINARY_LENGTH);
1711
1712 static constexpr UInt8 max_cidr_mask = IPV6_BINARY_LENGTH * 8;
1713
1714 for (size_t offset = 0; offset < input_rows_count; ++offset)
1715 {
1716 const size_t offset_ipv6 = offset * IPV6_BINARY_LENGTH;
1717 UInt8 cidr = col_const_cidr_in
1718 ? col_const_cidr_in->getValue<UInt8>()
1719 : col_cidr_in->getData()[offset];
1720 cidr = std::min(cidr, max_cidr_mask);
1721 applyCIDRMask(&vec_in[offset_ipv6], &vec_res_lower_range[offset_ipv6], &vec_res_upper_range[offset_ipv6], cidr);
1722 }
1723
1724 block.getByPosition(result).column = ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)});
1725 }
1726};
1727
1728
1729class FunctionIPv4CIDRToRange : public IFunction
1730{
1731private:
1732 static inline std::pair<UInt32, UInt32> applyCIDRMask(UInt32 src, UInt8 bits_to_keep)
1733 {
1734 if (bits_to_keep >= 8 * sizeof(UInt32))
1735 return { src, src };
1736 if (bits_to_keep == 0)
1737 return { UInt32(0), UInt32(-1) };
1738
1739 UInt32 mask = UInt32(-1) << (8 * sizeof(UInt32) - bits_to_keep);
1740 UInt32 lower = src & mask;
1741 UInt32 upper = lower | ~mask;
1742
1743 return { lower, upper };
1744 }
1745
1746public:
1747 static constexpr auto name = "IPv4CIDRToRange";
1748 static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv4CIDRToRange>(); }
1749
1750 String getName() const override { return name; }
1751 size_t getNumberOfArguments() const override { return 2; }
1752
1753 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
1754 {
1755 if (!WhichDataType(arguments[0]).isUInt32())
1756 throw Exception("Illegal type " + arguments[0]->getName() +
1757 " of first argument of function " + getName() +
1758 ", expected UInt32",
1759 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1760
1761
1762 const DataTypePtr & second_argument = arguments[1];
1763 if (!isUInt8(second_argument))
1764 throw Exception{"Illegal type " + second_argument->getName()
1765 + " of second argument of function " + getName()
1766 + ", expected numeric type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
1767
1768 DataTypePtr element = DataTypeFactory::instance().get("IPv4");
1769 return std::make_shared<DataTypeTuple>(DataTypes{element, element});
1770 }
1771
1772 bool useDefaultImplementationForConstants() const override { return true; }
1773
1774
1775 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
1776 {
1777 const auto & col_type_name_ip = block.getByPosition(arguments[0]);
1778 const ColumnPtr & column_ip = col_type_name_ip.column;
1779
1780 const auto col_ip_in = checkAndGetColumn<ColumnUInt32>(column_ip.get());
1781 if (!col_ip_in)
1782 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
1783 + " of argument of function " + getName(),
1784 ErrorCodes::ILLEGAL_COLUMN);
1785
1786 const auto & col_type_name_cidr = block.getByPosition(arguments[1]);
1787 const ColumnPtr & column_cidr = col_type_name_cidr.column;
1788
1789 const auto col_const_cidr_in = checkAndGetColumnConst<ColumnUInt8>(column_cidr.get());
1790 const auto col_cidr_in = checkAndGetColumn<ColumnUInt8>(column_cidr.get());
1791
1792 if (!col_const_cidr_in && !col_cidr_in)
1793 throw Exception("Illegal column " + block.getByPosition(arguments[1]).column->getName()
1794 + " of argument of function " + getName(),
1795 ErrorCodes::ILLEGAL_COLUMN);
1796
1797 const auto & vec_in = col_ip_in->getData();
1798
1799 auto col_res_lower_range = ColumnUInt32::create();
1800 auto col_res_upper_range = ColumnUInt32::create();
1801
1802 auto & vec_res_lower_range = col_res_lower_range->getData();
1803 vec_res_lower_range.resize(input_rows_count);
1804
1805 auto & vec_res_upper_range = col_res_upper_range->getData();
1806 vec_res_upper_range.resize(input_rows_count);
1807
1808 for (size_t i = 0; i < input_rows_count; ++i)
1809 {
1810 UInt8 cidr = col_const_cidr_in
1811 ? col_const_cidr_in->getValue<UInt8>()
1812 : col_cidr_in->getData()[i];
1813
1814 std::tie(vec_res_lower_range[i], vec_res_upper_range[i]) = applyCIDRMask(vec_in[i], cidr);
1815 }
1816
1817 block.getByPosition(result).column = ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)});
1818 }
1819};
1820
1821
1822}
1823