1 | #include <Functions/IFunctionImpl.h> |
2 | #include <Functions/FunctionFactory.h> |
3 | #include <Functions/FunctionHelpers.h> |
4 | #include <Columns/ColumnString.h> |
5 | #include <DataTypes/DataTypeString.h> |
6 | #include <Common/thread_local_rng.h> |
7 | |
8 | |
9 | namespace DB |
10 | { |
11 | |
/// Error codes thrown by the function in this file.
/// These are declarations only; the definitions live outside this file.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int TOO_LARGE_STRING_SIZE;
}
18 | |
19 | |
20 | /** Generate random string of specified length with printable ASCII characters, almost uniformly distributed. |
21 | * First argument is length, other optional arguments are ignored and used to prevent common subexpression elimination to get different values. |
22 | */ |
23 | class FunctionRandomPrintableASCII : public IFunction |
24 | { |
25 | public: |
26 | static constexpr auto name = "randomPrintableASCII" ; |
27 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionRandomPrintableASCII>(); } |
28 | |
29 | String getName() const override |
30 | { |
31 | return name; |
32 | } |
33 | |
34 | bool isVariadic() const override { return true; } |
35 | size_t getNumberOfArguments() const override { return 0; } |
36 | |
37 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
38 | { |
39 | if (arguments.size() < 1) |
40 | throw Exception("Function " + getName() + " requires at least one argument: the size of resulting string" , |
41 | ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
42 | |
43 | if (arguments.size() > 2) |
44 | throw Exception("Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag" , |
45 | ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
46 | |
47 | const IDataType & length_type = *arguments[0]; |
48 | if (!isNumber(length_type)) |
49 | throw Exception("First argument of function " + getName() + " must have numeric type" , ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
50 | |
51 | return std::make_shared<DataTypeString>(); |
52 | } |
53 | |
54 | bool isDeterministic() const override { return false; } |
55 | bool isDeterministicInScopeOfQuery() const override { return false; } |
56 | |
57 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override |
58 | { |
59 | auto col_to = ColumnString::create(); |
60 | ColumnString::Chars & data_to = col_to->getChars(); |
61 | ColumnString::Offsets & offsets_to = col_to->getOffsets(); |
62 | offsets_to.resize(input_rows_count); |
63 | |
64 | const IColumn & length_column = *block.getByPosition(arguments[0]).column; |
65 | |
66 | IColumn::Offset offset = 0; |
67 | for (size_t row_num = 0; row_num < input_rows_count; ++row_num) |
68 | { |
69 | size_t length = length_column.getUInt(row_num); |
70 | if (length > (1 << 30)) |
71 | throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); |
72 | |
73 | IColumn::Offset next_offset = offset + length + 1; |
74 | data_to.resize(next_offset); |
75 | offsets_to[row_num] = next_offset; |
76 | |
77 | for (size_t pos = offset, end = offset + length; pos < end; pos += 4) /// We have padding in column buffers that we can overwrite. |
78 | { |
79 | UInt64 rand = thread_local_rng(); |
80 | |
81 | UInt16 rand1 = rand; |
82 | UInt16 rand2 = rand >> 16; |
83 | UInt16 rand3 = rand >> 32; |
84 | UInt16 rand4 = rand >> 48; |
85 | |
86 | /// Printable characters are from range [32; 126]. |
87 | /// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ |
88 | |
89 | data_to[pos + 0] = 32 + ((rand1 * 95) >> 16); |
90 | data_to[pos + 1] = 32 + ((rand2 * 95) >> 16); |
91 | data_to[pos + 2] = 32 + ((rand3 * 95) >> 16); |
92 | data_to[pos + 3] = 32 + ((rand4 * 95) >> 16); |
93 | |
94 | /// NOTE gcc failed to vectorize this code (aliasing of char?) |
95 | /// TODO Implement SIMD optimizations from Danila Kutenin. |
96 | } |
97 | |
98 | data_to[offset + length] = 0; |
99 | |
100 | offset = next_offset; |
101 | } |
102 | |
103 | block.getByPosition(result).column = std::move(col_to); |
104 | } |
105 | }; |
106 | |
107 | void registerFunctionRandomPrintableASCII(FunctionFactory & factory) |
108 | { |
109 | factory.registerFunction<FunctionRandomPrintableASCII>(); |
110 | } |
111 | |
112 | } |
113 | |