1#include <Functions/IFunctionImpl.h>
2#include <Functions/FunctionFactory.h>
3#include <Functions/FunctionHelpers.h>
4#include <Columns/ColumnString.h>
5#include <DataTypes/DataTypeString.h>
6#include <Common/thread_local_rng.h>
7
8
9namespace DB
10{
11
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions live outside this file.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; /// Thrown when the function gets fewer than one or more than two arguments.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;         /// Thrown when the first (length) argument is not numeric.
    extern const int TOO_LARGE_STRING_SIZE;            /// Thrown when a requested string length exceeds the per-row limit.
}
18
19
20/** Generate random string of specified length with printable ASCII characters, almost uniformly distributed.
21 * First argument is length, other optional arguments are ignored and used to prevent common subexpression elimination to get different values.
22 */
23class FunctionRandomPrintableASCII : public IFunction
24{
25public:
26 static constexpr auto name = "randomPrintableASCII";
27 static FunctionPtr create(const Context &) { return std::make_shared<FunctionRandomPrintableASCII>(); }
28
29 String getName() const override
30 {
31 return name;
32 }
33
34 bool isVariadic() const override { return true; }
35 size_t getNumberOfArguments() const override { return 0; }
36
37 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
38 {
39 if (arguments.size() < 1)
40 throw Exception("Function " + getName() + " requires at least one argument: the size of resulting string",
41 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
42
43 if (arguments.size() > 2)
44 throw Exception("Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag",
45 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
46
47 const IDataType & length_type = *arguments[0];
48 if (!isNumber(length_type))
49 throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
50
51 return std::make_shared<DataTypeString>();
52 }
53
54 bool isDeterministic() const override { return false; }
55 bool isDeterministicInScopeOfQuery() const override { return false; }
56
57 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
58 {
59 auto col_to = ColumnString::create();
60 ColumnString::Chars & data_to = col_to->getChars();
61 ColumnString::Offsets & offsets_to = col_to->getOffsets();
62 offsets_to.resize(input_rows_count);
63
64 const IColumn & length_column = *block.getByPosition(arguments[0]).column;
65
66 IColumn::Offset offset = 0;
67 for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
68 {
69 size_t length = length_column.getUInt(row_num);
70 if (length > (1 << 30))
71 throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE);
72
73 IColumn::Offset next_offset = offset + length + 1;
74 data_to.resize(next_offset);
75 offsets_to[row_num] = next_offset;
76
77 for (size_t pos = offset, end = offset + length; pos < end; pos += 4) /// We have padding in column buffers that we can overwrite.
78 {
79 UInt64 rand = thread_local_rng();
80
81 UInt16 rand1 = rand;
82 UInt16 rand2 = rand >> 16;
83 UInt16 rand3 = rand >> 32;
84 UInt16 rand4 = rand >> 48;
85
86 /// Printable characters are from range [32; 126].
87 /// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
88
89 data_to[pos + 0] = 32 + ((rand1 * 95) >> 16);
90 data_to[pos + 1] = 32 + ((rand2 * 95) >> 16);
91 data_to[pos + 2] = 32 + ((rand3 * 95) >> 16);
92 data_to[pos + 3] = 32 + ((rand4 * 95) >> 16);
93
94 /// NOTE gcc failed to vectorize this code (aliasing of char?)
95 /// TODO Implement SIMD optimizations from Danila Kutenin.
96 }
97
98 data_to[offset + length] = 0;
99
100 offset = next_offset;
101 }
102
103 block.getByPosition(result).column = std::move(col_to);
104 }
105};
106
107void registerFunctionRandomPrintableASCII(FunctionFactory & factory)
108{
109 factory.registerFunction<FunctionRandomPrintableASCII>();
110}
111
112}
113