1#pragma once
2
3#include <DataTypes/DataTypesNumber.h>
4#include <DataTypes/DataTypeString.h>
5#include <DataTypes/DataTypeFixedString.h>
6#include <DataTypes/DataTypeDate.h>
7#include <DataTypes/DataTypeDateTime.h>
8#include <DataTypes/DataTypeDateTime64.h>
9#include <Columns/ColumnString.h>
10#include <Columns/ColumnFixedString.h>
11#include <Columns/ColumnConst.h>
12#include <Columns/ColumnVector.h>
13#include <Columns/ColumnDecimal.h>
14#include <Common/typeid_cast.h>
15#include <Common/memcpySmall.h>
16#include <Functions/IFunctionImpl.h>
17#include <Functions/FunctionHelpers.h>
18
19
20namespace DB
21{
22
/// Error codes thrown by the functions in this header.
/// Every code used below is declared here so that the header does not rely on
/// some other include happening to declare it.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
27
28
29/** Functions for transforming numbers and dates to strings that contain the same set of bytes in the machine representation, and vice versa.
30 */
31
32
33template <typename Name>
34class FunctionReinterpretAsStringImpl : public IFunction
35{
36public:
37 static constexpr auto name = Name::name;
38 static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsStringImpl>(); }
39
40 String getName() const override
41 {
42 return name;
43 }
44
45 size_t getNumberOfArguments() const override { return 1; }
46
47 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
48 {
49 const IDataType & type = *arguments[0];
50
51 if (type.isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
52 return std::make_shared<DataTypeString>();
53 throw Exception("Cannot reinterpret " + type.getName() + " as String because it is not contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
54 }
55
56 void executeToString(const IColumn & src, ColumnString & dst)
57 {
58 size_t rows = src.size();
59 ColumnString::Chars & data_to = dst.getChars();
60 ColumnString::Offsets & offsets_to = dst.getOffsets();
61 offsets_to.resize(rows);
62
63 ColumnString::Offset offset = 0;
64 for (size_t i = 0; i < rows; ++i)
65 {
66 StringRef data = src.getDataAt(i);
67
68 /// Cut trailing zero bytes.
69 while (data.size && data.data[data.size - 1] == 0)
70 --data.size;
71
72 data_to.resize(offset + data.size + 1);
73 memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, data.size);
74 offset += data.size;
75 data_to[offset] = 0;
76 ++offset;
77 offsets_to[i] = offset;
78 }
79 }
80
81 bool useDefaultImplementationForConstants() const override { return true; }
82
83 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
84 {
85 const IColumn & src = *block.getByPosition(arguments[0]).column;
86 MutableColumnPtr dst = block.getByPosition(result).type->createColumn();
87
88 if (ColumnString * dst_concrete = typeid_cast<ColumnString *>(dst.get()))
89 executeToString(src, *dst_concrete);
90 else
91 throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
92
93 block.getByPosition(result).column = std::move(dst);
94 }
95};
96
97
98template <typename Name>
99class FunctionReinterpretAsFixedStringImpl : public IFunction
100{
101public:
102 static constexpr auto name = Name::name;
103 static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsFixedStringImpl>(); }
104
105 String getName() const override
106 {
107 return name;
108 }
109
110 size_t getNumberOfArguments() const override { return 1; }
111
112 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
113 {
114 const IDataType & type = *arguments[0];
115
116 if (type.isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
117 return std::make_shared<DataTypeFixedString>(type.getSizeOfValueInMemory());
118 throw Exception("Cannot reinterpret " + type.getName() + " as FixedString because it is not fixed size and contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
119 }
120
121 void executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
122 {
123 size_t rows = src.size();
124 ColumnFixedString::Chars & data_to = dst.getChars();
125 data_to.resize(n * rows);
126
127 ColumnFixedString::Offset offset = 0;
128 for (size_t i = 0; i < rows; ++i)
129 {
130 StringRef data = src.getDataAt(i);
131 memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, n);
132 offset += n;
133 }
134 }
135
136 bool useDefaultImplementationForConstants() const override { return true; }
137
138 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
139 {
140 const IColumn & src = *block.getByPosition(arguments[0]).column;
141 MutableColumnPtr dst = block.getByPosition(result).type->createColumn();
142
143 if (ColumnFixedString * dst_concrete = typeid_cast<ColumnFixedString *>(dst.get()))
144 executeToFixedString(src, *dst_concrete, dst_concrete->getN());
145 else
146 throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
147
148 block.getByPosition(result).column = std::move(dst);
149 }
150};
151
152template <typename ToDataType, typename Name>
153class FunctionReinterpretStringAs : public IFunction
154{
155public:
156 static constexpr auto name = Name::name;
157 static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretStringAs>(); }
158
159 using ToFieldType = typename ToDataType::FieldType;
160 using ColumnType = typename ToDataType::ColumnType;
161
162 String getName() const override
163 {
164 return name;
165 }
166
167 size_t getNumberOfArguments() const override { return 1; }
168
169 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
170 {
171 const IDataType & type = *arguments[0];
172 if (!isStringOrFixedString(type))
173 throw Exception("Cannot reinterpret " + type.getName() + " as " + ToDataType().getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
174
175 return std::make_shared<ToDataType>();
176 }
177
178 bool useDefaultImplementationForConstants() const override { return true; }
179
180 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
181 {
182 if (const ColumnString * col_from = typeid_cast<const ColumnString *>(block.getByPosition(arguments[0]).column.get()))
183 {
184 auto col_res = ColumnType::create();
185
186 const ColumnString::Chars & data_from = col_from->getChars();
187 const ColumnString::Offsets & offsets_from = col_from->getOffsets();
188 size_t size = offsets_from.size();
189 typename ColumnType::Container & vec_res = col_res->getData();
190 vec_res.resize(size);
191
192 size_t offset = 0;
193 for (size_t i = 0; i < size; ++i)
194 {
195 ToFieldType value = 0;
196 memcpy(&value, &data_from[offset], std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1));
197 vec_res[i] = value;
198 offset = offsets_from[i];
199 }
200
201 block.getByPosition(result).column = std::move(col_res);
202 }
203 else if (const ColumnFixedString * col_from_fixed = typeid_cast<const ColumnFixedString *>(block.getByPosition(arguments[0]).column.get()))
204 {
205 auto col_res = ColumnVector<ToFieldType>::create();
206
207 const ColumnString::Chars & data_from = col_from_fixed->getChars();
208 size_t step = col_from_fixed->getN();
209 size_t size = data_from.size() / step;
210 typename ColumnVector<ToFieldType>::Container & vec_res = col_res->getData();
211 vec_res.resize(size);
212
213 size_t offset = 0;
214 size_t copy_size = std::min(step, sizeof(ToFieldType));
215 for (size_t i = 0; i < size; ++i)
216 {
217 ToFieldType value = 0;
218 memcpy(&value, &data_from[offset], copy_size);
219 vec_res[i] = value;
220 offset += step;
221 }
222
223 block.getByPosition(result).column = std::move(col_res);
224 }
225 else
226 {
227 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
228 + " of argument of function " + getName(),
229 ErrorCodes::ILLEGAL_COLUMN);
230 }
231 }
232};
233
234
/// Name tags. Each struct provides, through its static `name` member, the function name
/// that the class templates in this header read as Name::name.

struct NameReinterpretAsUInt8
{
    static constexpr const char * name = "reinterpretAsUInt8";
};

struct NameReinterpretAsUInt16
{
    static constexpr const char * name = "reinterpretAsUInt16";
};

struct NameReinterpretAsUInt32
{
    static constexpr const char * name = "reinterpretAsUInt32";
};

struct NameReinterpretAsUInt64
{
    static constexpr const char * name = "reinterpretAsUInt64";
};

struct NameReinterpretAsInt8
{
    static constexpr const char * name = "reinterpretAsInt8";
};

struct NameReinterpretAsInt16
{
    static constexpr const char * name = "reinterpretAsInt16";
};

struct NameReinterpretAsInt32
{
    static constexpr const char * name = "reinterpretAsInt32";
};

struct NameReinterpretAsInt64
{
    static constexpr const char * name = "reinterpretAsInt64";
};

struct NameReinterpretAsFloat32
{
    static constexpr const char * name = "reinterpretAsFloat32";
};

struct NameReinterpretAsFloat64
{
    static constexpr const char * name = "reinterpretAsFloat64";
};

struct NameReinterpretAsDate
{
    static constexpr const char * name = "reinterpretAsDate";
};

struct NameReinterpretAsDateTime
{
    static constexpr const char * name = "reinterpretAsDateTime";
};

struct NameReinterpretAsString
{
    static constexpr const char * name = "reinterpretAsString";
};

struct NameReinterpretAsFixedString
{
    static constexpr const char * name = "reinterpretAsFixedString";
};
249
250using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8>;
251using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>;
252using FunctionReinterpretAsUInt32 = FunctionReinterpretStringAs<DataTypeUInt32, NameReinterpretAsUInt32>;
253using FunctionReinterpretAsUInt64 = FunctionReinterpretStringAs<DataTypeUInt64, NameReinterpretAsUInt64>;
254using FunctionReinterpretAsInt8 = FunctionReinterpretStringAs<DataTypeInt8, NameReinterpretAsInt8>;
255using FunctionReinterpretAsInt16 = FunctionReinterpretStringAs<DataTypeInt16, NameReinterpretAsInt16>;
256using FunctionReinterpretAsInt32 = FunctionReinterpretStringAs<DataTypeInt32, NameReinterpretAsInt32>;
257using FunctionReinterpretAsInt64 = FunctionReinterpretStringAs<DataTypeInt64, NameReinterpretAsInt64>;
258using FunctionReinterpretAsFloat32 = FunctionReinterpretStringAs<DataTypeFloat32, NameReinterpretAsFloat32>;
259using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs<DataTypeFloat64, NameReinterpretAsFloat64>;
260using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate>;
261using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
262
263using FunctionReinterpretAsString = FunctionReinterpretAsStringImpl<NameReinterpretAsString>;
264using FunctionReinterpretAsFixedString = FunctionReinterpretAsStringImpl<NameReinterpretAsFixedString>;
265
266
267}
268