1 | #include <Columns/ColumnFixedString.h> |
2 | #include <Columns/ColumnsNumber.h> |
3 | #include <Columns/ColumnConst.h> |
4 | |
5 | #include <Formats/FormatSettings.h> |
6 | #include <Formats/ProtobufReader.h> |
7 | #include <Formats/ProtobufWriter.h> |
8 | #include <DataTypes/DataTypeFixedString.h> |
9 | #include <DataTypes/DataTypeFactory.h> |
10 | |
11 | #include <IO/WriteBuffer.h> |
12 | #include <IO/ReadHelpers.h> |
13 | #include <IO/WriteHelpers.h> |
14 | #include <IO/VarInt.h> |
15 | |
16 | #include <Parsers/IAST.h> |
17 | #include <Parsers/ASTLiteral.h> |
18 | |
19 | #include <Common/typeid_cast.h> |
20 | #include <Common/assert_cast.h> |
21 | |
22 | |
23 | namespace DB |
24 | { |
25 | |
/// Error codes used by the exceptions thrown in this file.
/// They are only declared here; the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_READ_ALL_DATA;              /// Thrown by deserializeBinaryBulk on a partial trailing value.
    extern const int TOO_LARGE_STRING_SIZE;             /// Thrown by alignStringLength when a parsed value exceeds N bytes.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;  /// Thrown by create() when the type does not have exactly one argument.
    extern const int UNEXPECTED_AST_STRUCTURE;          /// Thrown by create() when the argument is not a positive integer literal.
}
33 | |
34 | |
35 | std::string DataTypeFixedString::doGetName() const |
36 | { |
37 | return type_name + "(" + toString(n) + ")" ; |
38 | } |
39 | |
40 | |
41 | void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const |
42 | { |
43 | const String & s = get<const String &>(field); |
44 | ostr.write(s.data(), std::min(s.size(), n)); |
45 | if (s.size() < n) |
46 | for (size_t i = s.size(); i < n; ++i) |
47 | ostr.write(0); |
48 | } |
49 | |
50 | |
51 | void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const |
52 | { |
53 | field = String(); |
54 | String & s = get<String &>(field); |
55 | s.resize(n); |
56 | istr.readStrict(s.data(), n); |
57 | } |
58 | |
59 | |
60 | void DataTypeFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const |
61 | { |
62 | ostr.write(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n); |
63 | } |
64 | |
65 | |
66 | void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const |
67 | { |
68 | ColumnFixedString::Chars & data = assert_cast<ColumnFixedString &>(column).getChars(); |
69 | size_t old_size = data.size(); |
70 | data.resize(old_size + n); |
71 | try |
72 | { |
73 | istr.readStrict(reinterpret_cast<char *>(data.data() + old_size), n); |
74 | } |
75 | catch (...) |
76 | { |
77 | data.resize_assume_reserved(old_size); |
78 | throw; |
79 | } |
80 | } |
81 | |
82 | |
83 | void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const |
84 | { |
85 | const ColumnFixedString::Chars & data = typeid_cast<const ColumnFixedString &>(column).getChars(); |
86 | |
87 | size_t size = data.size() / n; |
88 | |
89 | if (limit == 0 || offset + limit > size) |
90 | limit = size - offset; |
91 | |
92 | if (limit) |
93 | ostr.write(reinterpret_cast<const char *>(&data[n * offset]), n * limit); |
94 | } |
95 | |
96 | |
97 | void DataTypeFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const |
98 | { |
99 | ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars(); |
100 | |
101 | size_t initial_size = data.size(); |
102 | size_t max_bytes = limit * n; |
103 | data.resize(initial_size + max_bytes); |
104 | size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&data[initial_size]), max_bytes); |
105 | |
106 | if (read_bytes % n != 0) |
107 | throw Exception("Cannot read all data of type FixedString. Bytes read:" + toString(read_bytes) + ". String size:" + toString(n) + "." , |
108 | ErrorCodes::CANNOT_READ_ALL_DATA); |
109 | |
110 | data.resize(initial_size + read_bytes); |
111 | } |
112 | |
113 | |
114 | void DataTypeFixedString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
115 | { |
116 | writeString(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n, ostr); |
117 | } |
118 | |
119 | |
120 | void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
121 | { |
122 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
123 | writeAnyEscapedString<'\''>(pos, pos + n, ostr); |
124 | } |
125 | |
126 | |
127 | static inline void alignStringLength(const DataTypeFixedString & type, |
128 | ColumnFixedString::Chars & data, |
129 | size_t string_start) |
130 | { |
131 | size_t length = data.size() - string_start; |
132 | if (length < type.getN()) |
133 | { |
134 | data.resize_fill(string_start + type.getN()); |
135 | } |
136 | else if (length > type.getN()) |
137 | { |
138 | data.resize_assume_reserved(string_start); |
139 | throw Exception("Too large value for " + type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); |
140 | } |
141 | } |
142 | |
143 | template <typename Reader> |
144 | static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader) |
145 | { |
146 | ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars(); |
147 | size_t prev_size = data.size(); |
148 | try |
149 | { |
150 | reader(data); |
151 | alignStringLength(self, data, prev_size); |
152 | } |
153 | catch (...) |
154 | { |
155 | data.resize_assume_reserved(prev_size); |
156 | throw; |
157 | } |
158 | } |
159 | |
160 | |
161 | void DataTypeFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
162 | { |
163 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); }); |
164 | } |
165 | |
166 | |
167 | void DataTypeFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
168 | { |
169 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
170 | writeAnyQuotedString<'\''>(pos, pos + n, ostr); |
171 | } |
172 | |
173 | |
174 | void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
175 | { |
176 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); }); |
177 | } |
178 | |
179 | |
180 | void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
181 | { |
182 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); |
183 | } |
184 | |
185 | |
186 | void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
187 | { |
188 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
189 | writeJSONString(pos, pos + n, ostr, settings); |
190 | } |
191 | |
192 | |
193 | void DataTypeFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
194 | { |
195 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); }); |
196 | } |
197 | |
198 | |
199 | void DataTypeFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
200 | { |
201 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
202 | writeXMLString(pos, pos + n, ostr); |
203 | } |
204 | |
205 | |
206 | void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
207 | { |
208 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
209 | writeCSVString(pos, pos + n, ostr); |
210 | } |
211 | |
212 | |
213 | void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
214 | { |
215 | read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); }); |
216 | } |
217 | |
218 | |
219 | void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const |
220 | { |
221 | if (value_index) |
222 | return; |
223 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
224 | value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n))); |
225 | } |
226 | |
227 | |
228 | void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const |
229 | { |
230 | row_added = false; |
231 | auto & column_string = assert_cast<ColumnFixedString &>(column); |
232 | ColumnFixedString::Chars & data = column_string.getChars(); |
233 | size_t old_size = data.size(); |
234 | try |
235 | { |
236 | if (allow_add_row) |
237 | { |
238 | if (protobuf.readStringInto(data)) |
239 | { |
240 | alignStringLength(*this, data, old_size); |
241 | row_added = true; |
242 | } |
243 | else |
244 | data.resize_assume_reserved(old_size); |
245 | } |
246 | else |
247 | { |
248 | ColumnFixedString::Chars temp_data; |
249 | if (protobuf.readStringInto(temp_data)) |
250 | { |
251 | alignStringLength(*this, temp_data, 0); |
252 | column_string.popBack(1); |
253 | old_size = data.size(); |
254 | data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end()); |
255 | } |
256 | } |
257 | } |
258 | catch (...) |
259 | { |
260 | data.resize_assume_reserved(old_size); |
261 | throw; |
262 | } |
263 | } |
264 | |
265 | |
266 | MutableColumnPtr DataTypeFixedString::createColumn() const |
267 | { |
268 | return ColumnFixedString::create(n); |
269 | } |
270 | |
271 | Field DataTypeFixedString::getDefault() const |
272 | { |
273 | return String(); |
274 | } |
275 | |
276 | bool DataTypeFixedString::equals(const IDataType & rhs) const |
277 | { |
278 | return typeid(rhs) == typeid(*this) && n == static_cast<const DataTypeFixedString &>(rhs).n; |
279 | } |
280 | |
281 | |
282 | static DataTypePtr create(const String & type_name, const ASTPtr & arguments) |
283 | { |
284 | if (!arguments || arguments->children.size() != 1) |
285 | throw Exception("FixedString data type family must have exactly one argument - size in bytes" , ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
286 | |
287 | const auto * argument = arguments->children[0]->as<ASTLiteral>(); |
288 | if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get<UInt64>() == 0) |
289 | throw Exception("FixedString data type family must have a number (positive integer) as its argument" , ErrorCodes::UNEXPECTED_AST_STRUCTURE); |
290 | |
291 | return std::make_shared<DataTypeFixedString>(argument->value.get<UInt64>(), type_name); |
292 | } |
293 | |
294 | |
295 | void registerDataTypeFixedString(DataTypeFactory & factory) |
296 | { |
297 | factory.registerDataType("FixedString" , create); |
298 | |
299 | /// Compatibility alias. |
300 | factory.registerAlias("BINARY" , "FixedString" , DataTypeFactory::CaseInsensitive); |
301 | } |
302 | |
303 | } |
304 | |