| 1 | #include <Columns/ColumnFixedString.h> | 
|---|
| 2 | #include <Columns/ColumnsNumber.h> | 
|---|
| 3 | #include <Columns/ColumnConst.h> | 
|---|
| 4 |  | 
|---|
| 5 | #include <Formats/FormatSettings.h> | 
|---|
| 6 | #include <Formats/ProtobufReader.h> | 
|---|
| 7 | #include <Formats/ProtobufWriter.h> | 
|---|
| 8 | #include <DataTypes/DataTypeFixedString.h> | 
|---|
| 9 | #include <DataTypes/DataTypeFactory.h> | 
|---|
| 10 |  | 
|---|
| 11 | #include <IO/WriteBuffer.h> | 
|---|
| 12 | #include <IO/ReadHelpers.h> | 
|---|
| 13 | #include <IO/WriteHelpers.h> | 
|---|
| 14 | #include <IO/VarInt.h> | 
|---|
| 15 |  | 
|---|
| 16 | #include <Parsers/IAST.h> | 
|---|
| 17 | #include <Parsers/ASTLiteral.h> | 
|---|
| 18 |  | 
|---|
| 19 | #include <Common/typeid_cast.h> | 
|---|
| 20 | #include <Common/assert_cast.h> | 
|---|
| 21 |  | 
|---|
| 22 |  | 
|---|
| 23 | namespace DB | 
|---|
| 24 | { | 
|---|
| 25 |  | 
|---|
/// Error codes referenced by this translation unit; only declared here.
namespace ErrorCodes
{
    extern const int CANNOT_READ_ALL_DATA;              /// Thrown by deserializeBinaryBulk on a partial value.
    extern const int TOO_LARGE_STRING_SIZE;             /// Thrown when a parsed value is longer than N bytes.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;  /// Thrown by the factory on a wrong argument count.
    extern const int UNEXPECTED_AST_STRUCTURE;          /// Thrown by the factory on a non-positive-integer argument.
}
|---|
| 33 |  | 
|---|
| 34 |  | 
|---|
| 35 | std::string DataTypeFixedString::doGetName() const | 
|---|
| 36 | { | 
|---|
| 37 | return type_name + "("+ toString(n) + ")"; | 
|---|
| 38 | } | 
|---|
| 39 |  | 
|---|
| 40 |  | 
|---|
| 41 | void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const | 
|---|
| 42 | { | 
|---|
| 43 | const String & s = get<const String &>(field); | 
|---|
| 44 | ostr.write(s.data(), std::min(s.size(), n)); | 
|---|
| 45 | if (s.size() < n) | 
|---|
| 46 | for (size_t i = s.size(); i < n; ++i) | 
|---|
| 47 | ostr.write(0); | 
|---|
| 48 | } | 
|---|
| 49 |  | 
|---|
| 50 |  | 
|---|
| 51 | void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const | 
|---|
| 52 | { | 
|---|
| 53 | field = String(); | 
|---|
| 54 | String & s = get<String &>(field); | 
|---|
| 55 | s.resize(n); | 
|---|
| 56 | istr.readStrict(s.data(), n); | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 |  | 
|---|
| 60 | void DataTypeFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const | 
|---|
| 61 | { | 
|---|
| 62 | ostr.write(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n); | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 |  | 
|---|
| 66 | void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const | 
|---|
| 67 | { | 
|---|
| 68 | ColumnFixedString::Chars & data = assert_cast<ColumnFixedString &>(column).getChars(); | 
|---|
| 69 | size_t old_size = data.size(); | 
|---|
| 70 | data.resize(old_size + n); | 
|---|
| 71 | try | 
|---|
| 72 | { | 
|---|
| 73 | istr.readStrict(reinterpret_cast<char *>(data.data() + old_size), n); | 
|---|
| 74 | } | 
|---|
| 75 | catch (...) | 
|---|
| 76 | { | 
|---|
| 77 | data.resize_assume_reserved(old_size); | 
|---|
| 78 | throw; | 
|---|
| 79 | } | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 |  | 
|---|
| 83 | void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const | 
|---|
| 84 | { | 
|---|
| 85 | const ColumnFixedString::Chars & data = typeid_cast<const ColumnFixedString &>(column).getChars(); | 
|---|
| 86 |  | 
|---|
| 87 | size_t size = data.size() / n; | 
|---|
| 88 |  | 
|---|
| 89 | if (limit == 0 || offset + limit > size) | 
|---|
| 90 | limit = size - offset; | 
|---|
| 91 |  | 
|---|
| 92 | if (limit) | 
|---|
| 93 | ostr.write(reinterpret_cast<const char *>(&data[n * offset]), n * limit); | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 |  | 
|---|
| 97 | void DataTypeFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const | 
|---|
| 98 | { | 
|---|
| 99 | ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars(); | 
|---|
| 100 |  | 
|---|
| 101 | size_t initial_size = data.size(); | 
|---|
| 102 | size_t max_bytes = limit * n; | 
|---|
| 103 | data.resize(initial_size + max_bytes); | 
|---|
| 104 | size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&data[initial_size]), max_bytes); | 
|---|
| 105 |  | 
|---|
| 106 | if (read_bytes % n != 0) | 
|---|
| 107 | throw Exception( "Cannot read all data of type FixedString. Bytes read:"+ toString(read_bytes) + ". String size:"+ toString(n) + ".", | 
|---|
| 108 | ErrorCodes::CANNOT_READ_ALL_DATA); | 
|---|
| 109 |  | 
|---|
| 110 | data.resize(initial_size + read_bytes); | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 |  | 
|---|
| 114 | void DataTypeFixedString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const | 
|---|
| 115 | { | 
|---|
| 116 | writeString(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n, ostr); | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 |  | 
|---|
| 120 | void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const | 
|---|
| 121 | { | 
|---|
| 122 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); | 
|---|
| 123 | writeAnyEscapedString<'\''>(pos, pos + n, ostr); | 
|---|
| 124 | } | 
|---|
| 125 |  | 
|---|
| 126 |  | 
|---|
| 127 | static inline void alignStringLength(const DataTypeFixedString & type, | 
|---|
| 128 | ColumnFixedString::Chars & data, | 
|---|
| 129 | size_t string_start) | 
|---|
| 130 | { | 
|---|
| 131 | size_t length = data.size() - string_start; | 
|---|
| 132 | if (length < type.getN()) | 
|---|
| 133 | { | 
|---|
| 134 | data.resize_fill(string_start + type.getN()); | 
|---|
| 135 | } | 
|---|
| 136 | else if (length > type.getN()) | 
|---|
| 137 | { | 
|---|
| 138 | data.resize_assume_reserved(string_start); | 
|---|
| 139 | throw Exception( "Too large value for "+ type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); | 
|---|
| 140 | } | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | template <typename Reader> | 
|---|
| 144 | static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader) | 
|---|
| 145 | { | 
|---|
| 146 | ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars(); | 
|---|
| 147 | size_t prev_size = data.size(); | 
|---|
| 148 | try | 
|---|
| 149 | { | 
|---|
| 150 | reader(data); | 
|---|
| 151 | alignStringLength(self, data, prev_size); | 
|---|
| 152 | } | 
|---|
| 153 | catch (...) | 
|---|
| 154 | { | 
|---|
| 155 | data.resize_assume_reserved(prev_size); | 
|---|
| 156 | throw; | 
|---|
| 157 | } | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 |  | 
|---|
| 161 | void DataTypeFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const | 
|---|
| 162 | { | 
|---|
| 163 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); }); | 
|---|
| 164 | } | 
|---|
| 165 |  | 
|---|
| 166 |  | 
|---|
| 167 | void DataTypeFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const | 
|---|
| 168 | { | 
|---|
| 169 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); | 
|---|
| 170 | writeAnyQuotedString<'\''>(pos, pos + n, ostr); | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 |  | 
|---|
| 174 | void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const | 
|---|
| 175 | { | 
|---|
| 176 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); }); | 
|---|
| 177 | } | 
|---|
| 178 |  | 
|---|
| 179 |  | 
|---|
| 180 | void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const | 
|---|
| 181 | { | 
|---|
| 182 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 |  | 
|---|
| 186 | void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const | 
|---|
| 187 | { | 
|---|
| 188 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); | 
|---|
| 189 | writeJSONString(pos, pos + n, ostr, settings); | 
|---|
| 190 | } | 
|---|
| 191 |  | 
|---|
| 192 |  | 
|---|
| 193 | void DataTypeFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const | 
|---|
| 194 | { | 
|---|
| 195 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); }); | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 |  | 
|---|
| 199 | void DataTypeFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const | 
|---|
| 200 | { | 
|---|
| 201 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); | 
|---|
| 202 | writeXMLString(pos, pos + n, ostr); | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 |  | 
|---|
| 206 | void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const | 
|---|
| 207 | { | 
|---|
| 208 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); | 
|---|
| 209 | writeCSVString(pos, pos + n, ostr); | 
|---|
| 210 | } | 
|---|
| 211 |  | 
|---|
| 212 |  | 
|---|
| 213 | void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const | 
|---|
| 214 | { | 
|---|
| 215 | read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); }); | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 |  | 
|---|
| 219 | void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const | 
|---|
| 220 | { | 
|---|
| 221 | if (value_index) | 
|---|
| 222 | return; | 
|---|
| 223 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); | 
|---|
| 224 | value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n))); | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|
| 227 |  | 
|---|
| 228 | void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const | 
|---|
| 229 | { | 
|---|
| 230 | row_added = false; | 
|---|
| 231 | auto & column_string = assert_cast<ColumnFixedString &>(column); | 
|---|
| 232 | ColumnFixedString::Chars & data = column_string.getChars(); | 
|---|
| 233 | size_t old_size = data.size(); | 
|---|
| 234 | try | 
|---|
| 235 | { | 
|---|
| 236 | if (allow_add_row) | 
|---|
| 237 | { | 
|---|
| 238 | if (protobuf.readStringInto(data)) | 
|---|
| 239 | { | 
|---|
| 240 | alignStringLength(*this, data, old_size); | 
|---|
| 241 | row_added = true; | 
|---|
| 242 | } | 
|---|
| 243 | else | 
|---|
| 244 | data.resize_assume_reserved(old_size); | 
|---|
| 245 | } | 
|---|
| 246 | else | 
|---|
| 247 | { | 
|---|
| 248 | ColumnFixedString::Chars temp_data; | 
|---|
| 249 | if (protobuf.readStringInto(temp_data)) | 
|---|
| 250 | { | 
|---|
| 251 | alignStringLength(*this, temp_data, 0); | 
|---|
| 252 | column_string.popBack(1); | 
|---|
| 253 | old_size = data.size(); | 
|---|
| 254 | data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end()); | 
|---|
| 255 | } | 
|---|
| 256 | } | 
|---|
| 257 | } | 
|---|
| 258 | catch (...) | 
|---|
| 259 | { | 
|---|
| 260 | data.resize_assume_reserved(old_size); | 
|---|
| 261 | throw; | 
|---|
| 262 | } | 
|---|
| 263 | } | 
|---|
| 264 |  | 
|---|
| 265 |  | 
|---|
| 266 | MutableColumnPtr DataTypeFixedString::createColumn() const | 
|---|
| 267 | { | 
|---|
| 268 | return ColumnFixedString::create(n); | 
|---|
| 269 | } | 
|---|
| 270 |  | 
|---|
| 271 | Field DataTypeFixedString::getDefault() const | 
|---|
| 272 | { | 
|---|
| 273 | return String(); | 
|---|
| 274 | } | 
|---|
| 275 |  | 
|---|
| 276 | bool DataTypeFixedString::equals(const IDataType & rhs) const | 
|---|
| 277 | { | 
|---|
| 278 | return typeid(rhs) == typeid(*this) && n == static_cast<const DataTypeFixedString &>(rhs).n; | 
|---|
| 279 | } | 
|---|
| 280 |  | 
|---|
| 281 |  | 
|---|
| 282 | static DataTypePtr create(const String & type_name, const ASTPtr & arguments) | 
|---|
| 283 | { | 
|---|
| 284 | if (!arguments || arguments->children.size() != 1) | 
|---|
| 285 | throw Exception( "FixedString data type family must have exactly one argument - size in bytes", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); | 
|---|
| 286 |  | 
|---|
| 287 | const auto * argument = arguments->children[0]->as<ASTLiteral>(); | 
|---|
| 288 | if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get<UInt64>() == 0) | 
|---|
| 289 | throw Exception( "FixedString data type family must have a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); | 
|---|
| 290 |  | 
|---|
| 291 | return std::make_shared<DataTypeFixedString>(argument->value.get<UInt64>(), type_name); | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 |  | 
|---|
| 295 | void registerDataTypeFixedString(DataTypeFactory & factory) | 
|---|
| 296 | { | 
|---|
| 297 | factory.registerDataType( "FixedString", create); | 
|---|
| 298 |  | 
|---|
| 299 | /// Compatibility alias. | 
|---|
| 300 | factory.registerAlias( "BINARY", "FixedString", DataTypeFactory::CaseInsensitive); | 
|---|
| 301 | } | 
|---|
| 302 |  | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|