| 1 | #include <Storages/ColumnsDescription.h> |
| 2 | #include <Parsers/ASTLiteral.h> |
| 3 | #include <Parsers/ExpressionElementParsers.h> |
| 4 | #include <Parsers/ExpressionListParsers.h> |
| 5 | #include <Parsers/ParserCreateQuery.h> |
| 6 | #include <Parsers/parseQuery.h> |
| 7 | #include <Parsers/queryToString.h> |
| 8 | #include <IO/WriteBuffer.h> |
| 9 | #include <IO/WriteHelpers.h> |
| 10 | #include <IO/ReadBuffer.h> |
| 11 | #include <IO/ReadHelpers.h> |
| 12 | #include <IO/WriteBufferFromString.h> |
| 13 | #include <IO/ReadBufferFromString.h> |
| 14 | #include <DataTypes/DataTypeFactory.h> |
| 15 | #include <DataTypes/NestedUtils.h> |
| 16 | #include <DataTypes/DataTypeArray.h> |
| 17 | #include <DataTypes/DataTypeTuple.h> |
| 18 | #include <Common/Exception.h> |
| 19 | #include <Interpreters/Context.h> |
| 20 | #include <Storages/IStorage.h> |
| 21 | #include <Common/typeid_cast.h> |
| 22 | #include <Compression/CompressionFactory.h> |
| 23 | |
| 24 | |
| 25 | namespace DB |
| 26 | { |
| 27 | |
| 28 | namespace ErrorCodes |
| 29 | { |
| 30 | extern const int NO_SUCH_COLUMN_IN_TABLE; |
| 31 | extern const int ILLEGAL_COLUMN; |
| 32 | extern const int CANNOT_PARSE_TEXT; |
| 33 | } |
| 34 | |
| 35 | ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_) |
| 36 | : name(std::move(name_)), type(std::move(type_)), is_virtual(is_virtual_) |
| 37 | { |
| 38 | } |
| 39 | |
| 40 | bool ColumnDescription::operator==(const ColumnDescription & other) const |
| 41 | { |
| 42 | auto codec_str = [](const CompressionCodecPtr & codec_ptr) { return codec_ptr ? codec_ptr->getCodecDesc() : String(); }; |
| 43 | auto ttl_str = [](const ASTPtr & ttl_ast) { return ttl_ast ? queryToString(ttl_ast) : String{}; }; |
| 44 | |
| 45 | return name == other.name |
| 46 | && type->equals(*other.type) |
| 47 | && default_desc == other.default_desc |
| 48 | && comment == other.comment |
| 49 | && codec_str(codec) == codec_str(other.codec) |
| 50 | && ttl_str(ttl) == ttl_str(other.ttl); |
| 51 | } |
| 52 | |
| 53 | void ColumnDescription::writeText(WriteBuffer & buf) const |
| 54 | { |
| 55 | writeBackQuotedString(name, buf); |
| 56 | writeChar(' ', buf); |
| 57 | DB::writeText(type->getName(), buf); |
| 58 | |
| 59 | if (default_desc.expression) |
| 60 | { |
| 61 | writeChar('\t', buf); |
| 62 | DB::writeText(DB::toString(default_desc.kind), buf); |
| 63 | writeChar('\t', buf); |
| 64 | DB::writeText(queryToString(default_desc.expression), buf); |
| 65 | } |
| 66 | |
| 67 | if (!comment.empty()) |
| 68 | { |
| 69 | writeChar('\t', buf); |
| 70 | DB::writeText("COMMENT " , buf); |
| 71 | DB::writeText(queryToString(ASTLiteral(Field(comment))), buf); |
| 72 | } |
| 73 | |
| 74 | if (codec) |
| 75 | { |
| 76 | writeChar('\t', buf); |
| 77 | DB::writeText("CODEC(" , buf); |
| 78 | DB::writeText(codec->getCodecDesc(), buf); |
| 79 | DB::writeText(")" , buf); |
| 80 | } |
| 81 | |
| 82 | if (ttl) |
| 83 | { |
| 84 | writeChar('\t', buf); |
| 85 | DB::writeText("TTL " , buf); |
| 86 | DB::writeText(queryToString(ttl), buf); |
| 87 | } |
| 88 | |
| 89 | writeChar('\n', buf); |
| 90 | } |
| 91 | |
| 92 | void ColumnDescription::readText(ReadBuffer & buf) |
| 93 | { |
| 94 | ParserColumnDeclaration column_parser(/* require type */ true); |
| 95 | String column_line; |
| 96 | readEscapedStringUntilEOL(column_line, buf); |
| 97 | ASTPtr ast = parseQuery(column_parser, column_line, "column parser" , 0); |
| 98 | if (const auto * col_ast = ast->as<ASTColumnDeclaration>()) |
| 99 | { |
| 100 | name = col_ast->name; |
| 101 | type = DataTypeFactory::instance().get(col_ast->type); |
| 102 | |
| 103 | if (col_ast->default_expression) |
| 104 | { |
| 105 | default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier); |
| 106 | default_desc.expression = std::move(col_ast->default_expression); |
| 107 | } |
| 108 | |
| 109 | if (col_ast->comment) |
| 110 | comment = col_ast->comment->as<ASTLiteral &>().value.get<String>(); |
| 111 | |
| 112 | if (col_ast->codec) |
| 113 | codec = CompressionCodecFactory::instance().get(col_ast->codec, type); |
| 114 | |
| 115 | if (col_ast->ttl) |
| 116 | ttl = col_ast->ttl; |
| 117 | } |
| 118 | else |
| 119 | throw Exception("Cannot parse column description" , ErrorCodes::CANNOT_PARSE_TEXT); |
| 120 | } |
| 121 | |
| 122 | |
| 123 | ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, bool all_virtuals) |
| 124 | { |
| 125 | for (auto & elem : ordinary) |
| 126 | add(ColumnDescription(std::move(elem.name), std::move(elem.type), all_virtuals)); |
| 127 | } |
| 128 | |
| 129 | |
| 130 | /// We are trying to find first column from end with name `column_name` or with a name beginning with `column_name` and ".". |
| 131 | /// For example "fruits.bananas" |
| 132 | /// names are considered the same if they completely match or `name_without_dot` matches the part of the name to the point |
| 133 | static auto getNameRange(const ColumnsDescription::Container & columns, const String & name_without_dot) |
| 134 | { |
| 135 | String name_with_dot = name_without_dot + "." ; |
| 136 | |
| 137 | auto begin = columns.begin(); |
| 138 | for (; begin != columns.end(); ++begin) |
| 139 | { |
| 140 | if (begin->name == name_without_dot) |
| 141 | return std::make_pair(begin, std::next(begin)); |
| 142 | |
| 143 | if (startsWith(begin->name, name_with_dot)) |
| 144 | break; |
| 145 | } |
| 146 | |
| 147 | if (begin == columns.end()) |
| 148 | return std::make_pair(begin, begin); |
| 149 | |
| 150 | auto end = std::next(begin); |
| 151 | for (; end != columns.end(); ++end) |
| 152 | { |
| 153 | if (!startsWith(end->name, name_with_dot)) |
| 154 | break; |
| 155 | } |
| 156 | |
| 157 | return std::make_pair(begin, end); |
| 158 | } |
| 159 | |
| 160 | void ColumnsDescription::add(ColumnDescription column, const String & after_column) |
| 161 | { |
| 162 | if (has(column.name)) |
| 163 | throw Exception("Cannot add column " + column.name + ": column with this name already exists" , |
| 164 | ErrorCodes::ILLEGAL_COLUMN); |
| 165 | |
| 166 | auto insert_it = columns.cend(); |
| 167 | |
| 168 | if (!after_column.empty()) |
| 169 | { |
| 170 | auto range = getNameRange(columns, after_column); |
| 171 | if (range.first == range.second) |
| 172 | throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after" , |
| 173 | ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
| 174 | |
| 175 | insert_it = range.second; |
| 176 | } |
| 177 | |
| 178 | columns.get<0>().insert(insert_it, std::move(column)); |
| 179 | } |
| 180 | |
| 181 | void ColumnsDescription::remove(const String & column_name) |
| 182 | { |
| 183 | auto range = getNameRange(columns, column_name); |
| 184 | if (range.first == range.second) |
| 185 | throw Exception("There is no column " + column_name + " in table." , |
| 186 | ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
| 187 | |
| 188 | for (auto list_it = range.first; list_it != range.second;) |
| 189 | list_it = columns.get<0>().erase(list_it); |
| 190 | } |
| 191 | |
| 192 | |
| 193 | void ColumnsDescription::flattenNested() |
| 194 | { |
| 195 | for (auto it = columns.begin(); it != columns.end();) |
| 196 | { |
| 197 | const auto * type_arr = typeid_cast<const DataTypeArray *>(it->type.get()); |
| 198 | if (!type_arr) |
| 199 | { |
| 200 | ++it; |
| 201 | continue; |
| 202 | } |
| 203 | |
| 204 | const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get()); |
| 205 | if (!type_tuple) |
| 206 | { |
| 207 | ++it; |
| 208 | continue; |
| 209 | } |
| 210 | |
| 211 | ColumnDescription column = std::move(*it); |
| 212 | it = columns.get<0>().erase(it); |
| 213 | |
| 214 | const DataTypes & elements = type_tuple->getElements(); |
| 215 | const Strings & names = type_tuple->getElementNames(); |
| 216 | size_t tuple_size = elements.size(); |
| 217 | |
| 218 | for (size_t i = 0; i < tuple_size; ++i) |
| 219 | { |
| 220 | auto nested_column = column; |
| 221 | /// TODO: what to do with default expressions? |
| 222 | nested_column.name = Nested::concatenateName(column.name, names[i]); |
| 223 | nested_column.type = std::make_shared<DataTypeArray>(elements[i]); |
| 224 | |
| 225 | columns.get<0>().insert(it, std::move(nested_column)); |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | |
| 231 | NamesAndTypesList ColumnsDescription::getOrdinary() const |
| 232 | { |
| 233 | NamesAndTypesList ret; |
| 234 | for (const auto & col : columns) |
| 235 | if (col.default_desc.kind == ColumnDefaultKind::Default && !col.is_virtual) |
| 236 | ret.emplace_back(col.name, col.type); |
| 237 | return ret; |
| 238 | } |
| 239 | |
| 240 | NamesAndTypesList ColumnsDescription::getMaterialized() const |
| 241 | { |
| 242 | NamesAndTypesList ret; |
| 243 | for (const auto & col : columns) |
| 244 | if (col.default_desc.kind == ColumnDefaultKind::Materialized) |
| 245 | ret.emplace_back(col.name, col.type); |
| 246 | return ret; |
| 247 | } |
| 248 | |
| 249 | NamesAndTypesList ColumnsDescription::getAliases() const |
| 250 | { |
| 251 | NamesAndTypesList ret; |
| 252 | for (const auto & col : columns) |
| 253 | if (col.default_desc.kind == ColumnDefaultKind::Alias) |
| 254 | ret.emplace_back(col.name, col.type); |
| 255 | return ret; |
| 256 | } |
| 257 | |
| 258 | NamesAndTypesList ColumnsDescription::getVirtuals() const |
| 259 | { |
| 260 | NamesAndTypesList result; |
| 261 | for (const auto & column : columns) |
| 262 | if (column.is_virtual) |
| 263 | result.emplace_back(column.name, column.type); |
| 264 | return result; |
| 265 | } |
| 266 | |
| 267 | NamesAndTypesList ColumnsDescription::getAll() const |
| 268 | { |
| 269 | NamesAndTypesList ret; |
| 270 | for (const auto & col : columns) |
| 271 | ret.emplace_back(col.name, col.type); |
| 272 | return ret; |
| 273 | } |
| 274 | |
| 275 | |
| 276 | bool ColumnsDescription::has(const String & column_name) const |
| 277 | { |
| 278 | return columns.get<1>().find(column_name) != columns.get<1>().end(); |
| 279 | } |
| 280 | |
| 281 | bool ColumnsDescription::hasNested(const String & column_name) const |
| 282 | { |
| 283 | auto range = getNameRange(columns, column_name); |
| 284 | return range.first != range.second && range.first->name.length() > column_name.length(); |
| 285 | } |
| 286 | |
| 287 | const ColumnDescription & ColumnsDescription::get(const String & column_name) const |
| 288 | { |
| 289 | auto it = columns.get<1>().find(column_name); |
| 290 | if (it == columns.get<1>().end()) |
| 291 | throw Exception("There is no column " + column_name + " in table." , |
| 292 | ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
| 293 | |
| 294 | return *it; |
| 295 | } |
| 296 | |
| 297 | |
| 298 | NamesAndTypesList ColumnsDescription::getAllPhysical() const |
| 299 | { |
| 300 | NamesAndTypesList ret; |
| 301 | for (const auto & col : columns) |
| 302 | if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) |
| 303 | ret.emplace_back(col.name, col.type); |
| 304 | return ret; |
| 305 | } |
| 306 | |
| 307 | Names ColumnsDescription::getNamesOfPhysical() const |
| 308 | { |
| 309 | Names ret; |
| 310 | for (const auto & col : columns) |
| 311 | if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) |
| 312 | ret.emplace_back(col.name); |
| 313 | return ret; |
| 314 | } |
| 315 | |
| 316 | NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) const |
| 317 | { |
| 318 | auto it = columns.get<1>().find(column_name); |
| 319 | if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias || it->is_virtual) |
| 320 | throw Exception("There is no physical column " + column_name + " in table." , ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
| 321 | return NameAndTypePair(it->name, it->type); |
| 322 | } |
| 323 | |
| 324 | bool ColumnsDescription::hasPhysical(const String & column_name) const |
| 325 | { |
| 326 | auto it = columns.get<1>().find(column_name); |
| 327 | return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias && !it->is_virtual; |
| 328 | } |
| 329 | |
| 330 | |
| 331 | ColumnDefaults ColumnsDescription::getDefaults() const |
| 332 | { |
| 333 | ColumnDefaults ret; |
| 334 | for (const auto & column : columns) |
| 335 | if (column.default_desc.expression) |
| 336 | ret.emplace(column.name, column.default_desc); |
| 337 | |
| 338 | return ret; |
| 339 | } |
| 340 | |
| 341 | bool ColumnsDescription::hasDefault(const String & column_name) const |
| 342 | { |
| 343 | auto it = columns.get<1>().find(column_name); |
| 344 | return it != columns.get<1>().end() && it->default_desc.expression; |
| 345 | } |
| 346 | |
| 347 | std::optional<ColumnDefault> ColumnsDescription::getDefault(const String & column_name) const |
| 348 | { |
| 349 | auto it = columns.get<1>().find(column_name); |
| 350 | if (it != columns.get<1>().end() && it->default_desc.expression) |
| 351 | return it->default_desc; |
| 352 | |
| 353 | return {}; |
| 354 | } |
| 355 | |
| 356 | |
| 357 | CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const |
| 358 | { |
| 359 | const auto it = columns.get<1>().find(column_name); |
| 360 | |
| 361 | if (it == columns.get<1>().end() || !it->codec) |
| 362 | return default_codec; |
| 363 | |
| 364 | return it->codec; |
| 365 | } |
| 366 | |
| 367 | CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const |
| 368 | { |
| 369 | return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); |
| 370 | } |
| 371 | |
| 372 | ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const |
| 373 | { |
| 374 | ColumnTTLs ret; |
| 375 | for (const auto & column : columns) |
| 376 | if (column.ttl) |
| 377 | ret.emplace(column.name, column.ttl); |
| 378 | return ret; |
| 379 | } |
| 380 | |
| 381 | |
| 382 | String ColumnsDescription::toString() const |
| 383 | { |
| 384 | WriteBufferFromOwnString buf; |
| 385 | |
| 386 | writeCString("columns format version: 1\n" , buf); |
| 387 | DB::writeText(columns.size(), buf); |
| 388 | writeCString(" columns:\n" , buf); |
| 389 | |
| 390 | for (const ColumnDescription & column : columns) |
| 391 | column.writeText(buf); |
| 392 | |
| 393 | return buf.str(); |
| 394 | } |
| 395 | |
| 396 | ColumnsDescription ColumnsDescription::parse(const String & str) |
| 397 | { |
| 398 | ReadBufferFromString buf{str}; |
| 399 | |
| 400 | assertString("columns format version: 1\n" , buf); |
| 401 | size_t count{}; |
| 402 | readText(count, buf); |
| 403 | assertString(" columns:\n" , buf); |
| 404 | |
| 405 | ColumnsDescription result; |
| 406 | for (size_t i = 0; i < count; ++i) |
| 407 | { |
| 408 | ColumnDescription column; |
| 409 | column.readText(buf); |
| 410 | buf.ignore(1); /// ignore new line |
| 411 | result.add(std::move(column)); |
| 412 | } |
| 413 | |
| 414 | assertEOF(buf); |
| 415 | return result; |
| 416 | } |
| 417 | |
| 418 | } |
| 419 | |