| 1 | #include <Core/Defines.h> |
| 2 | #include <Core/Block.h> |
| 3 | |
| 4 | #include <IO/WriteHelpers.h> |
| 5 | #include <IO/VarInt.h> |
| 6 | #include <Compression/CompressedWriteBuffer.h> |
| 7 | |
| 8 | #include <DataStreams/MarkInCompressedFile.h> |
| 9 | #include <DataStreams/NativeBlockOutputStream.h> |
| 10 | |
| 11 | #include <Common/typeid_cast.h> |
| 12 | #include <DataTypes/DataTypeLowCardinality.h> |
| 13 | |
| 14 | namespace DB |
| 15 | { |
| 16 | |
/// Error codes used in this file; declared `extern`, so they are defined in another translation unit.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
| 21 | |
| 22 | |
| 23 | NativeBlockOutputStream::NativeBlockOutputStream( |
| 24 | WriteBuffer & ostr_, UInt64 client_revision_, const Block & , bool remove_low_cardinality_, |
| 25 | WriteBuffer * index_ostr_, size_t initial_size_of_file_) |
| 26 | : ostr(ostr_), client_revision(client_revision_), header(header_), |
| 27 | index_ostr(index_ostr_), initial_size_of_file(initial_size_of_file_), remove_low_cardinality(remove_low_cardinality_) |
| 28 | { |
| 29 | if (index_ostr) |
| 30 | { |
| 31 | ostr_concrete = typeid_cast<CompressedWriteBuffer *>(&ostr); |
| 32 | if (!ostr_concrete) |
| 33 | throw Exception("When need to write index for NativeBlockOutputStream, ostr must be CompressedWriteBuffer." , ErrorCodes::LOGICAL_ERROR); |
| 34 | } |
| 35 | } |
| 36 | |
| 37 | |
| 38 | void NativeBlockOutputStream::flush() |
| 39 | { |
| 40 | ostr.next(); |
| 41 | } |
| 42 | |
| 43 | |
| 44 | void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) |
| 45 | { |
| 46 | /** If there are columns-constants - then we materialize them. |
| 47 | * (Since the data type does not know how to serialize / deserialize constants.) |
| 48 | */ |
| 49 | ColumnPtr full_column = column->convertToFullColumnIfConst(); |
| 50 | |
| 51 | IDataType::SerializeBinaryBulkSettings settings; |
| 52 | settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; }; |
| 53 | settings.position_independent_encoding = false; |
| 54 | settings.low_cardinality_max_dictionary_size = 0; |
| 55 | |
| 56 | IDataType::SerializeBinaryBulkStatePtr state; |
| 57 | type.serializeBinaryBulkStatePrefix(settings, state); |
| 58 | type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); |
| 59 | type.serializeBinaryBulkStateSuffix(settings, state); |
| 60 | } |
| 61 | |
| 62 | |
| 63 | void NativeBlockOutputStream::write(const Block & block) |
| 64 | { |
| 65 | /// Additional information about the block. |
| 66 | if (client_revision > 0) |
| 67 | block.info.write(ostr); |
| 68 | |
| 69 | block.checkNumberOfRows(); |
| 70 | |
| 71 | /// Dimensions |
| 72 | size_t columns = block.columns(); |
| 73 | size_t rows = block.rows(); |
| 74 | |
| 75 | writeVarUInt(columns, ostr); |
| 76 | writeVarUInt(rows, ostr); |
| 77 | |
| 78 | /** The index has the same structure as the data stream. |
| 79 | * But instead of column values, it contains a mark that points to the location in the data file where this part of the column is located. |
| 80 | */ |
| 81 | if (index_ostr) |
| 82 | { |
| 83 | writeVarUInt(columns, *index_ostr); |
| 84 | writeVarUInt(rows, *index_ostr); |
| 85 | } |
| 86 | |
| 87 | for (size_t i = 0; i < columns; ++i) |
| 88 | { |
| 89 | /// For the index. |
| 90 | MarkInCompressedFile mark; |
| 91 | |
| 92 | if (index_ostr) |
| 93 | { |
| 94 | ostr_concrete->next(); /// Finish compressed block. |
| 95 | mark.offset_in_compressed_file = initial_size_of_file + ostr_concrete->getCompressedBytes(); |
| 96 | mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes(); |
| 97 | } |
| 98 | |
| 99 | ColumnWithTypeAndName column = block.safeGetByPosition(i); |
| 100 | |
| 101 | /// Send data to old clients without low cardinality type. |
| 102 | if (remove_low_cardinality || (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE)) |
| 103 | { |
| 104 | column.column = recursiveRemoveLowCardinality(column.column); |
| 105 | column.type = recursiveRemoveLowCardinality(column.type); |
| 106 | } |
| 107 | |
| 108 | /// Name |
| 109 | writeStringBinary(column.name, ostr); |
| 110 | |
| 111 | /// Type |
| 112 | String type_name = column.type->getName(); |
| 113 | |
| 114 | /// For compatibility, we will not send explicit timezone parameter in DateTime data type |
| 115 | /// to older clients, that cannot understand it. |
| 116 | if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE |
| 117 | && startsWith(type_name, "DateTime(" )) |
| 118 | type_name = "DateTime" ; |
| 119 | |
| 120 | writeStringBinary(type_name, ostr); |
| 121 | |
| 122 | /// Data |
| 123 | if (rows) /// Zero items of data is always represented as zero number of bytes. |
| 124 | writeData(*column.type, column.column, ostr, 0, 0); |
| 125 | |
| 126 | if (index_ostr) |
| 127 | { |
| 128 | writeStringBinary(column.name, *index_ostr); |
| 129 | writeStringBinary(column.type->getName(), *index_ostr); |
| 130 | |
| 131 | writeBinary(mark.offset_in_compressed_file, *index_ostr); |
| 132 | writeBinary(mark.offset_in_decompressed_block, *index_ostr); |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | } |
| 138 | |