| 1 | #include <Columns/IColumn.h> |
| 2 | #include <Columns/ColumnConst.h> |
| 3 | |
| 4 | #include <Common/Exception.h> |
| 5 | #include <Common/escapeForFileName.h> |
| 6 | |
| 7 | #include <Core/Defines.h> |
| 8 | |
| 9 | #include <IO/WriteHelpers.h> |
| 10 | |
| 11 | #include <DataTypes/IDataType.h> |
| 12 | #include <DataTypes/DataTypeCustom.h> |
| 13 | #include <DataTypes/NestedUtils.h> |
| 14 | |
| 15 | |
| 16 | namespace DB |
| 17 | { |
| 18 | |
/// Error codes thrown from this file. They are only declared here (extern); the values are defined elsewhere.
namespace ErrorCodes
{
    extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
    extern const int LOGICAL_ERROR;
    extern const int MULTIPLE_STREAMS_REQUIRED;
}
| 25 | |
| 26 | IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr) |
| 27 | { |
| 28 | } |
| 29 | |
| 30 | IDataType::~IDataType() |
| 31 | { |
| 32 | } |
| 33 | |
| 34 | String IDataType::getName() const |
| 35 | { |
| 36 | if (custom_name) |
| 37 | { |
| 38 | return custom_name->getName(); |
| 39 | } |
| 40 | else |
| 41 | { |
| 42 | return doGetName(); |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | String IDataType::doGetName() const |
| 47 | { |
| 48 | return getFamilyName(); |
| 49 | } |
| 50 | |
| 51 | void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint) |
| 52 | { |
| 53 | /// Update the average value size hint if amount of read rows isn't too small |
| 54 | size_t column_size = column.size(); |
| 55 | if (column_size > 10) |
| 56 | { |
| 57 | double current_avg_value_size = static_cast<double>(column.byteSize()) / column_size; |
| 58 | |
| 59 | /// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly. |
| 60 | if (current_avg_value_size > avg_value_size_hint) |
| 61 | avg_value_size_hint = std::min(1024., current_avg_value_size); /// avoid overestimation |
| 62 | else if (current_avg_value_size * 2 < avg_value_size_hint) |
| 63 | avg_value_size_hint = (current_avg_value_size + avg_value_size_hint * 3) / 4; |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | ColumnPtr IDataType::createColumnConst(size_t size, const Field & field) const |
| 68 | { |
| 69 | auto column = createColumn(); |
| 70 | column->insert(field); |
| 71 | return ColumnConst::create(std::move(column), size); |
| 72 | } |
| 73 | |
| 74 | |
| 75 | ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const |
| 76 | { |
| 77 | return createColumnConst(size, getDefault()); |
| 78 | } |
| 79 | |
| 80 | DataTypePtr IDataType::promoteNumericType() const |
| 81 | { |
| 82 | throw Exception("Data type " + getName() + " can't be promoted." , ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED); |
| 83 | } |
| 84 | |
| 85 | void IDataType::serializeBinaryBulk(const IColumn &, WriteBuffer &, size_t, size_t) const |
| 86 | { |
| 87 | throw Exception("Data type " + getName() + " must be serialized with multiple streams" , ErrorCodes::MULTIPLE_STREAMS_REQUIRED); |
| 88 | } |
| 89 | |
| 90 | void IDataType::deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const |
| 91 | { |
| 92 | throw Exception("Data type " + getName() + " must be deserialized with multiple streams" , ErrorCodes::MULTIPLE_STREAMS_REQUIRED); |
| 93 | } |
| 94 | |
| 95 | size_t IDataType::getSizeOfValueInMemory() const |
| 96 | { |
| 97 | throw Exception("Value of type " + getName() + " in memory is not of fixed size." , ErrorCodes::LOGICAL_ERROR); |
| 98 | } |
| 99 | |
| 100 | |
| 101 | String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path) |
| 102 | { |
| 103 | /// Sizes of arrays (elements of Nested type) are shared (all reside in single file). |
| 104 | String nested_table_name = Nested::extractTableName(column_name); |
| 105 | |
| 106 | bool is_sizes_of_nested_type = |
| 107 | path.size() == 1 /// Nested structure may have arrays as nested elements (so effectively we have multidimensional arrays). |
| 108 | /// Sizes of arrays are shared only at first level. |
| 109 | && path[0].type == IDataType::Substream::ArraySizes |
| 110 | && nested_table_name != column_name; |
| 111 | |
| 112 | size_t array_level = 0; |
| 113 | String stream_name = escapeForFileName(is_sizes_of_nested_type ? nested_table_name : column_name); |
| 114 | for (const Substream & elem : path) |
| 115 | { |
| 116 | if (elem.type == Substream::NullMap) |
| 117 | stream_name += ".null" ; |
| 118 | else if (elem.type == Substream::ArraySizes) |
| 119 | stream_name += ".size" + toString(array_level); |
| 120 | else if (elem.type == Substream::ArrayElements) |
| 121 | ++array_level; |
| 122 | else if (elem.type == Substream::TupleElement) |
| 123 | { |
| 124 | /// For compatibility reasons, we use %2E instead of dot. |
| 125 | /// Because nested data may be represented not by Array of Tuple, |
| 126 | /// but by separate Array columns with names in a form of a.b, |
| 127 | /// and name is encoded as a whole. |
| 128 | stream_name += "%2E" + escapeForFileName(elem.tuple_element_name); |
| 129 | } |
| 130 | else if (elem.type == Substream::DictionaryKeys) |
| 131 | stream_name += ".dict" ; |
| 132 | } |
| 133 | return stream_name; |
| 134 | } |
| 135 | |
| 136 | |
| 137 | void IDataType::insertDefaultInto(IColumn & column) const |
| 138 | { |
| 139 | column.insertDefault(); |
| 140 | } |
| 141 | |
| 142 | void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 143 | { |
| 144 | if (custom_text_serialization) |
| 145 | custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings); |
| 146 | else |
| 147 | serializeTextEscaped(column, row_num, ostr, settings); |
| 148 | } |
| 149 | |
| 150 | void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
| 151 | { |
| 152 | if (custom_text_serialization) |
| 153 | custom_text_serialization->deserializeTextEscaped(column, istr, settings); |
| 154 | else |
| 155 | deserializeTextEscaped(column, istr, settings); |
| 156 | } |
| 157 | |
| 158 | void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 159 | { |
| 160 | if (custom_text_serialization) |
| 161 | custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings); |
| 162 | else |
| 163 | serializeTextQuoted(column, row_num, ostr, settings); |
| 164 | } |
| 165 | |
| 166 | void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
| 167 | { |
| 168 | if (custom_text_serialization) |
| 169 | custom_text_serialization->deserializeTextQuoted(column, istr, settings); |
| 170 | else |
| 171 | deserializeTextQuoted(column, istr, settings); |
| 172 | } |
| 173 | |
| 174 | void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 175 | { |
| 176 | if (custom_text_serialization) |
| 177 | custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings); |
| 178 | else |
| 179 | serializeTextCSV(column, row_num, ostr, settings); |
| 180 | } |
| 181 | |
| 182 | void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
| 183 | { |
| 184 | if (custom_text_serialization) |
| 185 | custom_text_serialization->deserializeTextCSV(column, istr, settings); |
| 186 | else |
| 187 | deserializeTextCSV(column, istr, settings); |
| 188 | } |
| 189 | |
| 190 | void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 191 | { |
| 192 | if (custom_text_serialization) |
| 193 | custom_text_serialization->serializeText(column, row_num, ostr, settings); |
| 194 | else |
| 195 | serializeText(column, row_num, ostr, settings); |
| 196 | } |
| 197 | |
| 198 | void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
| 199 | { |
| 200 | if (custom_text_serialization) |
| 201 | custom_text_serialization->deserializeWholeText(column, istr, settings); |
| 202 | else |
| 203 | deserializeWholeText(column, istr, settings); |
| 204 | } |
| 205 | |
| 206 | void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 207 | { |
| 208 | if (custom_text_serialization) |
| 209 | custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings); |
| 210 | else |
| 211 | serializeTextJSON(column, row_num, ostr, settings); |
| 212 | } |
| 213 | |
| 214 | void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
| 215 | { |
| 216 | if (custom_text_serialization) |
| 217 | custom_text_serialization->deserializeTextJSON(column, istr, settings); |
| 218 | else |
| 219 | deserializeTextJSON(column, istr, settings); |
| 220 | } |
| 221 | |
| 222 | void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 223 | { |
| 224 | if (custom_text_serialization) |
| 225 | custom_text_serialization->serializeTextXML(column, row_num, ostr, settings); |
| 226 | else |
| 227 | serializeTextXML(column, row_num, ostr, settings); |
| 228 | } |
| 229 | |
| 230 | void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const |
| 231 | { |
| 232 | /// replace only if not null |
| 233 | if (custom_desc_->name) |
| 234 | custom_name = std::move(custom_desc_->name); |
| 235 | |
| 236 | if (custom_desc_->text_serialization) |
| 237 | custom_text_serialization = std::move(custom_desc_->text_serialization); |
| 238 | } |
| 239 | |
| 240 | } |
| 241 | |