1 | #include <Columns/IColumn.h> |
2 | #include <Columns/ColumnConst.h> |
3 | |
4 | #include <Common/Exception.h> |
5 | #include <Common/escapeForFileName.h> |
6 | |
7 | #include <Core/Defines.h> |
8 | |
9 | #include <IO/WriteHelpers.h> |
10 | |
11 | #include <DataTypes/IDataType.h> |
12 | #include <DataTypes/DataTypeCustom.h> |
13 | #include <DataTypes/NestedUtils.h> |
14 | |
15 | |
16 | namespace DB |
17 | { |
18 | |
namespace ErrorCodes
{
    /// Error codes used by the exceptions thrown in this file.
    /// Only declared here (extern); the definitions live elsewhere.
    extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
    extern const int LOGICAL_ERROR;
    extern const int MULTIPLE_STREAMS_REQUIRED;
}
25 | |
26 | IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr) |
27 | { |
28 | } |
29 | |
30 | IDataType::~IDataType() |
31 | { |
32 | } |
33 | |
34 | String IDataType::getName() const |
35 | { |
36 | if (custom_name) |
37 | { |
38 | return custom_name->getName(); |
39 | } |
40 | else |
41 | { |
42 | return doGetName(); |
43 | } |
44 | } |
45 | |
46 | String IDataType::doGetName() const |
47 | { |
48 | return getFamilyName(); |
49 | } |
50 | |
51 | void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint) |
52 | { |
53 | /// Update the average value size hint if amount of read rows isn't too small |
54 | size_t column_size = column.size(); |
55 | if (column_size > 10) |
56 | { |
57 | double current_avg_value_size = static_cast<double>(column.byteSize()) / column_size; |
58 | |
59 | /// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly. |
60 | if (current_avg_value_size > avg_value_size_hint) |
61 | avg_value_size_hint = std::min(1024., current_avg_value_size); /// avoid overestimation |
62 | else if (current_avg_value_size * 2 < avg_value_size_hint) |
63 | avg_value_size_hint = (current_avg_value_size + avg_value_size_hint * 3) / 4; |
64 | } |
65 | } |
66 | |
67 | ColumnPtr IDataType::createColumnConst(size_t size, const Field & field) const |
68 | { |
69 | auto column = createColumn(); |
70 | column->insert(field); |
71 | return ColumnConst::create(std::move(column), size); |
72 | } |
73 | |
74 | |
75 | ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const |
76 | { |
77 | return createColumnConst(size, getDefault()); |
78 | } |
79 | |
80 | DataTypePtr IDataType::promoteNumericType() const |
81 | { |
82 | throw Exception("Data type " + getName() + " can't be promoted." , ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED); |
83 | } |
84 | |
85 | void IDataType::serializeBinaryBulk(const IColumn &, WriteBuffer &, size_t, size_t) const |
86 | { |
87 | throw Exception("Data type " + getName() + " must be serialized with multiple streams" , ErrorCodes::MULTIPLE_STREAMS_REQUIRED); |
88 | } |
89 | |
90 | void IDataType::deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const |
91 | { |
92 | throw Exception("Data type " + getName() + " must be deserialized with multiple streams" , ErrorCodes::MULTIPLE_STREAMS_REQUIRED); |
93 | } |
94 | |
95 | size_t IDataType::getSizeOfValueInMemory() const |
96 | { |
97 | throw Exception("Value of type " + getName() + " in memory is not of fixed size." , ErrorCodes::LOGICAL_ERROR); |
98 | } |
99 | |
100 | |
101 | String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path) |
102 | { |
103 | /// Sizes of arrays (elements of Nested type) are shared (all reside in single file). |
104 | String nested_table_name = Nested::extractTableName(column_name); |
105 | |
106 | bool is_sizes_of_nested_type = |
107 | path.size() == 1 /// Nested structure may have arrays as nested elements (so effectively we have multidimensional arrays). |
108 | /// Sizes of arrays are shared only at first level. |
109 | && path[0].type == IDataType::Substream::ArraySizes |
110 | && nested_table_name != column_name; |
111 | |
112 | size_t array_level = 0; |
113 | String stream_name = escapeForFileName(is_sizes_of_nested_type ? nested_table_name : column_name); |
114 | for (const Substream & elem : path) |
115 | { |
116 | if (elem.type == Substream::NullMap) |
117 | stream_name += ".null" ; |
118 | else if (elem.type == Substream::ArraySizes) |
119 | stream_name += ".size" + toString(array_level); |
120 | else if (elem.type == Substream::ArrayElements) |
121 | ++array_level; |
122 | else if (elem.type == Substream::TupleElement) |
123 | { |
124 | /// For compatibility reasons, we use %2E instead of dot. |
125 | /// Because nested data may be represented not by Array of Tuple, |
126 | /// but by separate Array columns with names in a form of a.b, |
127 | /// and name is encoded as a whole. |
128 | stream_name += "%2E" + escapeForFileName(elem.tuple_element_name); |
129 | } |
130 | else if (elem.type == Substream::DictionaryKeys) |
131 | stream_name += ".dict" ; |
132 | } |
133 | return stream_name; |
134 | } |
135 | |
136 | |
137 | void IDataType::insertDefaultInto(IColumn & column) const |
138 | { |
139 | column.insertDefault(); |
140 | } |
141 | |
142 | void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
143 | { |
144 | if (custom_text_serialization) |
145 | custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings); |
146 | else |
147 | serializeTextEscaped(column, row_num, ostr, settings); |
148 | } |
149 | |
150 | void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
151 | { |
152 | if (custom_text_serialization) |
153 | custom_text_serialization->deserializeTextEscaped(column, istr, settings); |
154 | else |
155 | deserializeTextEscaped(column, istr, settings); |
156 | } |
157 | |
158 | void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
159 | { |
160 | if (custom_text_serialization) |
161 | custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings); |
162 | else |
163 | serializeTextQuoted(column, row_num, ostr, settings); |
164 | } |
165 | |
166 | void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
167 | { |
168 | if (custom_text_serialization) |
169 | custom_text_serialization->deserializeTextQuoted(column, istr, settings); |
170 | else |
171 | deserializeTextQuoted(column, istr, settings); |
172 | } |
173 | |
174 | void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
175 | { |
176 | if (custom_text_serialization) |
177 | custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings); |
178 | else |
179 | serializeTextCSV(column, row_num, ostr, settings); |
180 | } |
181 | |
182 | void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
183 | { |
184 | if (custom_text_serialization) |
185 | custom_text_serialization->deserializeTextCSV(column, istr, settings); |
186 | else |
187 | deserializeTextCSV(column, istr, settings); |
188 | } |
189 | |
190 | void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
191 | { |
192 | if (custom_text_serialization) |
193 | custom_text_serialization->serializeText(column, row_num, ostr, settings); |
194 | else |
195 | serializeText(column, row_num, ostr, settings); |
196 | } |
197 | |
198 | void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
199 | { |
200 | if (custom_text_serialization) |
201 | custom_text_serialization->deserializeWholeText(column, istr, settings); |
202 | else |
203 | deserializeWholeText(column, istr, settings); |
204 | } |
205 | |
206 | void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
207 | { |
208 | if (custom_text_serialization) |
209 | custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings); |
210 | else |
211 | serializeTextJSON(column, row_num, ostr, settings); |
212 | } |
213 | |
214 | void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
215 | { |
216 | if (custom_text_serialization) |
217 | custom_text_serialization->deserializeTextJSON(column, istr, settings); |
218 | else |
219 | deserializeTextJSON(column, istr, settings); |
220 | } |
221 | |
222 | void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
223 | { |
224 | if (custom_text_serialization) |
225 | custom_text_serialization->serializeTextXML(column, row_num, ostr, settings); |
226 | else |
227 | serializeTextXML(column, row_num, ostr, settings); |
228 | } |
229 | |
230 | void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const |
231 | { |
232 | /// replace only if not null |
233 | if (custom_desc_->name) |
234 | custom_name = std::move(custom_desc_->name); |
235 | |
236 | if (custom_desc_->text_serialization) |
237 | custom_text_serialization = std::move(custom_desc_->text_serialization); |
238 | } |
239 | |
240 | } |
241 | |