1#include <Columns/IColumn.h>
2#include <Columns/ColumnConst.h>
3
4#include <Common/Exception.h>
5#include <Common/escapeForFileName.h>
6
7#include <Core/Defines.h>
8
9#include <IO/WriteHelpers.h>
10
11#include <DataTypes/IDataType.h>
12#include <DataTypes/DataTypeCustom.h>
13#include <DataTypes/NestedUtils.h>
14
15
16namespace DB
17{
18
/// Error codes referenced by the exceptions thrown in this file; they are only declared here.
namespace ErrorCodes
{
    /// Thrown by the single-stream bulk (de)serialization stubs.
    extern const int MULTIPLE_STREAMS_REQUIRED;
    /// Thrown when the in-memory value size is requested for a type without a fixed size.
    extern const int LOGICAL_ERROR;
    /// Thrown by the default promoteNumericType implementation.
    extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
}
25
26IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr)
27{
28}
29
30IDataType::~IDataType()
31{
32}
33
34String IDataType::getName() const
35{
36 if (custom_name)
37 {
38 return custom_name->getName();
39 }
40 else
41 {
42 return doGetName();
43 }
44}
45
46String IDataType::doGetName() const
47{
48 return getFamilyName();
49}
50
51void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint)
52{
53 /// Update the average value size hint if amount of read rows isn't too small
54 size_t column_size = column.size();
55 if (column_size > 10)
56 {
57 double current_avg_value_size = static_cast<double>(column.byteSize()) / column_size;
58
59 /// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly.
60 if (current_avg_value_size > avg_value_size_hint)
61 avg_value_size_hint = std::min(1024., current_avg_value_size); /// avoid overestimation
62 else if (current_avg_value_size * 2 < avg_value_size_hint)
63 avg_value_size_hint = (current_avg_value_size + avg_value_size_hint * 3) / 4;
64 }
65}
66
67ColumnPtr IDataType::createColumnConst(size_t size, const Field & field) const
68{
69 auto column = createColumn();
70 column->insert(field);
71 return ColumnConst::create(std::move(column), size);
72}
73
74
75ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const
76{
77 return createColumnConst(size, getDefault());
78}
79
80DataTypePtr IDataType::promoteNumericType() const
81{
82 throw Exception("Data type " + getName() + " can't be promoted.", ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED);
83}
84
85void IDataType::serializeBinaryBulk(const IColumn &, WriteBuffer &, size_t, size_t) const
86{
87 throw Exception("Data type " + getName() + " must be serialized with multiple streams", ErrorCodes::MULTIPLE_STREAMS_REQUIRED);
88}
89
90void IDataType::deserializeBinaryBulk(IColumn &, ReadBuffer &, size_t, double) const
91{
92 throw Exception("Data type " + getName() + " must be deserialized with multiple streams", ErrorCodes::MULTIPLE_STREAMS_REQUIRED);
93}
94
95size_t IDataType::getSizeOfValueInMemory() const
96{
97 throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR);
98}
99
100
101String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path)
102{
103 /// Sizes of arrays (elements of Nested type) are shared (all reside in single file).
104 String nested_table_name = Nested::extractTableName(column_name);
105
106 bool is_sizes_of_nested_type =
107 path.size() == 1 /// Nested structure may have arrays as nested elements (so effectively we have multidimensional arrays).
108 /// Sizes of arrays are shared only at first level.
109 && path[0].type == IDataType::Substream::ArraySizes
110 && nested_table_name != column_name;
111
112 size_t array_level = 0;
113 String stream_name = escapeForFileName(is_sizes_of_nested_type ? nested_table_name : column_name);
114 for (const Substream & elem : path)
115 {
116 if (elem.type == Substream::NullMap)
117 stream_name += ".null";
118 else if (elem.type == Substream::ArraySizes)
119 stream_name += ".size" + toString(array_level);
120 else if (elem.type == Substream::ArrayElements)
121 ++array_level;
122 else if (elem.type == Substream::TupleElement)
123 {
124 /// For compatibility reasons, we use %2E instead of dot.
125 /// Because nested data may be represented not by Array of Tuple,
126 /// but by separate Array columns with names in a form of a.b,
127 /// and name is encoded as a whole.
128 stream_name += "%2E" + escapeForFileName(elem.tuple_element_name);
129 }
130 else if (elem.type == Substream::DictionaryKeys)
131 stream_name += ".dict";
132 }
133 return stream_name;
134}
135
136
137void IDataType::insertDefaultInto(IColumn & column) const
138{
139 column.insertDefault();
140}
141
142void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
143{
144 if (custom_text_serialization)
145 custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings);
146 else
147 serializeTextEscaped(column, row_num, ostr, settings);
148}
149
150void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
151{
152 if (custom_text_serialization)
153 custom_text_serialization->deserializeTextEscaped(column, istr, settings);
154 else
155 deserializeTextEscaped(column, istr, settings);
156}
157
158void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
159{
160 if (custom_text_serialization)
161 custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings);
162 else
163 serializeTextQuoted(column, row_num, ostr, settings);
164}
165
166void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
167{
168 if (custom_text_serialization)
169 custom_text_serialization->deserializeTextQuoted(column, istr, settings);
170 else
171 deserializeTextQuoted(column, istr, settings);
172}
173
174void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
175{
176 if (custom_text_serialization)
177 custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings);
178 else
179 serializeTextCSV(column, row_num, ostr, settings);
180}
181
182void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
183{
184 if (custom_text_serialization)
185 custom_text_serialization->deserializeTextCSV(column, istr, settings);
186 else
187 deserializeTextCSV(column, istr, settings);
188}
189
190void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
191{
192 if (custom_text_serialization)
193 custom_text_serialization->serializeText(column, row_num, ostr, settings);
194 else
195 serializeText(column, row_num, ostr, settings);
196}
197
198void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
199{
200 if (custom_text_serialization)
201 custom_text_serialization->deserializeWholeText(column, istr, settings);
202 else
203 deserializeWholeText(column, istr, settings);
204}
205
206void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
207{
208 if (custom_text_serialization)
209 custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings);
210 else
211 serializeTextJSON(column, row_num, ostr, settings);
212}
213
214void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
215{
216 if (custom_text_serialization)
217 custom_text_serialization->deserializeTextJSON(column, istr, settings);
218 else
219 deserializeTextJSON(column, istr, settings);
220}
221
222void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
223{
224 if (custom_text_serialization)
225 custom_text_serialization->serializeTextXML(column, row_num, ostr, settings);
226 else
227 serializeTextXML(column, row_num, ostr, settings);
228}
229
230void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
231{
232 /// replace only if not null
233 if (custom_desc_->name)
234 custom_name = std::move(custom_desc_->name);
235
236 if (custom_desc_->text_serialization)
237 custom_text_serialization = std::move(custom_desc_->text_serialization);
238}
239
240}
241