1#include <DataTypes/DataTypeNullable.h>
2#include <DataTypes/DataTypeNothing.h>
3#include <DataTypes/DataTypesNumber.h>
4#include <DataTypes/DataTypeFactory.h>
5#include <Columns/ColumnNullable.h>
6#include <Core/Field.h>
7#include <IO/ReadBuffer.h>
8#include <IO/ReadBufferFromMemory.h>
9#include <IO/ReadHelpers.h>
10#include <IO/WriteBuffer.h>
11#include <IO/WriteHelpers.h>
12#include <IO/ConcatReadBuffer.h>
13#include <Parsers/IAST.h>
14#include <Common/typeid_cast.h>
15#include <Common/assert_cast.h>
16
17
18namespace DB
19{
20
/// Error codes thrown from this translation unit.
/// Every code used below is declared here explicitly, so the file does not rely on
/// other headers happening to declare them.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int CANNOT_READ_ALL_DATA;
    extern const int LOGICAL_ERROR;
}
26
27
28DataTypeNullable::DataTypeNullable(const DataTypePtr & nested_data_type_)
29 : nested_data_type{nested_data_type_}
30{
31 if (!nested_data_type->canBeInsideNullable())
32 throw Exception("Nested type " + nested_data_type->getName() + " cannot be inside Nullable type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
33}
34
35
36bool DataTypeNullable::onlyNull() const
37{
38 return typeid_cast<const DataTypeNothing *>(nested_data_type.get());
39}
40
41
42void DataTypeNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
43{
44 path.push_back(Substream::NullMap);
45 callback(path);
46 path.back() = Substream::NullableElements;
47 nested_data_type->enumerateStreams(callback, path);
48 path.pop_back();
49}
50
51
52void DataTypeNullable::serializeBinaryBulkStatePrefix(
53 SerializeBinaryBulkSettings & settings,
54 SerializeBinaryBulkStatePtr & state) const
55{
56 settings.path.push_back(Substream::NullableElements);
57 nested_data_type->serializeBinaryBulkStatePrefix(settings, state);
58 settings.path.pop_back();
59}
60
61
62void DataTypeNullable::serializeBinaryBulkStateSuffix(
63 SerializeBinaryBulkSettings & settings,
64 SerializeBinaryBulkStatePtr & state) const
65{
66 settings.path.push_back(Substream::NullableElements);
67 nested_data_type->serializeBinaryBulkStateSuffix(settings, state);
68 settings.path.pop_back();
69}
70
71
72void DataTypeNullable::deserializeBinaryBulkStatePrefix(
73 DeserializeBinaryBulkSettings & settings,
74 DeserializeBinaryBulkStatePtr & state) const
75{
76 settings.path.push_back(Substream::NullableElements);
77 nested_data_type->deserializeBinaryBulkStatePrefix(settings, state);
78 settings.path.pop_back();
79}
80
81
82void DataTypeNullable::serializeBinaryBulkWithMultipleStreams(
83 const IColumn & column,
84 size_t offset,
85 size_t limit,
86 SerializeBinaryBulkSettings & settings,
87 SerializeBinaryBulkStatePtr & state) const
88{
89 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
90 col.checkConsistency();
91
92 /// First serialize null map.
93 settings.path.push_back(Substream::NullMap);
94 if (auto stream = settings.getter(settings.path))
95 DataTypeUInt8().serializeBinaryBulk(col.getNullMapColumn(), *stream, offset, limit);
96
97 /// Then serialize contents of arrays.
98 settings.path.back() = Substream::NullableElements;
99 nested_data_type->serializeBinaryBulkWithMultipleStreams(col.getNestedColumn(), offset, limit, settings, state);
100 settings.path.pop_back();
101}
102
103
104void DataTypeNullable::deserializeBinaryBulkWithMultipleStreams(
105 IColumn & column,
106 size_t limit,
107 DeserializeBinaryBulkSettings & settings,
108 DeserializeBinaryBulkStatePtr & state) const
109{
110 ColumnNullable & col = assert_cast<ColumnNullable &>(column);
111
112 settings.path.push_back(Substream::NullMap);
113 if (auto stream = settings.getter(settings.path))
114 DataTypeUInt8().deserializeBinaryBulk(col.getNullMapColumn(), *stream, limit, 0);
115
116 settings.path.back() = Substream::NullableElements;
117 nested_data_type->deserializeBinaryBulkWithMultipleStreams(col.getNestedColumn(), limit, settings, state);
118 settings.path.pop_back();
119}
120
121
122void DataTypeNullable::serializeBinary(const Field & field, WriteBuffer & ostr) const
123{
124 if (field.isNull())
125 {
126 writeBinary(true, ostr);
127 }
128 else
129 {
130 writeBinary(false, ostr);
131 nested_data_type->serializeBinary(field, ostr);
132 }
133}
134
135void DataTypeNullable::deserializeBinary(Field & field, ReadBuffer & istr) const
136{
137 bool is_null = false;
138 readBinary(is_null, istr);
139 if (!is_null)
140 {
141 nested_data_type->deserializeBinary(field, istr);
142 }
143 else
144 {
145 field = Null();
146 }
147}
148
149void DataTypeNullable::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
150{
151 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
152
153 bool is_null = col.isNullAt(row_num);
154 writeBinary(is_null, ostr);
155 if (!is_null)
156 nested_data_type->serializeBinary(col.getNestedColumn(), row_num, ostr);
157}
158
159/// Deserialize value into ColumnNullable.
160/// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all.
161template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, void>, ReturnType>* = nullptr>
162static ReturnType safeDeserialize(
163 IColumn & column, const IDataType & /*nested_data_type*/,
164 CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
165{
166 ColumnNullable & col = assert_cast<ColumnNullable &>(column);
167
168 if (check_for_null())
169 {
170 col.insertDefault();
171 }
172 else
173 {
174 deserialize_nested(col.getNestedColumn());
175
176 try
177 {
178 col.getNullMapData().push_back(0);
179 }
180 catch (...)
181 {
182 col.getNestedColumn().popBack(1);
183 throw;
184 }
185 }
186}
187
188/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false.
189template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, bool>, ReturnType>* = nullptr>
190static ReturnType safeDeserialize(
191 IColumn & column, const IDataType & nested_data_type,
192 CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
193{
194 assert(!dynamic_cast<ColumnNullable *>(&column));
195 assert(!dynamic_cast<const DataTypeNullable *>(&nested_data_type));
196 bool insert_default = check_for_null();
197 if (insert_default)
198 nested_data_type.insertDefaultInto(column);
199 else
200 deserialize_nested(column);
201 return !insert_default;
202}
203
204
205void DataTypeNullable::deserializeBinary(IColumn & column, ReadBuffer & istr) const
206{
207 safeDeserialize(column, *nested_data_type,
208 [&istr] { bool is_null = 0; readBinary(is_null, istr); return is_null; },
209 [this, &istr] (IColumn & nested) { nested_data_type->deserializeBinary(nested, istr); });
210}
211
212
213void DataTypeNullable::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
214{
215 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
216
217 if (col.isNullAt(row_num))
218 writeCString("\\N", ostr);
219 else
220 nested_data_type->serializeAsTextEscaped(col.getNestedColumn(), row_num, ostr, settings);
221}
222
223
224void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
225{
226 deserializeTextEscaped<void>(column, istr, settings, nested_data_type);
227}
228
229template<typename ReturnType>
230ReturnType DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
231 const DataTypePtr & nested_data_type)
232{
233 /// Little tricky, because we cannot discriminate null from first character.
234
235 if (istr.eof())
236 throw Exception("Unexpected end of stream, while parsing value of Nullable type", ErrorCodes::CANNOT_READ_ALL_DATA);
237
238 /// This is not null, surely.
239 if (*istr.position() != '\\')
240 {
241 return safeDeserialize<ReturnType>(column, *nested_data_type,
242 [] { return false; },
243 [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextEscaped(nested, istr, settings); });
244 }
245 else
246 {
247 /// Now we know, that data in buffer starts with backslash.
248 ++istr.position();
249
250 if (istr.eof())
251 throw Exception("Unexpected end of stream, while parsing value of Nullable type, after backslash", ErrorCodes::CANNOT_READ_ALL_DATA);
252
253 return safeDeserialize<ReturnType>(column, *nested_data_type,
254 [&istr]
255 {
256 if (*istr.position() == 'N')
257 {
258 ++istr.position();
259 return true;
260 }
261 return false;
262 },
263 [&nested_data_type, &istr, &settings] (IColumn & nested)
264 {
265 if (istr.position() != istr.buffer().begin())
266 {
267 /// We could step back to consume backslash again.
268 --istr.position();
269 nested_data_type->deserializeAsTextEscaped(nested, istr, settings);
270 }
271 else
272 {
273 /// Otherwise, we need to place backslash back in front of istr.
274 ReadBufferFromMemory prefix("\\", 1);
275 ConcatReadBuffer prepended_istr(prefix, istr);
276
277 nested_data_type->deserializeAsTextEscaped(nested, prepended_istr, settings);
278
279 /// Synchronise cursor position in original buffer.
280
281 if (prepended_istr.count() > 1)
282 istr.position() = prepended_istr.position();
283 }
284 });
285 }
286}
287
288void DataTypeNullable::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
289{
290 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
291
292 if (col.isNullAt(row_num))
293 writeCString("NULL", ostr);
294 else
295 nested_data_type->serializeAsTextQuoted(col.getNestedColumn(), row_num, ostr, settings);
296}
297
298
299void DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
300{
301 deserializeTextQuoted<void>(column, istr, settings, nested_data_type);
302}
303
304template<typename ReturnType>
305ReturnType DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
306 const DataTypePtr & nested_data_type)
307{
308 return safeDeserialize<ReturnType>(column, *nested_data_type,
309 [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
310 [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); });
311}
312
313
314void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
315{
316 safeDeserialize(column, *nested_data_type,
317 [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
318 [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); });
319}
320
321
322void DataTypeNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
323{
324 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
325
326 if (col.isNullAt(row_num))
327 writeCString("\\N", ostr);
328 else
329 nested_data_type->serializeAsTextCSV(col.getNestedColumn(), row_num, ostr, settings);
330}
331
332void DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
333{
334 deserializeTextCSV<void>(column, istr, settings, nested_data_type);
335}
336
337template<typename ReturnType>
338ReturnType DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
339 const DataTypePtr & nested_data_type)
340{
341 constexpr char const * null_literal = "NULL";
342 constexpr size_t len = 4;
343 size_t null_prefix_len = 0;
344
345 auto check_for_null = [&istr, &settings, &null_prefix_len]
346 {
347 if (checkStringByFirstCharacterAndAssertTheRest("\\N", istr))
348 return true;
349 if (!settings.csv.unquoted_null_literal_as_null)
350 return false;
351
352 /// Check for unquoted NULL
353 while (!istr.eof() && null_prefix_len < len && null_literal[null_prefix_len] == *istr.position())
354 {
355 ++null_prefix_len;
356 ++istr.position();
357 }
358 if (null_prefix_len == len)
359 return true;
360
361 /// Value and "NULL" have common prefix, but value is not "NULL".
362 /// Restore previous buffer position if possible.
363 if (null_prefix_len <= istr.offset())
364 {
365 istr.position() -= null_prefix_len;
366 null_prefix_len = 0;
367 }
368 return false;
369 };
370
371 auto deserialize_nested = [&nested_data_type, &settings, &istr, &null_prefix_len] (IColumn & nested)
372 {
373 if (likely(!null_prefix_len))
374 nested_data_type->deserializeAsTextCSV(nested, istr, settings);
375 else
376 {
377 /// Previous buffer position was not restored,
378 /// so we need to prepend extracted characters (rare case)
379 ReadBufferFromMemory prepend(null_literal, null_prefix_len);
380 ConcatReadBuffer buf(prepend, istr);
381 nested_data_type->deserializeAsTextCSV(nested, buf, settings);
382
383 /// Check if all extracted characters were read by nested parser and update buffer position
384 if (null_prefix_len < buf.count())
385 istr.position() = buf.position();
386 else if (null_prefix_len > buf.count())
387 {
388 /// It can happen only if there is an unquoted string instead of a number
389 /// or if someone uses 'U' or 'L' as delimiter in CSV.
390 /// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
391 if (settings.csv.delimiter == 'U' || settings.csv.delimiter == 'L')
392 throw DB::Exception("Enabled setting input_format_csv_unquoted_null_literal_as_null may not work correctly "
393 "with format_csv_delimiter = 'U' or 'L' for large input.", ErrorCodes::CANNOT_READ_ALL_DATA);
394 WriteBufferFromOwnString parsed_value;
395 nested_data_type->serializeAsTextCSV(nested, nested.size() - 1, parsed_value, settings);
396 throw DB::Exception("Error while parsing \"" + std::string(null_literal, null_prefix_len)
397 + std::string(istr.position(), std::min(size_t{10}, istr.available())) + "\" as Nullable(" + nested_data_type->getName()
398 + ") at position " + std::to_string(istr.count()) + ": expected \"NULL\" or " + nested_data_type->getName()
399 + ", got \"" + std::string(null_literal, buf.count()) + "\", which was deserialized as \""
400 + parsed_value.str() + "\". It seems that input data is ill-formatted.",
401 ErrorCodes::CANNOT_READ_ALL_DATA);
402 }
403 }
404 };
405
406 return safeDeserialize<ReturnType>(column, *nested_data_type, check_for_null, deserialize_nested);
407}
408
409void DataTypeNullable::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
410{
411 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
412
413 /// In simple text format (like 'Pretty' format) (these formats are suitable only for output and cannot be parsed back),
414 /// data is printed without escaping.
415 /// It makes theoretically impossible to distinguish between NULL and some string value, regardless on how do we print NULL.
416 /// For this reason, we output NULL in a bit strange way.
417 /// This assumes UTF-8 and proper font support. This is Ok, because Pretty formats are "presentational", not for data exchange.
418
419 if (col.isNullAt(row_num))
420 writeCString("ᴺᵁᴸᴸ", ostr);
421 else
422 nested_data_type->serializeAsText(col.getNestedColumn(), row_num, ostr, settings);
423}
424
425void DataTypeNullable::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
426{
427 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
428
429 if (col.isNullAt(row_num))
430 writeCString("null", ostr);
431 else
432 nested_data_type->serializeAsTextJSON(col.getNestedColumn(), row_num, ostr, settings);
433}
434
435void DataTypeNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
436{
437 deserializeTextJSON<void>(column, istr, settings, nested_data_type);
438}
439
440template<typename ReturnType>
441ReturnType DataTypeNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
442 const DataTypePtr & nested_data_type)
443{
444 return safeDeserialize<ReturnType>(column, *nested_data_type,
445 [&istr] { return checkStringByFirstCharacterAndAssertTheRest("null", istr); },
446 [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextJSON(nested, istr, settings); });
447}
448
449void DataTypeNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
450{
451 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
452
453 if (col.isNullAt(row_num))
454 writeCString("\\N", ostr);
455 else
456 nested_data_type->serializeAsTextXML(col.getNestedColumn(), row_num, ostr, settings);
457}
458
459void DataTypeNullable::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
460{
461 const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
462 if (!col.isNullAt(row_num))
463 nested_data_type->serializeProtobuf(col.getNestedColumn(), row_num, protobuf, value_index);
464}
465
466void DataTypeNullable::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
467{
468 ColumnNullable & col = assert_cast<ColumnNullable &>(column);
469 IColumn & nested_column = col.getNestedColumn();
470 size_t old_size = nested_column.size();
471 try
472 {
473 nested_data_type->deserializeProtobuf(nested_column, protobuf, allow_add_row, row_added);
474 if (row_added)
475 col.getNullMapData().push_back(0);
476 }
477 catch (...)
478 {
479 nested_column.popBack(nested_column.size() - old_size);
480 col.getNullMapData().resize_assume_reserved(old_size);
481 row_added = false;
482 throw;
483 }
484}
485
486MutableColumnPtr DataTypeNullable::createColumn() const
487{
488 return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
489}
490
491Field DataTypeNullable::getDefault() const
492{
493 return Null();
494}
495
496size_t DataTypeNullable::getSizeOfValueInMemory() const
497{
498 throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR);
499}
500
501
502bool DataTypeNullable::equals(const IDataType & rhs) const
503{
504 return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
505}
506
507
508static DataTypePtr create(const String & /*type_name*/, const ASTPtr & arguments)
509{
510 if (!arguments || arguments->children.size() != 1)
511 throw Exception("Nullable data type family must have exactly one argument - nested type", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
512
513 DataTypePtr nested_type = DataTypeFactory::instance().get(arguments->children[0]);
514
515 return std::make_shared<DataTypeNullable>(nested_type);
516}
517
518
519void registerDataTypeNullable(DataTypeFactory & factory)
520{
521 factory.registerDataType("Nullable", create);
522}
523
524
525DataTypePtr makeNullable(const DataTypePtr & type)
526{
527 if (type->isNullable())
528 return type;
529 return std::make_shared<DataTypeNullable>(type);
530}
531
532DataTypePtr removeNullable(const DataTypePtr & type)
533{
534 if (type->isNullable())
535 return static_cast<const DataTypeNullable &>(*type).getNestedType();
536 return type;
537}
538
539
540template bool DataTypeNullable::deserializeTextEscaped<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
541template bool DataTypeNullable::deserializeTextQuoted<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);
542template bool DataTypeNullable::deserializeTextCSV<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
543template bool DataTypeNullable::deserializeTextJSON<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);
544
545}
546