1#pragma once
2
3#include <cmath>
4#include <cstring>
5#include <limits>
6#include <algorithm>
7#include <iterator>
8
9#include <type_traits>
10
11#include <common/DateLUT.h>
12#include <common/LocalDate.h>
13#include <common/LocalDateTime.h>
14#include <common/StringRef.h>
15#include <common/arithmeticOverflow.h>
16
17#include <Core/Types.h>
18#include <Core/DecimalFunctions.h>
19#include <Core/UUID.h>
20
21#include <Common/Exception.h>
22#include <Common/StringUtils/StringUtils.h>
23#include <Common/Arena.h>
24#include <Common/UInt128.h>
25#include <Common/intExp.h>
26
27#include <Formats/FormatSettings.h>
28
29#include <IO/CompressionMethod.h>
30#include <IO/ReadBuffer.h>
31#include <IO/ReadBufferFromMemory.h>
32#include <IO/VarInt.h>
33#include <IO/ZlibInflatingReadBuffer.h>
34
35#include <DataTypes/DataTypeDateTime.h>
36
37#ifdef __clang__
38#pragma clang diagnostic push
39#pragma clang diagnostic ignored "-Wdouble-promotion"
40#endif
41
42#include <double-conversion/double-conversion.h>
43
44#ifdef __clang__
45#pragma clang diagnostic pop
46#endif
47
48
49/// 1 GiB
50#define DEFAULT_MAX_STRING_SIZE (1ULL << 30)
51
52
53namespace DB
54{
55
/// Error codes referenced by the helpers in this header. They are only declared here.
namespace ErrorCodes
{
    extern const int CANNOT_PARSE_DATE;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int CANNOT_PARSE_NUMBER;
    extern const int CANNOT_PARSE_UUID;
    extern const int CANNOT_READ_ARRAY_FROM_TEXT;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
65
66/// Helper functions for formatted input.
67
/// Maps the character that follows a backslash to the character it denotes.
/// Characters without a special meaning are returned unchanged.
inline char parseEscapeSequence(char c)
{
    switch (c)
    {
        case 'a': return '\a';
        case 'b': return '\b';
        case 'e': return '\x1B'; /// \e is not standard C/C++, but gcc and clang accept it.
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
        case '0': return '\0';
        default:  return c;
    }
}
94
95
96/// These functions are located in VarInt.h
97/// inline void throwReadAfterEOF()
98
99
100inline void readChar(char & x, ReadBuffer & buf)
101{
102 if (!buf.eof())
103 {
104 x = *buf.position();
105 ++buf.position();
106 }
107 else
108 throwReadAfterEOF();
109}
110
111
112/// Read POD-type in native format
113template <typename T>
114inline void readPODBinary(T & x, ReadBuffer & buf)
115{
116 buf.readStrict(reinterpret_cast<char *>(&x), sizeof(x));
117}
118
119template <typename T>
120inline void readIntBinary(T & x, ReadBuffer & buf)
121{
122 readPODBinary(x, buf);
123}
124
125template <typename T>
126inline void readFloatBinary(T & x, ReadBuffer & buf)
127{
128 readPODBinary(x, buf);
129}
130
131
132inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE)
133{
134 size_t size = 0;
135 readVarUInt(size, buf);
136
137 if (size > MAX_STRING_SIZE)
138 throw Poco::Exception("Too large string size.");
139
140 s.resize(size);
141 buf.readStrict(s.data(), size);
142}
143
144
145inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf)
146{
147 size_t size = 0;
148 readVarUInt(size, buf);
149
150 char * data = arena.alloc(size);
151 buf.readStrict(data, size);
152
153 return StringRef(data, size);
154}
155
156
157template <typename T>
158void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE)
159{
160 size_t size = 0;
161 readVarUInt(size, buf);
162
163 if (size > MAX_VECTOR_SIZE)
164 throw Poco::Exception("Too large vector size.");
165
166 v.resize(size);
167 for (size_t i = 0; i < size; ++i)
168 readBinary(v[i], buf);
169}
170
171
172void assertString(const char * s, ReadBuffer & buf);
173void assertEOF(ReadBuffer & buf);
174
175[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
176
177inline void assertChar(char symbol, ReadBuffer & buf)
178{
179 if (buf.eof() || *buf.position() != symbol)
180 {
181 char err[2] = {symbol, '\0'};
182 throwAtAssertionFailed(err, buf);
183 }
184 ++buf.position();
185}
186
187inline void assertString(const String & s, ReadBuffer & buf)
188{
189 assertString(s.c_str(), buf);
190}
191
192bool checkString(const char * s, ReadBuffer & buf);
193inline bool checkString(const String & s, ReadBuffer & buf)
194{
195 return checkString(s.c_str(), buf);
196}
197
198inline bool checkChar(char c, ReadBuffer & buf)
199{
200 if (buf.eof() || *buf.position() != c)
201 return false;
202 ++buf.position();
203 return true;
204}
205
206bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf);
207inline bool checkStringCaseInsensitive(const String & s, ReadBuffer & buf)
208{
209 return checkStringCaseInsensitive(s.c_str(), buf);
210}
211
212void assertStringCaseInsensitive(const char * s, ReadBuffer & buf);
213inline void assertStringCaseInsensitive(const String & s, ReadBuffer & buf)
214{
215 return assertStringCaseInsensitive(s.c_str(), buf);
216}
217
218/** Check that next character in buf matches first character of s.
219 * If true, then check all characters in s and throw exception if it doesn't match.
220 * If false, then return false, and leave position in buffer unchanged.
221 */
222bool checkStringByFirstCharacterAndAssertTheRest(const char * s, ReadBuffer & buf);
223bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const char * s, ReadBuffer & buf);
224
225inline bool checkStringByFirstCharacterAndAssertTheRest(const String & s, ReadBuffer & buf)
226{
227 return checkStringByFirstCharacterAndAssertTheRest(s.c_str(), buf);
228}
229
230inline bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const String & s, ReadBuffer & buf)
231{
232 return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(s.c_str(), buf);
233}
234
235
236inline void readBoolText(bool & x, ReadBuffer & buf)
237{
238 char tmp = '0';
239 readChar(tmp, buf);
240 x = tmp != '0';
241}
242
243inline void readBoolTextWord(bool & x, ReadBuffer & buf)
244{
245 if (buf.eof())
246 throwReadAfterEOF();
247
248 if (*buf.position() == 't')
249 {
250 assertString("true", buf);
251 x = true;
252 }
253 else
254 {
255 assertString("false", buf);
256 x = false;
257 }
258}
259
/// Selects whether the integer text parsers verify that the parsed value fits.
enum class ReadIntTextCheckOverflow
{
    DO_NOT_CHECK_OVERFLOW, /// parse without any overflow detection
    CHECK_OVERFLOW,        /// detect overflow while accumulating digits
};
265
266template <typename T, typename ReturnType = void, ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW>
267ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
268{
269 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
270
271 bool negative = false;
272 std::make_unsigned_t<T> res = 0;
273 if (buf.eof())
274 {
275 if constexpr (throw_exception)
276 throwReadAfterEOF();
277 else
278 return ReturnType(false);
279 }
280
281 const size_t initial_pos = buf.count();
282 while (!buf.eof())
283 {
284 switch (*buf.position())
285 {
286 case '+':
287 break;
288 case '-':
289 if constexpr (is_signed_v<T>)
290 negative = true;
291 else
292 {
293 if constexpr (throw_exception)
294 throw Exception("Unsigned type must not contain '-' symbol", ErrorCodes::CANNOT_PARSE_NUMBER);
295 else
296 return ReturnType(false);
297 }
298 break;
299 case '0': [[fallthrough]];
300 case '1': [[fallthrough]];
301 case '2': [[fallthrough]];
302 case '3': [[fallthrough]];
303 case '4': [[fallthrough]];
304 case '5': [[fallthrough]];
305 case '6': [[fallthrough]];
306 case '7': [[fallthrough]];
307 case '8': [[fallthrough]];
308 case '9':
309 if constexpr (check_overflow == ReadIntTextCheckOverflow::CHECK_OVERFLOW)
310 {
311 // perform relativelly slow overflow check only when number of decimal digits so far is close to the max for given type.
312 if (buf.count() - initial_pos >= std::numeric_limits<T>::max_digits10)
313 {
314 if (common::mulOverflow(res, static_cast<decltype(res)>(10), res)
315 || common::addOverflow(res, static_cast<decltype(res)>(*buf.position() - '0'), res))
316 return ReturnType(false);
317 break;
318 }
319 }
320 res *= 10;
321 res += *buf.position() - '0';
322 break;
323 default:
324 goto end;
325 }
326 ++buf.position();
327 }
328
329end:
330 x = negative ? -res : res;
331
332 return ReturnType(true);
333}
334
335template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
336void readIntText(T & x, ReadBuffer & buf)
337{
338 readIntTextImpl<T, void, check_overflow>(x, buf);
339}
340
341template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
342bool tryReadIntText(T & x, ReadBuffer & buf)
343{
344 return readIntTextImpl<T, bool, check_overflow>(x, buf);
345}
346
347template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
348void readIntText(Decimal<T> & x, ReadBuffer & buf)
349{
350 readIntText<check_overflow>(x.value, buf);
351}
352
353/** More efficient variant (about 1.5 times on real dataset).
354 * Differs in following:
355 * - for numbers starting with zero, parsed only zero;
356 * - symbol '+' before number is not supported;
357 * - symbols :;<=>? are parsed as some numbers.
358 */
359template <typename T, bool throw_on_error = true>
360void readIntTextUnsafe(T & x, ReadBuffer & buf)
361{
362 bool negative = false;
363 std::make_unsigned_t<T> res = 0;
364
365 auto on_error = []
366 {
367 if (throw_on_error)
368 throwReadAfterEOF();
369 };
370
371 if (unlikely(buf.eof()))
372 return on_error();
373
374 if (is_signed_v<T> && *buf.position() == '-')
375 {
376 ++buf.position();
377 negative = true;
378 if (unlikely(buf.eof()))
379 return on_error();
380 }
381
382 if (*buf.position() == '0') /// There are many zeros in real datasets.
383 {
384 ++buf.position();
385 x = 0;
386 return;
387 }
388
389 while (!buf.eof())
390 {
391 /// This check is suddenly faster than
392 /// unsigned char c = *buf.position() - '0';
393 /// if (c < 10)
394 /// for unknown reason on Xeon E5645.
395
396 if ((*buf.position() & 0xF0) == 0x30) /// It makes sense to have this condition inside loop.
397 {
398 res *= 10;
399 res += *buf.position() & 0x0F;
400 ++buf.position();
401 }
402 else
403 break;
404 }
405
406 /// See note about undefined behaviour above.
407 x = is_signed_v<T> && negative ? -res : res;
408}
409
410template <typename T>
411void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
412{
413 return readIntTextUnsafe<T, false>(x, buf);
414}
415
416
417/// Look at readFloatText.h
418template <typename T> void readFloatText(T & x, ReadBuffer & in);
419template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in);
420
421
422/// simple: all until '\n' or '\t'
423void readString(String & s, ReadBuffer & buf);
424
425void readEscapedString(String & s, ReadBuffer & buf);
426
427void readQuotedString(String & s, ReadBuffer & buf);
428void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
429
430void readDoubleQuotedString(String & s, ReadBuffer & buf);
431void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
432
433void readJSONString(String & s, ReadBuffer & buf);
434
435void readBackQuotedString(String & s, ReadBuffer & buf);
436void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
437
438void readStringUntilEOF(String & s, ReadBuffer & buf);
439void readEscapedStringUntilEOL(String & s, ReadBuffer & buf);
440
441
442/** Read string in CSV format.
443 * Parsing rules:
444 * - string could be placed in quotes; quotes could be single: ' if FormatSettings::CSV::allow_single_quotes is true
445 * or double: " if FormatSettings::CSV::allow_double_quotes is true;
446 * - or string could be unquoted - this is determined by first character;
447 * - if string is unquoted, then it is read until next delimiter,
448 * either until end of line (CR or LF),
449 * or until end of stream;
450 * but spaces and tabs at begin and end of unquoted string are consumed but ignored (note that this behaviour differs from RFC).
451 * - if string is in quotes, then it will be read until closing quote,
452 * but sequences of two consecutive quotes are parsed as single quote inside string;
453 */
454void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
455
456
457/// Read and append result to array of characters.
458template <typename Vector>
459void readStringInto(Vector & s, ReadBuffer & buf);
460
461template <typename Vector>
462void readNullTerminated(Vector & s, ReadBuffer & buf);
463
464template <typename Vector>
465void readEscapedStringInto(Vector & s, ReadBuffer & buf);
466
467template <bool enable_sql_style_quoting, typename Vector>
468void readQuotedStringInto(Vector & s, ReadBuffer & buf);
469
470template <bool enable_sql_style_quoting, typename Vector>
471void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf);
472
473template <bool enable_sql_style_quoting, typename Vector>
474void readBackQuotedStringInto(Vector & s, ReadBuffer & buf);
475
476template <typename Vector>
477void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);
478
479template <typename Vector>
480void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
481
482/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
483template <typename Vector, typename ReturnType = void>
484ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf);
485
486template <typename Vector>
487bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
488{
489 return readJSONStringInto<Vector, bool>(s, buf);
490}
491
/// A sink that discards everything written to it. It can be passed as the `Vector` template
/// parameter of the readers that append to a character array, when the data only has to be skipped.
struct NullSink
{
    /// Accepts a range of characters and drops it.
    void append(const char *, size_t) {}

    /// Accepts a single character and drops it.
    void push_back(char) {}
};
498
499void parseUUID(const UInt8 * src36, UInt8 * dst16);
500void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);
501
502template <typename IteratorSrc, typename IteratorDst>
503void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes);
504
505
506template <typename ReturnType>
507ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf);
508
509/// In YYYY-MM-DD format.
510/// For convenience, Month and Day parts can have single digit instead of two digits.
511/// Any separators other than '-' are supported.
512template <typename ReturnType = void>
513inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
514{
515 /// Optimistic path, when whole value is in buffer.
516 if (buf.position() + 10 <= buf.buffer().end())
517 {
518 UInt16 year = (buf.position()[0] - '0') * 1000 + (buf.position()[1] - '0') * 100 + (buf.position()[2] - '0') * 10 + (buf.position()[3] - '0');
519 buf.position() += 5;
520
521 UInt8 month = buf.position()[0] - '0';
522 if (isNumericASCII(buf.position()[1]))
523 {
524 month = month * 10 + buf.position()[1] - '0';
525 buf.position() += 3;
526 }
527 else
528 buf.position() += 2;
529
530 UInt8 day = buf.position()[0] - '0';
531 if (isNumericASCII(buf.position()[1]))
532 {
533 day = day * 10 + buf.position()[1] - '0';
534 buf.position() += 2;
535 }
536 else
537 buf.position() += 1;
538
539 date = LocalDate(year, month, day);
540 return ReturnType(true);
541 }
542 else
543 return readDateTextFallback<ReturnType>(date, buf);
544}
545
546template <typename ReturnType = void>
547inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf)
548{
549 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
550
551 LocalDate local_date;
552
553 if constexpr (throw_exception)
554 readDateTextImpl<ReturnType>(local_date, buf);
555 else if (!readDateTextImpl<ReturnType>(local_date, buf))
556 return false;
557
558 date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day());
559 return ReturnType(true);
560}
561
562
563inline void readDateText(LocalDate & date, ReadBuffer & buf)
564{
565 readDateTextImpl<void>(date, buf);
566}
567
568inline void readDateText(DayNum & date, ReadBuffer & buf)
569{
570 readDateTextImpl<void>(date, buf);
571}
572
573inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf)
574{
575 return readDateTextImpl<bool>(date, buf);
576}
577
578inline bool tryReadDateText(DayNum & date, ReadBuffer & buf)
579{
580 return readDateTextImpl<bool>(date, buf);
581}
582
583
584inline void readUUIDText(UUID & uuid, ReadBuffer & buf)
585{
586 char s[36];
587 size_t size = buf.read(s, 36);
588
589 if (size != 36)
590 {
591 s[size] = 0;
592 throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID);
593 }
594
595 parseUUID(reinterpret_cast<const UInt8 *>(s), std::reverse_iterator<UInt8 *>(reinterpret_cast<UInt8 *>(&uuid) + 16));
596}
597
598
599template <typename T>
600inline T parse(const char * data, size_t size);
601
602template <typename T>
603inline T parseFromString(const String & str)
604{
605 return parse<T>(str.data(), str.size());
606}
607
608#pragma GCC diagnostic push
609#pragma GCC diagnostic ignored "-Wredundant-decls"
610// Just dont mess with it. If the redundant redeclaration is removed then ReaderHelpers.h should be included.
611// This leads to Arena.h inclusion which has a problem with ASAN stuff included properly and messing macro definition
612// which intefrers with... You dont want to know, really.
613UInt128 stringToUUID(const String & str);
614#pragma GCC diagnostic pop
615
616template <typename ReturnType = void>
617ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);
618
619/** In YYYY-MM-DD hh:mm:ss format, according to specified time zone.
620 * As an exception, also supported parsing of unix timestamp in form of decimal number.
621 */
622template <typename ReturnType = void>
623inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
624{
625 /** Read 10 characters, that could represent unix timestamp.
626 * Only unix timestamp of 5-10 characters is supported.
627 * Then look at 5th character. If it is a number - treat whole as unix timestamp.
628 * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss format.
629 */
630
631 /// Optimistic path, when whole value is in buffer.
632 const char * s = buf.position();
633 if (s + 19 <= buf.buffer().end())
634 {
635 if (s[4] < '0' || s[4] > '9')
636 {
637 UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
638 UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
639 UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
640
641 UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0');
642 UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0');
643 UInt8 second = (s[17] - '0') * 10 + (s[18] - '0');
644
645 if (unlikely(year == 0))
646 datetime = 0;
647 else
648 datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
649
650 buf.position() += 19;
651 return ReturnType(true);
652 }
653 else
654 /// Why not readIntTextUnsafe? Because for needs of AdFox, parsing of unix timestamp with leading zeros is supported: 000...NNNN.
655 return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf);
656 }
657 else
658 return readDateTimeTextFallback<ReturnType>(datetime, buf, date_lut);
659}
660
661template <typename ReturnType>
662inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut)
663{
664 time_t whole;
665 if (!readDateTimeTextImpl<bool>(whole, buf, date_lut))
666 {
667 return ReturnType(false);
668 }
669
670 DB::DecimalUtils::DecimalComponents<DateTime64::NativeType> c{static_cast<DateTime64::NativeType>(whole), 0};
671
672 if (!buf.eof() && *buf.position() == '.')
673 {
674 buf.ignore(1); // skip separator
675 const auto pos_before_fractional = buf.count();
676 if (!tryReadIntText<ReadIntTextCheckOverflow::CHECK_OVERFLOW>(c.fractional, buf))
677 {
678 return ReturnType(false);
679 }
680
681 // Adjust fractional part to the scale, since decimalFromComponents knows nothing
682 // about convention of ommiting trailing zero on fractional part
683 // and assumes that fractional part value is less than 10^scale.
684
685 // If scale is 3, but we read '12', promote fractional part to '120'.
686 // And vice versa: if we read '1234', denote it to '123'.
687 const auto fractional_length = static_cast<Int32>(buf.count() - pos_before_fractional);
688 if (const auto adjust_scale = static_cast<Int32>(scale) - fractional_length; adjust_scale > 0)
689 {
690 c.fractional *= common::exp10_i64(adjust_scale);
691 }
692 else if (adjust_scale < 0)
693 {
694 c.fractional /= common::exp10_i64(-1 * adjust_scale);
695 }
696 }
697
698 datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(c, scale);
699
700 return ReturnType(true);
701}
702
703inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
704{
705 readDateTimeTextImpl<void>(datetime, buf, date_lut);
706}
707
708inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
709{
710 readDateTimeTextImpl<void>(datetime64, scale, buf, date_lut);
711}
712
713inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
714{
715 return readDateTimeTextImpl<bool>(datetime, buf, date_lut);
716}
717
718inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
719{
720 return readDateTimeTextImpl<bool>(datetime64, scale, buf, date_lut);
721}
722
723inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf)
724{
725 char s[19];
726 size_t size = buf.read(s, 19);
727 if (19 != size)
728 {
729 s[size] = 0;
730 throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
731 }
732
733 datetime.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'));
734 datetime.month((s[5] - '0') * 10 + (s[6] - '0'));
735 datetime.day((s[8] - '0') * 10 + (s[9] - '0'));
736
737 datetime.hour((s[11] - '0') * 10 + (s[12] - '0'));
738 datetime.minute((s[14] - '0') * 10 + (s[15] - '0'));
739 datetime.second((s[17] - '0') * 10 + (s[18] - '0'));
740}
741
742
743/// Generic methods to read value in native binary format.
744template <typename T>
745inline std::enable_if_t<is_arithmetic_v<T>, void>
746readBinary(T & x, ReadBuffer & buf) { readPODBinary(x, buf); }
747
748inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
749inline void readBinary(Int128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
750inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
751inline void readBinary(UInt256 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
752inline void readBinary(Decimal32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
753inline void readBinary(Decimal64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
754inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
755inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
756
757
758/// Generic methods to read value in text tab-separated format.
759template <typename T>
760inline std::enable_if_t<is_integral_v<T>, void>
761readText(T & x, ReadBuffer & buf) { readIntText(x, buf); }
762
763template <typename T>
764inline std::enable_if_t<std::is_floating_point_v<T>, void>
765readText(T & x, ReadBuffer & buf) { readFloatText(x, buf); }
766
767inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
768inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
769inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
770inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
771inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); }
772[[noreturn]] inline void readText(UInt128 &, ReadBuffer &)
773{
774 /** Because UInt128 isn't a natural type, without arithmetic operator and only use as an intermediary type -for UUID-
775 * it should never arrive here. But because we used the DataTypeNumber class we should have at least a definition of it.
776 */
777 throw Exception("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
778}
779
780/// Generic methods to read value in text format,
781/// possibly in single quotes (only for data types that use quotes in VALUES format of INSERT statement in SQL).
782template <typename T>
783inline std::enable_if_t<is_arithmetic_v<T>, void>
784readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }
785
786inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }
787
788inline void readQuoted(LocalDate & x, ReadBuffer & buf)
789{
790 assertChar('\'', buf);
791 readDateText(x, buf);
792 assertChar('\'', buf);
793}
794
795inline void readQuoted(LocalDateTime & x, ReadBuffer & buf)
796{
797 assertChar('\'', buf);
798 readDateTimeText(x, buf);
799 assertChar('\'', buf);
800}
801
802
803/// Same as above, but in double quotes.
804template <typename T>
805inline std::enable_if_t<is_arithmetic_v<T>, void>
806readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }
807
808inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }
809
810inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf)
811{
812 assertChar('"', buf);
813 readDateText(x, buf);
814 assertChar('"', buf);
815}
816
817inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
818{
819 assertChar('"', buf);
820 readDateTimeText(x, buf);
821 assertChar('"', buf);
822}
823
824
825/// CSV, for numbers, dates: quotes are optional, no special escaping rules.
826template <typename T>
827inline void readCSVSimple(T & x, ReadBuffer & buf)
828{
829 if (buf.eof())
830 throwReadAfterEOF();
831
832 char maybe_quote = *buf.position();
833
834 if (maybe_quote == '\'' || maybe_quote == '\"')
835 ++buf.position();
836
837 readText(x, buf);
838
839 if (maybe_quote == '\'' || maybe_quote == '\"')
840 assertChar(maybe_quote, buf);
841}
842
843template <typename T>
844inline std::enable_if_t<is_arithmetic_v<T>, void>
845readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
846
847inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
848inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
849inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
850inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
851[[noreturn]] inline void readCSV(UInt128 &, ReadBuffer &)
852{
853 /** Because UInt128 isn't a natural type, without arithmetic operator and only use as an intermediary type -for UUID-
854 * it should never arrive here. But because we used the DataTypeNumber class we should have at least a definition of it.
855 */
856 throw Exception("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
857}
858
859template <typename T>
860void readBinary(std::vector<T> & x, ReadBuffer & buf)
861{
862 size_t size = 0;
863 readVarUInt(size, buf);
864
865 if (size > DEFAULT_MAX_STRING_SIZE)
866 throw Poco::Exception("Too large vector size.");
867
868 x.resize(size);
869 for (size_t i = 0; i < size; ++i)
870 readBinary(x[i], buf);
871}
872
873template <typename T>
874void readQuoted(std::vector<T> & x, ReadBuffer & buf)
875{
876 bool first = true;
877 assertChar('[', buf);
878 while (!buf.eof() && *buf.position() != ']')
879 {
880 if (!first)
881 {
882 if (*buf.position() == ',')
883 ++buf.position();
884 else
885 throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
886 }
887
888 first = false;
889
890 x.push_back(T());
891 readQuoted(x.back(), buf);
892 }
893 assertChar(']', buf);
894}
895
896template <typename T>
897void readDoubleQuoted(std::vector<T> & x, ReadBuffer & buf)
898{
899 bool first = true;
900 assertChar('[', buf);
901 while (!buf.eof() && *buf.position() != ']')
902 {
903 if (!first)
904 {
905 if (*buf.position() == ',')
906 ++buf.position();
907 else
908 throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
909 }
910
911 first = false;
912
913 x.push_back(T());
914 readDoubleQuoted(x.back(), buf);
915 }
916 assertChar(']', buf);
917}
918
919template <typename T>
920void readText(std::vector<T> & x, ReadBuffer & buf)
921{
922 readQuoted(x, buf);
923}
924
925
926/// Skip whitespace characters.
927inline void skipWhitespaceIfAny(ReadBuffer & buf)
928{
929 while (!buf.eof() && isWhitespaceASCII(*buf.position()))
930 ++buf.position();
931}
932
933/// Skips json value.
934void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field);
935
936
937/** Read serialized exception.
938 * During serialization/deserialization some information is lost
939 * (type is cut to base class, 'message' replaced by 'displayText', and stack trace is appended to 'message')
940 * Some additional message could be appended to exception (example: you could add information about from where it was received).
941 */
942void readException(Exception & e, ReadBuffer & buf, const String & additional_message = "");
943void readAndThrowException(ReadBuffer & buf, const String & additional_message = "");
944
945
946/** Helper function for implementation.
947 */
948template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
949static inline const char * tryReadIntText(T & x, const char * pos, const char * end)
950{
951 ReadBufferFromMemory in(pos, end - pos);
952 tryReadIntText<check_overflow>(x, in);
953 return pos + in.count();
954}
955
956
957/// Convenient methods for reading something from string in text format.
958template <typename T>
959inline T parse(const char * data, size_t size)
960{
961 T res;
962 ReadBufferFromMemory buf(data, size);
963 readText(res, buf);
964 return res;
965}
966
967/// Read something from text format, but expect complete parse of given text
968/// For example: 723145 -- ok, 213MB -- not ok
969template <typename T>
970inline T completeParse(const char * data, size_t size)
971{
972 T res;
973 ReadBufferFromMemory buf(data, size);
974 readText(res, buf);
975 assertEOF(buf);
976 return res;
977}
978
979template <typename T>
980inline T completeParse(const String & s)
981{
982 return completeParse<T>(s.data(), s.size());
983}
984
985template <typename T>
986inline T completeParse(const char * data)
987{
988 return completeParse<T>(data, strlen(data));
989}
990
991template <typename T>
992inline T parse(const char * data)
993{
994 return parse<T>(data, strlen(data));
995}
996
997template <typename T>
998inline T parse(const String & s)
999{
1000 return parse<T>(s.data(), s.size());
1001}
1002
1003
1004/** Skip UTF-8 BOM if it is under cursor.
1005 * As BOM is usually located at start of stream, and buffer size is usually larger than three bytes,
1006 * the function expects, that all three bytes of BOM is fully in buffer (otherwise it don't skip anything).
1007 */
1008inline void skipBOMIfExists(ReadBuffer & buf)
1009{
1010 if (!buf.eof()
1011 && buf.position() + 3 < buf.buffer().end()
1012 && buf.position()[0] == '\xEF'
1013 && buf.position()[1] == '\xBB'
1014 && buf.position()[2] == '\xBF')
1015 {
1016 buf.position() += 3;
1017 }
1018}
1019
1020
1021/// Skip to next character after next \n. If no \n in stream, skip to end.
1022void skipToNextLineOrEOF(ReadBuffer & buf);
1023
1024/// Skip to next character after next unescaped \n. If no \n in stream, skip to end. Does not throw on invalid escape sequences.
1025void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
1026
1027template <class TReadBuffer, class... Types>
1028std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types&&... args)
1029{
1030 if (method == DB::CompressionMethod::Gzip)
1031 {
1032 auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...);
1033 return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method);
1034 }
1035 return std::make_unique<TReadBuffer>(args...);
1036}
1037
1038/** This function just copies the data from buffer's internal position (in.position())
1039 * to current position (from arguments) into memory.
1040 */
1041void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current);
1042
1043/** This function is negative to eof().
1044 * In fact it returns whether the data was loaded to internal ReadBuffers's buffer or not.
1045 * And saves data from buffer's position to current if there is no pending data in buffer.
1046 * Why we have to use this strange function? Consider we have buffer's internal position in the middle
1047 * of our buffer and the current cursor in the end of the buffer. When we call eof() it calls next().
1048 * And this function can fill the buffer with new data, so we will lose the data from previous buffer state.
1049 */
1050bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current);
1051
1052}
1053