1#include <Core/Defines.h>
2#include <Common/hex.h>
3#include <Common/PODArray.h>
4#include <Common/StringUtils/StringUtils.h>
5#include <Common/memcpySmall.h>
6#include <Formats/FormatSettings.h>
7#include <IO/WriteHelpers.h>
8#include <IO/WriteBufferFromString.h>
9#include <IO/readFloatText.h>
10#include <IO/Operators.h>
11#include <common/find_symbols.h>
12#include <stdlib.h>
13
14#ifdef __SSE2__
15 #include <emmintrin.h>
16#endif
17
18namespace DB
19{
20
21namespace ErrorCodes
22{
23 extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
24 extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
25 extern const int CANNOT_PARSE_QUOTED_STRING;
26 extern const int CANNOT_PARSE_DATETIME;
27 extern const int CANNOT_PARSE_DATE;
28 extern const int INCORRECT_DATA;
29}
30
/// Decodes `num_bytes` bytes from 2 * num_bytes hexadecimal characters:
/// src[2 * i] gives the high half and src[2 * i + 1] the low half of dst[i].
/// The input characters are not validated here.
template <typename IteratorSrc, typename IteratorDst>
void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes)
{
    for (size_t i = 0; i < num_bytes; ++i)
    {
        const auto high = UInt8(unhex(src[2 * i]));
        const auto low = UInt8(unhex(src[2 * i + 1]));
        dst[i] = high * 16 + low;
    }
}
42
43void parseUUID(const UInt8 * src36, UInt8 * dst16)
44{
45 /// If string is not like UUID - implementation specific behaviour.
46
47 parseHex(&src36[0], &dst16[0], 4);
48 parseHex(&src36[9], &dst16[4], 2);
49 parseHex(&src36[14], &dst16[6], 2);
50 parseHex(&src36[19], &dst16[8], 2);
51 parseHex(&src36[24], &dst16[10], 6);
52}
53
54/** Function used when byte ordering is important when parsing uuid
55 * ex: When we create an UUID type
56 */
57void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16)
58{
59 /// If string is not like UUID - implementation specific behaviour.
60
61 /// FIXME This code looks like trash.
62 parseHex(&src36[0], dst16 + 8, 4);
63 parseHex(&src36[9], dst16 + 12, 2);
64 parseHex(&src36[14], dst16 + 14, 2);
65 parseHex(&src36[19], dst16, 2);
66 parseHex(&src36[24], dst16 + 2, 6);
67}
68
69UInt128 stringToUUID(const String & str)
70{
71 return parseFromString<UUID>(str);
72}
73
74void NO_INLINE throwAtAssertionFailed(const char * s, ReadBuffer & buf)
75{
76 WriteBufferFromOwnString out;
77 out << "Cannot parse input: expected " << escape << s;
78
79 if (buf.eof())
80 out << " at end of stream.";
81 else
82 out << " before: " << escape << String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position()));
83
84 throw Exception(out.str(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
85}
86
87
88bool checkString(const char * s, ReadBuffer & buf)
89{
90 for (; *s; ++s)
91 {
92 if (buf.eof() || *buf.position() != *s)
93 return false;
94 ++buf.position();
95 }
96 return true;
97}
98
99
100bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf)
101{
102 for (; *s; ++s)
103 {
104 if (buf.eof())
105 return false;
106
107 char c = *buf.position();
108 if (!equalsCaseInsensitive(*s, c))
109 return false;
110
111 ++buf.position();
112 }
113 return true;
114}
115
116
117void assertString(const char * s, ReadBuffer & buf)
118{
119 if (!checkString(s, buf))
120 throwAtAssertionFailed(s, buf);
121}
122
123
124void assertEOF(ReadBuffer & buf)
125{
126 if (!buf.eof())
127 throwAtAssertionFailed("eof", buf);
128}
129
130
131void assertStringCaseInsensitive(const char * s, ReadBuffer & buf)
132{
133 if (!checkStringCaseInsensitive(s, buf))
134 throwAtAssertionFailed(s, buf);
135}
136
137
138bool checkStringByFirstCharacterAndAssertTheRest(const char * s, ReadBuffer & buf)
139{
140 if (buf.eof() || *buf.position() != *s)
141 return false;
142
143 assertString(s, buf);
144 return true;
145}
146
147bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const char * s, ReadBuffer & buf)
148{
149 if (buf.eof())
150 return false;
151
152 char c = *buf.position();
153 if (!equalsCaseInsensitive(*s, c))
154 return false;
155
156 assertStringCaseInsensitive(s, buf);
157 return true;
158}
159
160
161template <typename T>
162static void appendToStringOrVector(T & s, ReadBuffer & rb, const char * end)
163{
164 s.append(rb.position(), end - rb.position());
165}
166
167template <>
168inline void appendToStringOrVector(PaddedPODArray<UInt8> & s, ReadBuffer & rb, const char * end)
169{
170 if (rb.isPadded())
171 s.insertSmallAllowReadWriteOverflow15(rb.position(), end);
172 else
173 s.insert(rb.position(), end);
174}
175
176template <>
177inline void appendToStringOrVector(PODArray<char> & s, ReadBuffer & rb, const char * end)
178{
179 s.insert(rb.position(), end);
180}
181
182template <typename Vector>
183void readStringInto(Vector & s, ReadBuffer & buf)
184{
185 while (!buf.eof())
186 {
187 char * next_pos = find_first_symbols<'\t', '\n'>(buf.position(), buf.buffer().end());
188
189 appendToStringOrVector(s, buf, next_pos);
190 buf.position() = next_pos;
191
192 if (buf.hasPendingData())
193 return;
194 }
195}
196
197template <typename Vector>
198void readNullTerminated(Vector & s, ReadBuffer & buf)
199{
200 while (!buf.eof())
201 {
202 char * next_pos = find_first_symbols<'\0'>(buf.position(), buf.buffer().end());
203
204 appendToStringOrVector(s, buf, next_pos);
205 buf.position() = next_pos;
206
207 if (buf.hasPendingData())
208 break;
209 }
210 buf.ignore();
211}
212
213template void readNullTerminated<PODArray<char>>(PODArray<char> & s, ReadBuffer & buf);
214template void readNullTerminated<String>(String & s, ReadBuffer & buf);
215
216void readString(String & s, ReadBuffer & buf)
217{
218 s.clear();
219 readStringInto(s, buf);
220}
221
222template void readStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
223
224
225template <typename Vector>
226void readStringUntilEOFInto(Vector & s, ReadBuffer & buf)
227{
228 while (!buf.eof())
229 {
230 appendToStringOrVector(s, buf, buf.buffer().end());
231 buf.position() = buf.buffer().end();
232
233 if (buf.hasPendingData())
234 return;
235 }
236}
237
238
239void readStringUntilEOF(String & s, ReadBuffer & buf)
240{
241 s.clear();
242 readStringUntilEOFInto(s, buf);
243}
244
245template <typename Vector>
246void readEscapedStringUntilEOLInto(Vector & s, ReadBuffer & buf)
247{
248 while (!buf.eof())
249 {
250 char * next_pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end());
251
252 appendToStringOrVector(s, buf, next_pos);
253 buf.position() = next_pos;
254
255 if (!buf.hasPendingData())
256 continue;
257
258 if (*buf.position() == '\n')
259 return;
260
261 if (*buf.position() == '\\')
262 parseComplexEscapeSequence(s, buf);
263 }
264}
265
266
267void readEscapedStringUntilEOL(String & s, ReadBuffer & buf)
268{
269 s.clear();
270 readEscapedStringUntilEOLInto(s, buf);
271}
272
273template void readStringUntilEOFInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
274
275
276/** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters).
277 * It is assumed that the cursor is located on the `\` symbol
278 */
279template <typename Vector>
280static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
281{
282 ++buf.position();
283 if (buf.eof())
284 throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
285
286 if (*buf.position() == 'x')
287 {
288 ++buf.position();
289 /// escape sequence of the form \xAA
290 char hex_code[2];
291 readPODBinary(hex_code, buf);
292 s.push_back(unhex2(hex_code));
293 }
294 else if (*buf.position() == 'N')
295 {
296 /// Support for NULLs: \N sequence must be parsed as empty string.
297 ++buf.position();
298 }
299 else
300 {
301 /// The usual escape sequence of a single character.
302 s.push_back(parseEscapeSequence(*buf.position()));
303 ++buf.position();
304 }
305}
306
307
308template <typename Vector, typename ReturnType>
309static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
310{
311 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
312
313 auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]])
314 {
315 if constexpr (throw_exception)
316 throw Exception(message, code);
317 return ReturnType(false);
318 };
319
320 ++buf.position();
321 if (buf.eof())
322 return error("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
323
324 switch (*buf.position())
325 {
326 case '"':
327 s.push_back('"');
328 break;
329 case '\\':
330 s.push_back('\\');
331 break;
332 case '/':
333 s.push_back('/');
334 break;
335 case 'b':
336 s.push_back('\b');
337 break;
338 case 'f':
339 s.push_back('\f');
340 break;
341 case 'n':
342 s.push_back('\n');
343 break;
344 case 'r':
345 s.push_back('\r');
346 break;
347 case 't':
348 s.push_back('\t');
349 break;
350 case 'u':
351 {
352 ++buf.position();
353
354 char hex_code[4];
355 if (4 != buf.read(hex_code, 4))
356 return error("Cannot parse escape sequence: less than four bytes after \\u", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
357
358 /// \u0000 - special case
359 if (0 == memcmp(hex_code, "0000", 4))
360 {
361 s.push_back(0);
362 return ReturnType(true);
363 }
364
365 UInt16 code_point = unhex4(hex_code);
366
367 if (code_point <= 0x7F)
368 {
369 s.push_back(code_point);
370 }
371 else if (code_point <= 0x07FF)
372 {
373 s.push_back(((code_point >> 6) & 0x1F) | 0xC0);
374 s.push_back((code_point & 0x3F) | 0x80);
375 }
376 else
377 {
378 /// Surrogate pair.
379 if (code_point >= 0xD800 && code_point <= 0xDBFF)
380 {
381 if (!checkString("\\u", buf))
382 return error("Cannot parse escape sequence: missing second part of surrogate pair", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
383
384 char second_hex_code[4];
385 if (4 != buf.read(second_hex_code, 4))
386 return error("Cannot parse escape sequence: less than four bytes after \\u of second part of surrogate pair",
387 ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
388
389 UInt16 second_code_point = unhex4(second_hex_code);
390
391 if (second_code_point >= 0xDC00 && second_code_point <= 0xDFFF)
392 {
393 UInt32 full_code_point = 0x10000 + (code_point - 0xD800) * 1024 + (second_code_point - 0xDC00);
394
395 s.push_back(((full_code_point >> 18) & 0x07) | 0xF0);
396 s.push_back(((full_code_point >> 12) & 0x3F) | 0x80);
397 s.push_back(((full_code_point >> 6) & 0x3F) | 0x80);
398 s.push_back((full_code_point & 0x3F) | 0x80);
399 }
400 else
401 return error("Incorrect surrogate pair of unicode escape sequences in JSON", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
402 }
403 else
404 {
405 s.push_back(((code_point >> 12) & 0x0F) | 0xE0);
406 s.push_back(((code_point >> 6) & 0x3F) | 0x80);
407 s.push_back((code_point & 0x3F) | 0x80);
408 }
409 }
410
411 return ReturnType(true);
412 }
413 default:
414 s.push_back(*buf.position());
415 break;
416 }
417
418 ++buf.position();
419 return ReturnType(true);
420}
421
422
423template <typename Vector>
424void readEscapedStringInto(Vector & s, ReadBuffer & buf)
425{
426 while (!buf.eof())
427 {
428 char * next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end());
429
430 appendToStringOrVector(s, buf, next_pos);
431 buf.position() = next_pos;
432
433 if (!buf.hasPendingData())
434 continue;
435
436 if (*buf.position() == '\t' || *buf.position() == '\n')
437 return;
438
439 if (*buf.position() == '\\')
440 parseComplexEscapeSequence(s, buf);
441 }
442}
443
444void readEscapedString(String & s, ReadBuffer & buf)
445{
446 s.clear();
447 readEscapedStringInto(s, buf);
448}
449
450template void readEscapedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
451template void readEscapedStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
452
453
454/** If enable_sql_style_quoting == true,
455 * strings like 'abc''def' will be parsed as abc'def.
456 * Please note, that even with SQL style quoting enabled,
457 * backslash escape sequences are also parsed,
458 * that could be slightly confusing.
459 */
460template <char quote, bool enable_sql_style_quoting, typename Vector>
461static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
462{
463 if (buf.eof() || *buf.position() != quote)
464 throw Exception("Cannot parse quoted string: expected opening quote",
465 ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
466 ++buf.position();
467
468 while (!buf.eof())
469 {
470 char * next_pos = find_first_symbols<'\\', quote>(buf.position(), buf.buffer().end());
471
472 appendToStringOrVector(s, buf, next_pos);
473 buf.position() = next_pos;
474
475 if (!buf.hasPendingData())
476 continue;
477
478 if (*buf.position() == quote)
479 {
480 ++buf.position();
481
482 if (enable_sql_style_quoting && !buf.eof() && *buf.position() == quote)
483 {
484 s.push_back(quote);
485 ++buf.position();
486 continue;
487 }
488
489 return;
490 }
491
492 if (*buf.position() == '\\')
493 parseComplexEscapeSequence(s, buf);
494 }
495
496 throw Exception("Cannot parse quoted string: expected closing quote",
497 ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
498}
499
500template <bool enable_sql_style_quoting, typename Vector>
501void readQuotedStringInto(Vector & s, ReadBuffer & buf)
502{
503 readAnyQuotedStringInto<'\'', enable_sql_style_quoting>(s, buf);
504}
505
506template <bool enable_sql_style_quoting, typename Vector>
507void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
508{
509 readAnyQuotedStringInto<'"', enable_sql_style_quoting>(s, buf);
510}
511
512template <bool enable_sql_style_quoting, typename Vector>
513void readBackQuotedStringInto(Vector & s, ReadBuffer & buf)
514{
515 readAnyQuotedStringInto<'`', enable_sql_style_quoting>(s, buf);
516}
517
518
519void readQuotedString(String & s, ReadBuffer & buf)
520{
521 s.clear();
522 readQuotedStringInto<false>(s, buf);
523}
524
525void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
526{
527 s.clear();
528 readQuotedStringInto<true>(s, buf);
529}
530
531
532template void readQuotedStringInto<true>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
533template void readDoubleQuotedStringInto<false>(NullSink & s, ReadBuffer & buf);
534
535void readDoubleQuotedString(String & s, ReadBuffer & buf)
536{
537 s.clear();
538 readDoubleQuotedStringInto<false>(s, buf);
539}
540
541void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
542{
543 s.clear();
544 readDoubleQuotedStringInto<true>(s, buf);
545}
546
547void readBackQuotedString(String & s, ReadBuffer & buf)
548{
549 s.clear();
550 readBackQuotedStringInto<false>(s, buf);
551}
552
553void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
554{
555 s.clear();
556 readBackQuotedStringInto<true>(s, buf);
557}
558
559
560template <typename Vector>
561void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
562{
563 if (buf.eof())
564 throwReadAfterEOF();
565
566 const char delimiter = settings.delimiter;
567 const char maybe_quote = *buf.position();
568
569 /// Emptiness and not even in quotation marks.
570 if (maybe_quote == delimiter)
571 return;
572
573 if ((settings.allow_single_quotes && maybe_quote == '\'') || (settings.allow_double_quotes && maybe_quote == '"'))
574 {
575 ++buf.position();
576
577 /// The quoted case. We are looking for the next quotation mark.
578 while (!buf.eof())
579 {
580 char * next_pos = reinterpret_cast<char *>(memchr(buf.position(), maybe_quote, buf.buffer().end() - buf.position()));
581
582 if (nullptr == next_pos)
583 next_pos = buf.buffer().end();
584
585 appendToStringOrVector(s, buf, next_pos);
586 buf.position() = next_pos;
587
588 if (!buf.hasPendingData())
589 continue;
590
591 /// Now there is a quotation mark under the cursor. Is there any following?
592 ++buf.position();
593 if (buf.eof())
594 return;
595
596 if (*buf.position() == maybe_quote)
597 {
598 s.push_back(maybe_quote);
599 ++buf.position();
600 continue;
601 }
602
603 return;
604 }
605 }
606 else
607 {
608 /// Unquoted case. Look for delimiter or \r or \n.
609 while (!buf.eof())
610 {
611 char * next_pos = buf.position();
612
613 [&]()
614 {
615#ifdef __SSE2__
616 auto rc = _mm_set1_epi8('\r');
617 auto nc = _mm_set1_epi8('\n');
618 auto dc = _mm_set1_epi8(delimiter);
619 for (; next_pos + 15 < buf.buffer().end(); next_pos += 16)
620 {
621 __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(next_pos));
622 auto eq = _mm_or_si128(_mm_or_si128(_mm_cmpeq_epi8(bytes, rc), _mm_cmpeq_epi8(bytes, nc)), _mm_cmpeq_epi8(bytes, dc));
623 uint16_t bit_mask = _mm_movemask_epi8(eq);
624 if (bit_mask)
625 {
626 next_pos += __builtin_ctz(bit_mask);
627 return;
628 }
629 }
630#endif
631 while (next_pos < buf.buffer().end()
632 && *next_pos != delimiter && *next_pos != '\r' && *next_pos != '\n')
633 ++next_pos;
634 }();
635
636
637 appendToStringOrVector(s, buf, next_pos);
638 buf.position() = next_pos;
639
640 if (!buf.hasPendingData())
641 continue;
642
643 /** CSV format can contain insignificant spaces and tabs.
644 * Usually the task of skipping them is for the calling code.
645 * But in this case, it will be difficult to do this, so remove the trailing whitespace by yourself.
646 */
647 size_t size = s.size();
648 while (size > 0
649 && (s[size - 1] == ' ' || s[size - 1] == '\t'))
650 --size;
651
652 s.resize(size);
653 return;
654 }
655 }
656}
657
658void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
659{
660 s.clear();
661 readCSVStringInto(s, buf, settings);
662}
663
664template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
665
666
667template <typename Vector, typename ReturnType>
668ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf)
669{
670 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
671
672 auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]])
673 {
674 if constexpr (throw_exception)
675 throw Exception(message, code);
676 return ReturnType(false);
677 };
678
679 if (buf.eof() || *buf.position() != '"')
680 return error("Cannot parse JSON string: expected opening quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
681 ++buf.position();
682
683 while (!buf.eof())
684 {
685 char * next_pos = find_first_symbols<'\\', '"'>(buf.position(), buf.buffer().end());
686
687 appendToStringOrVector(s, buf, next_pos);
688 buf.position() = next_pos;
689
690 if (!buf.hasPendingData())
691 continue;
692
693 if (*buf.position() == '"')
694 {
695 ++buf.position();
696 return ReturnType(true);
697 }
698
699 if (*buf.position() == '\\')
700 parseJSONEscapeSequence<Vector, ReturnType>(s, buf);
701 }
702
703 return error("Cannot parse JSON string: expected closing quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
704}
705
706void readJSONString(String & s, ReadBuffer & buf)
707{
708 s.clear();
709 readJSONStringInto(s, buf);
710}
711
712template void readJSONStringInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
713template bool readJSONStringInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
714template void readJSONStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
715template void readJSONStringInto<String>(String & s, ReadBuffer & buf);
716
717
718template <typename ReturnType>
719ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
720{
721 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
722
723 auto error = []
724 {
725 if constexpr (throw_exception)
726 throw Exception("Cannot parse date: value is too short", ErrorCodes::CANNOT_PARSE_DATE);
727 return ReturnType(false);
728 };
729
730 auto ignore_delimiter = [&]
731 {
732 if (!buf.eof())
733 {
734 ++buf.position();
735 return true;
736 }
737 else
738 return false;
739 };
740
741 auto append_digit = [&](auto & x)
742 {
743 if (!buf.eof() && isNumericASCII(*buf.position()))
744 {
745 x = x * 10 + (*buf.position() - '0');
746 ++buf.position();
747 return true;
748 }
749 else
750 return false;
751 };
752
753 UInt16 year = 0;
754 if (!append_digit(year)
755 || !append_digit(year)
756 || !append_digit(year)
757 || !append_digit(year))
758 return error();
759
760 if (!ignore_delimiter())
761 return error();
762
763 UInt8 month = 0;
764 if (!append_digit(month))
765 return error();
766 append_digit(month);
767
768 if (!ignore_delimiter())
769 return error();
770
771 UInt8 day = 0;
772 if (!append_digit(day))
773 return error();
774 append_digit(day);
775
776 date = LocalDate(year, month, day);
777 return ReturnType(true);
778}
779
780template void readDateTextFallback<void>(LocalDate &, ReadBuffer &);
781template bool readDateTextFallback<bool>(LocalDate &, ReadBuffer &);
782
783
784template <typename ReturnType>
785ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
786{
787 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
788
789 static constexpr auto DATE_TIME_BROKEN_DOWN_LENGTH = 19;
790 static constexpr auto UNIX_TIMESTAMP_MAX_LENGTH = 10;
791
792 char s[DATE_TIME_BROKEN_DOWN_LENGTH];
793 char * s_pos = s;
794
795 /// A piece similar to unix timestamp.
796 while (s_pos < s + UNIX_TIMESTAMP_MAX_LENGTH && !buf.eof() && isNumericASCII(*buf.position()))
797 {
798 *s_pos = *buf.position();
799 ++s_pos;
800 ++buf.position();
801 }
802
803 /// 2015-01-01 01:02:03
804 if (s_pos == s + 4 && !buf.eof() && (*buf.position() < '0' || *buf.position() > '9'))
805 {
806 const size_t remaining_size = DATE_TIME_BROKEN_DOWN_LENGTH - (s_pos - s);
807 size_t size = buf.read(s_pos, remaining_size);
808 if (remaining_size != size)
809 {
810 s_pos[size] = 0;
811
812 if constexpr (throw_exception)
813 throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
814 else
815 return false;
816 }
817
818 UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
819 UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
820 UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
821
822 UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0');
823 UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0');
824 UInt8 second = (s[17] - '0') * 10 + (s[18] - '0');
825
826 if (unlikely(year == 0))
827 datetime = 0;
828 else
829 datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
830 }
831 else
832 {
833 /// Only unix timestamp of 5-10 characters is supported. For consistency. See readDateTimeTextImpl.
834 if (s_pos - s >= 5 && s_pos - s <= 10)
835 {
836 /// Not very efficient.
837 datetime = 0;
838 for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos)
839 datetime = datetime * 10 + *digit_pos - '0';
840 }
841 else
842 {
843 if constexpr (throw_exception)
844 throw Exception("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME);
845 else
846 return false;
847 }
848 }
849
850 return ReturnType(true);
851}
852
853template void readDateTimeTextFallback<void>(time_t &, ReadBuffer &, const DateLUTImpl &);
854template bool readDateTimeTextFallback<bool>(time_t &, ReadBuffer &, const DateLUTImpl &);
855
856
857void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field)
858{
859 if (buf.eof())
860 throw Exception("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
861 else if (*buf.position() == '"') /// skip double-quoted string
862 {
863 NullSink sink;
864 readJSONStringInto(sink, buf);
865 }
866 else if (isNumericASCII(*buf.position()) || *buf.position() == '-' || *buf.position() == '+' || *buf.position() == '.') /// skip number
867 {
868 if (*buf.position() == '+')
869 ++buf.position();
870
871 double v;
872 if (!tryReadFloatText(v, buf))
873 throw Exception("Expected a number field for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
874 }
875 else if (*buf.position() == 'n') /// skip null
876 {
877 assertString("null", buf);
878 }
879 else if (*buf.position() == 't') /// skip true
880 {
881 assertString("true", buf);
882 }
883 else if (*buf.position() == 'f') /// skip false
884 {
885 assertString("false", buf);
886 }
887 else if (*buf.position() == '[')
888 {
889 ++buf.position();
890 skipWhitespaceIfAny(buf);
891
892 if (!buf.eof() && *buf.position() == ']') /// skip empty array
893 {
894 ++buf.position();
895 return;
896 }
897
898 while (true)
899 {
900 skipJSONField(buf, name_of_field);
901 skipWhitespaceIfAny(buf);
902
903 if (!buf.eof() && *buf.position() == ',')
904 {
905 ++buf.position();
906 skipWhitespaceIfAny(buf);
907 }
908 else if (!buf.eof() && *buf.position() == ']')
909 {
910 ++buf.position();
911 break;
912 }
913 else
914 throw Exception("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
915 }
916 }
917 else if (*buf.position() == '{') /// skip whole object
918 {
919 ++buf.position();
920 skipWhitespaceIfAny(buf);
921
922 while (!buf.eof() && *buf.position() != '}')
923 {
924 // field name
925 if (*buf.position() == '"')
926 {
927 NullSink sink;
928 readJSONStringInto(sink, buf);
929 }
930 else
931 throw Exception("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
932
933 // ':'
934 skipWhitespaceIfAny(buf);
935 if (buf.eof() || !(*buf.position() == ':'))
936 throw Exception("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
937 ++buf.position();
938 skipWhitespaceIfAny(buf);
939
940 skipJSONField(buf, name_of_field);
941 skipWhitespaceIfAny(buf);
942
943 // optional ','
944 if (!buf.eof() && *buf.position() == ',')
945 {
946 ++buf.position();
947 skipWhitespaceIfAny(buf);
948 }
949 }
950
951 if (buf.eof())
952 throw Exception("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
953 ++buf.position();
954 }
955 else
956 {
957 throw Exception("Unexpected symbol '" + std::string(*buf.position(), 1) + "' for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
958 }
959}
960
961
962void readException(Exception & e, ReadBuffer & buf, const String & additional_message)
963{
964 int code = 0;
965 String name;
966 String message;
967 String stack_trace;
968 bool has_nested = false;
969
970 readBinary(code, buf);
971 readBinary(name, buf);
972 readBinary(message, buf);
973 readBinary(stack_trace, buf);
974 readBinary(has_nested, buf);
975
976 WriteBufferFromOwnString out;
977
978 if (!additional_message.empty())
979 out << additional_message << ". ";
980
981 if (name != "DB::Exception")
982 out << name << ". ";
983
984 out << message << ".";
985
986 if (!stack_trace.empty())
987 out << " Stack trace:\n\n" << stack_trace;
988
989 if (has_nested)
990 {
991 Exception nested;
992 readException(nested, buf);
993 e = Exception(out.str(), nested, code);
994 }
995 else
996 e = Exception(out.str(), code);
997}
998
999void readAndThrowException(ReadBuffer & buf, const String & additional_message)
1000{
1001 Exception e;
1002 readException(e, buf, additional_message);
1003 e.rethrow();
1004}
1005
1006
1007void skipToNextLineOrEOF(ReadBuffer & buf)
1008{
1009 while (!buf.eof())
1010 {
1011 char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end());
1012 buf.position() = next_pos;
1013
1014 if (!buf.hasPendingData())
1015 continue;
1016
1017 if (*buf.position() == '\n')
1018 {
1019 ++buf.position();
1020 return;
1021 }
1022 }
1023}
1024
1025
1026void skipToUnescapedNextLineOrEOF(ReadBuffer & buf)
1027{
1028 while (!buf.eof())
1029 {
1030 char * next_pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end());
1031 buf.position() = next_pos;
1032
1033 if (!buf.hasPendingData())
1034 continue;
1035
1036 if (*buf.position() == '\n')
1037 {
1038 ++buf.position();
1039 return;
1040 }
1041
1042 if (*buf.position() == '\\')
1043 {
1044 ++buf.position();
1045 if (buf.eof())
1046 return;
1047
1048 /// Skip escaped character. We do not consider escape sequences with more than one character after backslash (\x01).
1049 /// It's ok for the purpose of this function, because we are interested only in \n and \\.
1050 ++buf.position();
1051 continue;
1052 }
1053 }
1054}
1055
1056void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
1057{
1058 assert(current >= in.position());
1059 assert(current <= in.buffer().end());
1060
1061 const int old_bytes = memory.size();
1062 const int additional_bytes = current - in.position();
1063 const int new_bytes = old_bytes + additional_bytes;
1064 /// There are no new bytes to add to memory.
1065 /// No need to do extra stuff.
1066 if (new_bytes == 0)
1067 return;
1068 memory.resize(new_bytes);
1069 memcpy(memory.data() + old_bytes, in.position(), additional_bytes);
1070 in.position() = current;
1071}
1072
1073bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
1074{
1075 assert(current <= in.buffer().end());
1076
1077 if (current < in.buffer().end())
1078 return true;
1079
1080 saveUpToPosition(in, memory, current);
1081 bool loaded_more = !in.eof();
1082 assert(in.position() == in.buffer().begin());
1083 current = in.position();
1084 return loaded_more;
1085}
1086
1087}
1088