1 | #pragma once |
2 | |
3 | #include <cmath> |
4 | #include <cstring> |
5 | #include <limits> |
6 | #include <algorithm> |
7 | #include <iterator> |
8 | |
9 | #include <type_traits> |
10 | |
11 | #include <common/DateLUT.h> |
12 | #include <common/LocalDate.h> |
13 | #include <common/LocalDateTime.h> |
14 | #include <common/StringRef.h> |
15 | #include <common/arithmeticOverflow.h> |
16 | |
17 | #include <Core/Types.h> |
18 | #include <Core/DecimalFunctions.h> |
19 | #include <Core/UUID.h> |
20 | |
21 | #include <Common/Exception.h> |
22 | #include <Common/StringUtils/StringUtils.h> |
23 | #include <Common/Arena.h> |
24 | #include <Common/UInt128.h> |
25 | #include <Common/intExp.h> |
26 | |
27 | #include <Formats/FormatSettings.h> |
28 | |
29 | #include <IO/CompressionMethod.h> |
30 | #include <IO/ReadBuffer.h> |
31 | #include <IO/ReadBufferFromMemory.h> |
32 | #include <IO/VarInt.h> |
33 | #include <IO/ZlibInflatingReadBuffer.h> |
34 | |
35 | #include <DataTypes/DataTypeDateTime.h> |
36 | |
37 | #ifdef __clang__ |
38 | #pragma clang diagnostic push |
39 | #pragma clang diagnostic ignored "-Wdouble-promotion" |
40 | #endif |
41 | |
42 | #include <double-conversion/double-conversion.h> |
43 | |
44 | #ifdef __clang__ |
45 | #pragma clang diagnostic pop |
46 | #endif |
47 | |
48 | |
49 | /// 1 GiB |
50 | #define DEFAULT_MAX_STRING_SIZE (1ULL << 30) |
51 | |
52 | |
53 | namespace DB |
54 | { |
55 | |
/// Error codes referenced by the helpers in this header.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_PARSE_DATE;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int CANNOT_PARSE_UUID;
    extern const int CANNOT_READ_ARRAY_FROM_TEXT;
    extern const int CANNOT_PARSE_NUMBER;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
65 | |
66 | /// Helper functions for formatted input. |
67 | |
/// Translates the character that follows a backslash into the character denoted by that escape sequence.
/// Characters without a special meaning are returned as is.
inline char parseEscapeSequence(char c)
{
    switch (c)
    {
        case 'a': return '\a';
        case 'b': return '\b';
        case 'e': return '\x1B';    /// \e is not standard C/C++, although gcc and clang accept it; spell out the code.
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
        case '0': return '\0';
        default: break;
    }

    /// Not a known escape: the character stands for itself.
    return c;
}
94 | |
95 | |
96 | /// These functions are located in VarInt.h |
97 | /// inline void throwReadAfterEOF() |
98 | |
99 | |
100 | inline void readChar(char & x, ReadBuffer & buf) |
101 | { |
102 | if (!buf.eof()) |
103 | { |
104 | x = *buf.position(); |
105 | ++buf.position(); |
106 | } |
107 | else |
108 | throwReadAfterEOF(); |
109 | } |
110 | |
111 | |
112 | /// Read POD-type in native format |
113 | template <typename T> |
114 | inline void readPODBinary(T & x, ReadBuffer & buf) |
115 | { |
116 | buf.readStrict(reinterpret_cast<char *>(&x), sizeof(x)); |
117 | } |
118 | |
/// Reads an integer in native binary format. Simply forwards to readPODBinary.
template <typename T>
inline void readIntBinary(T & x, ReadBuffer & buf)
{
    readPODBinary(x, buf);
}

/// Reads a floating point number in native binary format. Simply forwards to readPODBinary.
template <typename T>
inline void readFloatBinary(T & x, ReadBuffer & buf)
{
    readPODBinary(x, buf);
}
130 | |
131 | |
132 | inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE) |
133 | { |
134 | size_t size = 0; |
135 | readVarUInt(size, buf); |
136 | |
137 | if (size > MAX_STRING_SIZE) |
138 | throw Poco::Exception("Too large string size." ); |
139 | |
140 | s.resize(size); |
141 | buf.readStrict(s.data(), size); |
142 | } |
143 | |
144 | |
145 | inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf) |
146 | { |
147 | size_t size = 0; |
148 | readVarUInt(size, buf); |
149 | |
150 | char * data = arena.alloc(size); |
151 | buf.readStrict(data, size); |
152 | |
153 | return StringRef(data, size); |
154 | } |
155 | |
156 | |
157 | template <typename T> |
158 | void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE) |
159 | { |
160 | size_t size = 0; |
161 | readVarUInt(size, buf); |
162 | |
163 | if (size > MAX_VECTOR_SIZE) |
164 | throw Poco::Exception("Too large vector size." ); |
165 | |
166 | v.resize(size); |
167 | for (size_t i = 0; i < size; ++i) |
168 | readBinary(v[i], buf); |
169 | } |
170 | |
171 | |
/// Assertion helpers; only declared here, the definitions are not in this header.
/// Presumably assertString requires the buffer to continue with s and assertEOF requires the end of stream - confirm in the implementation.
void assertString(const char * s, ReadBuffer & buf);
void assertEOF(ReadBuffer & buf);

/// Never returns; used by assertChar below to report that the expected text s was not found in buf.
[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
176 | |
177 | inline void assertChar(char symbol, ReadBuffer & buf) |
178 | { |
179 | if (buf.eof() || *buf.position() != symbol) |
180 | { |
181 | char err[2] = {symbol, '\0'}; |
182 | throwAtAssertionFailed(err, buf); |
183 | } |
184 | ++buf.position(); |
185 | } |
186 | |
/// Convenience overload for String: forwards to the const char * version.
inline void assertString(const String & s, ReadBuffer & buf)
{
    assertString(s.c_str(), buf);
}

/// Declared here, defined elsewhere. Returns bool rather than throwing;
/// presumably true means the buffer continued with s - confirm in the implementation.
bool checkString(const char * s, ReadBuffer & buf);
/// Convenience overload for String: forwards to the const char * version.
inline bool checkString(const String & s, ReadBuffer & buf)
{
    return checkString(s.c_str(), buf);
}
197 | |
198 | inline bool checkChar(char c, ReadBuffer & buf) |
199 | { |
200 | if (buf.eof() || *buf.position() != c) |
201 | return false; |
202 | ++buf.position(); |
203 | return true; |
204 | } |
205 | |
/// Case-insensitive counterparts of checkString / assertString. The const char * versions are defined elsewhere.
bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf);
/// Convenience overload for String: forwards to the const char * version.
inline bool checkStringCaseInsensitive(const String & s, ReadBuffer & buf)
{
    return checkStringCaseInsensitive(s.c_str(), buf);
}

void assertStringCaseInsensitive(const char * s, ReadBuffer & buf);
/// Convenience overload for String: forwards to the const char * version.
inline void assertStringCaseInsensitive(const String & s, ReadBuffer & buf)
{
    return assertStringCaseInsensitive(s.c_str(), buf);
}
217 | |
/** Check whether the next character in buf matches the first character of s.
  * If it does, all characters of s are then checked and an exception is thrown if they do not match.
  * If it does not, false is returned and the position in the buffer is left unchanged.
  */
bool checkStringByFirstCharacterAndAssertTheRest(const char * s, ReadBuffer & buf);
bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const char * s, ReadBuffer & buf);

/// Convenience overload for String: forwards to the const char * version.
inline bool checkStringByFirstCharacterAndAssertTheRest(const String & s, ReadBuffer & buf)
{
    return checkStringByFirstCharacterAndAssertTheRest(s.c_str(), buf);
}

/// Convenience overload for String: forwards to the const char * version.
inline bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const String & s, ReadBuffer & buf)
{
    return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(s.c_str(), buf);
}
234 | |
235 | |
236 | inline void readBoolText(bool & x, ReadBuffer & buf) |
237 | { |
238 | char tmp = '0'; |
239 | readChar(tmp, buf); |
240 | x = tmp != '0'; |
241 | } |
242 | |
243 | inline void readBoolTextWord(bool & x, ReadBuffer & buf) |
244 | { |
245 | if (buf.eof()) |
246 | throwReadAfterEOF(); |
247 | |
248 | if (*buf.position() == 't') |
249 | { |
250 | assertString("true" , buf); |
251 | x = true; |
252 | } |
253 | else |
254 | { |
255 | assertString("false" , buf); |
256 | x = false; |
257 | } |
258 | } |
259 | |
/// Compile-time switch for readIntTextImpl and its wrappers: whether accumulation of digits is checked for overflow.
enum class ReadIntTextCheckOverflow
{
    DO_NOT_CHECK_OVERFLOW,
    CHECK_OVERFLOW,
};
265 | |
266 | template <typename T, typename ReturnType = void, ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW> |
267 | ReturnType readIntTextImpl(T & x, ReadBuffer & buf) |
268 | { |
269 | static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; |
270 | |
271 | bool negative = false; |
272 | std::make_unsigned_t<T> res = 0; |
273 | if (buf.eof()) |
274 | { |
275 | if constexpr (throw_exception) |
276 | throwReadAfterEOF(); |
277 | else |
278 | return ReturnType(false); |
279 | } |
280 | |
281 | const size_t initial_pos = buf.count(); |
282 | while (!buf.eof()) |
283 | { |
284 | switch (*buf.position()) |
285 | { |
286 | case '+': |
287 | break; |
288 | case '-': |
289 | if constexpr (is_signed_v<T>) |
290 | negative = true; |
291 | else |
292 | { |
293 | if constexpr (throw_exception) |
294 | throw Exception("Unsigned type must not contain '-' symbol" , ErrorCodes::CANNOT_PARSE_NUMBER); |
295 | else |
296 | return ReturnType(false); |
297 | } |
298 | break; |
299 | case '0': [[fallthrough]]; |
300 | case '1': [[fallthrough]]; |
301 | case '2': [[fallthrough]]; |
302 | case '3': [[fallthrough]]; |
303 | case '4': [[fallthrough]]; |
304 | case '5': [[fallthrough]]; |
305 | case '6': [[fallthrough]]; |
306 | case '7': [[fallthrough]]; |
307 | case '8': [[fallthrough]]; |
308 | case '9': |
309 | if constexpr (check_overflow == ReadIntTextCheckOverflow::CHECK_OVERFLOW) |
310 | { |
311 | // perform relativelly slow overflow check only when number of decimal digits so far is close to the max for given type. |
312 | if (buf.count() - initial_pos >= std::numeric_limits<T>::max_digits10) |
313 | { |
314 | if (common::mulOverflow(res, static_cast<decltype(res)>(10), res) |
315 | || common::addOverflow(res, static_cast<decltype(res)>(*buf.position() - '0'), res)) |
316 | return ReturnType(false); |
317 | break; |
318 | } |
319 | } |
320 | res *= 10; |
321 | res += *buf.position() - '0'; |
322 | break; |
323 | default: |
324 | goto end; |
325 | } |
326 | ++buf.position(); |
327 | } |
328 | |
329 | end: |
330 | x = negative ? -res : res; |
331 | |
332 | return ReturnType(true); |
333 | } |
334 | |
/// Throwing variant of integer parsing (ReturnType = void). Overflow is not checked by default.
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
void readIntText(T & x, ReadBuffer & buf)
{
    readIntTextImpl<T, void, check_overflow>(x, buf);
}

/// Non-throwing variant of integer parsing: returns the bool result of readIntTextImpl. Overflow is checked by default.
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
bool tryReadIntText(T & x, ReadBuffer & buf)
{
    return readIntTextImpl<T, bool, check_overflow>(x, buf);
}

/// Overload for Decimal: the text is parsed as an integer directly into the underlying value.
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
void readIntText(Decimal<T> & x, ReadBuffer & buf)
{
    readIntText<check_overflow>(x.value, buf);
}
352 | |
353 | /** More efficient variant (about 1.5 times on real dataset). |
354 | * Differs in following: |
355 | * - for numbers starting with zero, parsed only zero; |
356 | * - symbol '+' before number is not supported; |
357 | * - symbols :;<=>? are parsed as some numbers. |
358 | */ |
359 | template <typename T, bool throw_on_error = true> |
360 | void readIntTextUnsafe(T & x, ReadBuffer & buf) |
361 | { |
362 | bool negative = false; |
363 | std::make_unsigned_t<T> res = 0; |
364 | |
365 | auto on_error = [] |
366 | { |
367 | if (throw_on_error) |
368 | throwReadAfterEOF(); |
369 | }; |
370 | |
371 | if (unlikely(buf.eof())) |
372 | return on_error(); |
373 | |
374 | if (is_signed_v<T> && *buf.position() == '-') |
375 | { |
376 | ++buf.position(); |
377 | negative = true; |
378 | if (unlikely(buf.eof())) |
379 | return on_error(); |
380 | } |
381 | |
382 | if (*buf.position() == '0') /// There are many zeros in real datasets. |
383 | { |
384 | ++buf.position(); |
385 | x = 0; |
386 | return; |
387 | } |
388 | |
389 | while (!buf.eof()) |
390 | { |
391 | /// This check is suddenly faster than |
392 | /// unsigned char c = *buf.position() - '0'; |
393 | /// if (c < 10) |
394 | /// for unknown reason on Xeon E5645. |
395 | |
396 | if ((*buf.position() & 0xF0) == 0x30) /// It makes sense to have this condition inside loop. |
397 | { |
398 | res *= 10; |
399 | res += *buf.position() & 0x0F; |
400 | ++buf.position(); |
401 | } |
402 | else |
403 | break; |
404 | } |
405 | |
406 | /// See note about undefined behaviour above. |
407 | x = is_signed_v<T> && negative ? -res : res; |
408 | } |
409 | |
/// Same as readIntTextUnsafe, but with throw_on_error = false.
/// It has no return value, so an unexpected end of stream is not reported to the caller.
template <typename T>
void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
{
    return readIntTextUnsafe<T, false>(x, buf);
}
415 | |
416 | |
/// Floating point parsing is only declared here; look at readFloatText.h for the implementation.
template <typename T> void readFloatText(T & x, ReadBuffer & in);
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in);


/// The string readers below are only declared here; their definitions are not in this header.

/// Simple variant: everything until '\n' or '\t'.
void readString(String & s, ReadBuffer & buf);

void readEscapedString(String & s, ReadBuffer & buf);

void readQuotedString(String & s, ReadBuffer & buf);
void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);

void readDoubleQuotedString(String & s, ReadBuffer & buf);
void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);

void readJSONString(String & s, ReadBuffer & buf);

void readBackQuotedString(String & s, ReadBuffer & buf);
void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);

void readStringUntilEOF(String & s, ReadBuffer & buf);
void readEscapedStringUntilEOL(String & s, ReadBuffer & buf);
440 | |
441 | |
/** Read a string in CSV format.
  * Parsing rules:
  * - the string may be placed in quotes; the quotes may be single: ' if FormatSettings::CSV::allow_single_quotes is true
  *   or double: " if FormatSettings::CSV::allow_double_quotes is true;
  * - or the string may be unquoted - this is determined by the first character;
  * - if the string is unquoted, it is read until the next delimiter,
  *   or until the end of line (CR or LF),
  *   or until the end of stream;
  *   spaces and tabs at the beginning and at the end of an unquoted string are consumed but ignored
  *   (note that this behaviour differs from the RFC);
  * - if the string is in quotes, it is read until the closing quote,
  *   and a sequence of two consecutive quotes is parsed as a single quote inside the string.
  */
void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
455 | |
456 | |
/// Read and append the result to an array of characters.
/// Vector is the destination type; NullSink (declared in this header) can be used to discard the data.
/// These templates are only declared here; the definitions are not in this header.
template <typename Vector>
void readStringInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readNullTerminated(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readEscapedStringInto(Vector & s, ReadBuffer & buf);

template <bool enable_sql_style_quoting, typename Vector>
void readQuotedStringInto(Vector & s, ReadBuffer & buf);

template <bool enable_sql_style_quoting, typename Vector>
void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf);

template <bool enable_sql_style_quoting, typename Vector>
void readBackQuotedStringInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);

/// ReturnType is either bool or void. If bool, the function returns false instead of throwing an exception.
template <typename Vector, typename ReturnType = void>
ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf);
485 | |
/// Non-throwing variant of readJSONStringInto: instantiates it with ReturnType = bool and returns its result.
template <typename Vector>
bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
{
    return readJSONStringInto<Vector, bool>(s, buf);
}
491 | |
/// This can be used as the Vector template parameter of the functions above if you just want to skip the data:
/// both operations accept their arguments and do nothing.
struct NullSink
{
    void append(const char *, size_t) {}
    void push_back(char) {}
};
498 | |
/// UUID parsing; only declared here. Judging by the parameter names, src36 is the 36-character text form
/// and dst16 receives the 16 bytes - confirm in the implementation.
/// The second overload writes through a reverse iterator (see readUUIDText for its use).
void parseUUID(const UInt8 * src36, UInt8 * dst16);
void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);

/// Only declared here; the definition is not in this header.
template <typename IteratorSrc, typename IteratorDst>
void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes);


/// Slow path of readDateTextImpl, used when the value may not be entirely inside the current buffer. Defined elsewhere.
template <typename ReturnType>
ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf);
508 | |
509 | /// In YYYY-MM-DD format. |
510 | /// For convenience, Month and Day parts can have single digit instead of two digits. |
511 | /// Any separators other than '-' are supported. |
512 | template <typename ReturnType = void> |
513 | inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) |
514 | { |
515 | /// Optimistic path, when whole value is in buffer. |
516 | if (buf.position() + 10 <= buf.buffer().end()) |
517 | { |
518 | UInt16 year = (buf.position()[0] - '0') * 1000 + (buf.position()[1] - '0') * 100 + (buf.position()[2] - '0') * 10 + (buf.position()[3] - '0'); |
519 | buf.position() += 5; |
520 | |
521 | UInt8 month = buf.position()[0] - '0'; |
522 | if (isNumericASCII(buf.position()[1])) |
523 | { |
524 | month = month * 10 + buf.position()[1] - '0'; |
525 | buf.position() += 3; |
526 | } |
527 | else |
528 | buf.position() += 2; |
529 | |
530 | UInt8 day = buf.position()[0] - '0'; |
531 | if (isNumericASCII(buf.position()[1])) |
532 | { |
533 | day = day * 10 + buf.position()[1] - '0'; |
534 | buf.position() += 2; |
535 | } |
536 | else |
537 | buf.position() += 1; |
538 | |
539 | date = LocalDate(year, month, day); |
540 | return ReturnType(true); |
541 | } |
542 | else |
543 | return readDateTextFallback<ReturnType>(date, buf); |
544 | } |
545 | |
546 | template <typename ReturnType = void> |
547 | inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) |
548 | { |
549 | static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; |
550 | |
551 | LocalDate local_date; |
552 | |
553 | if constexpr (throw_exception) |
554 | readDateTextImpl<ReturnType>(local_date, buf); |
555 | else if (!readDateTextImpl<ReturnType>(local_date, buf)) |
556 | return false; |
557 | |
558 | date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); |
559 | return ReturnType(true); |
560 | } |
561 | |
562 | |
/// Date reading entry points. readDateText instantiates readDateTextImpl with ReturnType = void,
/// tryReadDateText instantiates it with ReturnType = bool and returns the result.
inline void readDateText(LocalDate & date, ReadBuffer & buf)
{
    readDateTextImpl<void>(date, buf);
}

inline void readDateText(DayNum & date, ReadBuffer & buf)
{
    readDateTextImpl<void>(date, buf);
}

inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf)
{
    return readDateTextImpl<bool>(date, buf);
}

inline bool tryReadDateText(DayNum & date, ReadBuffer & buf)
{
    return readDateTextImpl<bool>(date, buf);
}
582 | |
583 | |
584 | inline void readUUIDText(UUID & uuid, ReadBuffer & buf) |
585 | { |
586 | char s[36]; |
587 | size_t size = buf.read(s, 36); |
588 | |
589 | if (size != 36) |
590 | { |
591 | s[size] = 0; |
592 | throw Exception(std::string("Cannot parse uuid " ) + s, ErrorCodes::CANNOT_PARSE_UUID); |
593 | } |
594 | |
595 | parseUUID(reinterpret_cast<const UInt8 *>(s), std::reverse_iterator<UInt8 *>(reinterpret_cast<UInt8 *>(&uuid) + 16)); |
596 | } |
597 | |
598 | |
/// Parses a value of type T from a character range; only declared at this point.
template <typename T>
inline T parse(const char * data, size_t size);

/// Convenience wrapper: parses a value of type T from the contents of a String.
template <typename T>
inline T parseFromString(const String & str)
{
    return parse<T>(str.data(), str.size());
}
607 | |
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wredundant-decls"
// Just don't mess with it. If this redundant redeclaration is removed, then ReaderHelpers.h has to be included.
// That leads to the inclusion of Arena.h, which has a problem with the ASAN stuff being included properly and messing up
// a macro definition, which interferes with... You don't want to know, really.
UInt128 stringToUUID(const String & str);
#pragma GCC diagnostic pop
615 | |
616 | template <typename ReturnType = void> |
617 | ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut); |
618 | |
619 | /** In YYYY-MM-DD hh:mm:ss format, according to specified time zone. |
620 | * As an exception, also supported parsing of unix timestamp in form of decimal number. |
621 | */ |
622 | template <typename ReturnType = void> |
623 | inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) |
624 | { |
625 | /** Read 10 characters, that could represent unix timestamp. |
626 | * Only unix timestamp of 5-10 characters is supported. |
627 | * Then look at 5th character. If it is a number - treat whole as unix timestamp. |
628 | * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss format. |
629 | */ |
630 | |
631 | /// Optimistic path, when whole value is in buffer. |
632 | const char * s = buf.position(); |
633 | if (s + 19 <= buf.buffer().end()) |
634 | { |
635 | if (s[4] < '0' || s[4] > '9') |
636 | { |
637 | UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); |
638 | UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); |
639 | UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); |
640 | |
641 | UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0'); |
642 | UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0'); |
643 | UInt8 second = (s[17] - '0') * 10 + (s[18] - '0'); |
644 | |
645 | if (unlikely(year == 0)) |
646 | datetime = 0; |
647 | else |
648 | datetime = date_lut.makeDateTime(year, month, day, hour, minute, second); |
649 | |
650 | buf.position() += 19; |
651 | return ReturnType(true); |
652 | } |
653 | else |
654 | /// Why not readIntTextUnsafe? Because for needs of AdFox, parsing of unix timestamp with leading zeros is supported: 000...NNNN. |
655 | return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf); |
656 | } |
657 | else |
658 | return readDateTimeTextFallback<ReturnType>(datetime, buf, date_lut); |
659 | } |
660 | |
661 | template <typename ReturnType> |
662 | inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) |
663 | { |
664 | time_t whole; |
665 | if (!readDateTimeTextImpl<bool>(whole, buf, date_lut)) |
666 | { |
667 | return ReturnType(false); |
668 | } |
669 | |
670 | DB::DecimalUtils::DecimalComponents<DateTime64::NativeType> c{static_cast<DateTime64::NativeType>(whole), 0}; |
671 | |
672 | if (!buf.eof() && *buf.position() == '.') |
673 | { |
674 | buf.ignore(1); // skip separator |
675 | const auto pos_before_fractional = buf.count(); |
676 | if (!tryReadIntText<ReadIntTextCheckOverflow::CHECK_OVERFLOW>(c.fractional, buf)) |
677 | { |
678 | return ReturnType(false); |
679 | } |
680 | |
681 | // Adjust fractional part to the scale, since decimalFromComponents knows nothing |
682 | // about convention of ommiting trailing zero on fractional part |
683 | // and assumes that fractional part value is less than 10^scale. |
684 | |
685 | // If scale is 3, but we read '12', promote fractional part to '120'. |
686 | // And vice versa: if we read '1234', denote it to '123'. |
687 | const auto fractional_length = static_cast<Int32>(buf.count() - pos_before_fractional); |
688 | if (const auto adjust_scale = static_cast<Int32>(scale) - fractional_length; adjust_scale > 0) |
689 | { |
690 | c.fractional *= common::exp10_i64(adjust_scale); |
691 | } |
692 | else if (adjust_scale < 0) |
693 | { |
694 | c.fractional /= common::exp10_i64(-1 * adjust_scale); |
695 | } |
696 | } |
697 | |
698 | datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(c, scale); |
699 | |
700 | return ReturnType(true); |
701 | } |
702 | |
/// Date-with-time reading entry points. The read* functions instantiate readDateTimeTextImpl with ReturnType = void,
/// the tryRead* functions instantiate it with ReturnType = bool and return the result.
/// The time zone defaults to DateLUT::instance().
inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    readDateTimeTextImpl<void>(datetime, buf, date_lut);
}

inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    readDateTimeTextImpl<void>(datetime64, scale, buf, date_lut);
}

inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    return readDateTimeTextImpl<bool>(datetime, buf, date_lut);
}

inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    return readDateTimeTextImpl<bool>(datetime64, scale, buf, date_lut);
}
722 | |
723 | inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) |
724 | { |
725 | char s[19]; |
726 | size_t size = buf.read(s, 19); |
727 | if (19 != size) |
728 | { |
729 | s[size] = 0; |
730 | throw Exception(std::string("Cannot parse datetime " ) + s, ErrorCodes::CANNOT_PARSE_DATETIME); |
731 | } |
732 | |
733 | datetime.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0')); |
734 | datetime.month((s[5] - '0') * 10 + (s[6] - '0')); |
735 | datetime.day((s[8] - '0') * 10 + (s[9] - '0')); |
736 | |
737 | datetime.hour((s[11] - '0') * 10 + (s[12] - '0')); |
738 | datetime.minute((s[14] - '0') * 10 + (s[15] - '0')); |
739 | datetime.second((s[17] - '0') * 10 + (s[18] - '0')); |
740 | } |
741 | |
742 | |
/// Generic methods to read a value in native binary format.
/// Arithmetic types and the fixed-size types listed below are read as raw bytes (readPODBinary);
/// String is read as a length-prefixed string (readStringBinary).
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readBinary(T & x, ReadBuffer & buf) { readPODBinary(x, buf); }

inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
inline void readBinary(Int128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt256 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Decimal32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Decimal64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
756 | |
757 | |
/// Generic methods to read a value in text tab-separated format.
/// Each overload dispatches to the type-specific reader declared above.
template <typename T>
inline std::enable_if_t<is_integral_v<T>, void>
readText(T & x, ReadBuffer & buf) { readIntText(x, buf); }

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, void>
readText(T & x, ReadBuffer & buf) { readFloatText(x, buf); }

inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); }
[[noreturn]] inline void readText(UInt128 &, ReadBuffer &)
{
    /** Because UInt128 isn't a natural type - it has no arithmetic operators and is only used as an intermediary type for UUID -
      * execution should never arrive here. But because the DataTypeNumber class is used, at least a definition has to exist.
      */
    throw Exception("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
779 | |
/// Generic methods to read a value in text format,
/// possibly in single quotes (only for data types that use quotes in VALUES format of INSERT statement in SQL).
/// Arithmetic types are read without quotes.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }

inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }

/// Dates are required to be enclosed in single quotes.
inline void readQuoted(LocalDate & x, ReadBuffer & buf)
{
    assertChar('\'', buf);
    readDateText(x, buf);
    assertChar('\'', buf);
}

/// Dates with time are required to be enclosed in single quotes.
inline void readQuoted(LocalDateTime & x, ReadBuffer & buf)
{
    assertChar('\'', buf);
    readDateTimeText(x, buf);
    assertChar('\'', buf);
}
801 | |
802 | |
/// Same as readQuoted, but with double quotes. Arithmetic types are read without quotes.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }

inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }

/// Dates are required to be enclosed in double quotes.
inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf)
{
    assertChar('"', buf);
    readDateText(x, buf);
    assertChar('"', buf);
}

/// Dates with time are required to be enclosed in double quotes.
inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
{
    assertChar('"', buf);
    readDateTimeText(x, buf);
    assertChar('"', buf);
}
823 | |
824 | |
825 | /// CSV, for numbers, dates: quotes are optional, no special escaping rules. |
826 | template <typename T> |
827 | inline void readCSVSimple(T & x, ReadBuffer & buf) |
828 | { |
829 | if (buf.eof()) |
830 | throwReadAfterEOF(); |
831 | |
832 | char maybe_quote = *buf.position(); |
833 | |
834 | if (maybe_quote == '\'' || maybe_quote == '\"') |
835 | ++buf.position(); |
836 | |
837 | readText(x, buf); |
838 | |
839 | if (maybe_quote == '\'' || maybe_quote == '\"') |
840 | assertChar(maybe_quote, buf); |
841 | } |
842 | |
/// Generic methods to read a value in CSV format.
/// Numbers, dates and UUIDs use readCSVSimple (optional quotes); strings use readCSVString with the CSV settings.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); }

inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
[[noreturn]] inline void readCSV(UInt128 &, ReadBuffer &)
{
    /** Because UInt128 isn't a natural type - it has no arithmetic operators and is only used as an intermediary type for UUID -
      * execution should never arrive here. But because the DataTypeNumber class is used, at least a definition has to exist.
      */
    throw Exception("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
858 | |
859 | template <typename T> |
860 | void readBinary(std::vector<T> & x, ReadBuffer & buf) |
861 | { |
862 | size_t size = 0; |
863 | readVarUInt(size, buf); |
864 | |
865 | if (size > DEFAULT_MAX_STRING_SIZE) |
866 | throw Poco::Exception("Too large vector size." ); |
867 | |
868 | x.resize(size); |
869 | for (size_t i = 0; i < size; ++i) |
870 | readBinary(x[i], buf); |
871 | } |
872 | |
873 | template <typename T> |
874 | void readQuoted(std::vector<T> & x, ReadBuffer & buf) |
875 | { |
876 | bool first = true; |
877 | assertChar('[', buf); |
878 | while (!buf.eof() && *buf.position() != ']') |
879 | { |
880 | if (!first) |
881 | { |
882 | if (*buf.position() == ',') |
883 | ++buf.position(); |
884 | else |
885 | throw Exception("Cannot read array from text" , ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); |
886 | } |
887 | |
888 | first = false; |
889 | |
890 | x.push_back(T()); |
891 | readQuoted(x.back(), buf); |
892 | } |
893 | assertChar(']', buf); |
894 | } |
895 | |
896 | template <typename T> |
897 | void readDoubleQuoted(std::vector<T> & x, ReadBuffer & buf) |
898 | { |
899 | bool first = true; |
900 | assertChar('[', buf); |
901 | while (!buf.eof() && *buf.position() != ']') |
902 | { |
903 | if (!first) |
904 | { |
905 | if (*buf.position() == ',') |
906 | ++buf.position(); |
907 | else |
908 | throw Exception("Cannot read array from text" , ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); |
909 | } |
910 | |
911 | first = false; |
912 | |
913 | x.push_back(T()); |
914 | readDoubleQuoted(x.back(), buf); |
915 | } |
916 | assertChar(']', buf); |
917 | } |
918 | |
/// The text form of a vector is read exactly like the quoted form: delegates to readQuoted.
template <typename T>
void readText(std::vector<T> & x, ReadBuffer & buf)
{
    readQuoted(x, buf);
}
924 | |
925 | |
926 | /// Skip whitespace characters. |
927 | inline void skipWhitespaceIfAny(ReadBuffer & buf) |
928 | { |
929 | while (!buf.eof() && isWhitespaceASCII(*buf.position())) |
930 | ++buf.position(); |
931 | } |
932 | |
/// Skips one JSON value in buf. Only the declaration is visible here.
/// NOTE(review): name_of_field is presumably used for diagnostics only - confirm against the definition.
void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field);
935 | |
936 | |
/** Read a serialized exception.
  * Some information is lost during serialization/deserialization:
  * the type is cut to the base class, 'message' is replaced by 'displayText', and the stack trace is appended to 'message'.
  * An additional message can be appended to the exception (for example, information about where it was received from).
  */
void readException(Exception & e, ReadBuffer & buf, const String & additional_message = "" );
void readAndThrowException(ReadBuffer & buf, const String & additional_message = "" );
944 | |
945 | |
946 | /** Helper function for implementation. |
947 | */ |
948 | template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T> |
949 | static inline const char * tryReadIntText(T & x, const char * pos, const char * end) |
950 | { |
951 | ReadBufferFromMemory in(pos, end - pos); |
952 | tryReadIntText<check_overflow>(x, in); |
953 | return pos + in.count(); |
954 | } |
955 | |
956 | |
957 | /// Convenient methods for reading something from string in text format. |
958 | template <typename T> |
959 | inline T parse(const char * data, size_t size) |
960 | { |
961 | T res; |
962 | ReadBufferFromMemory buf(data, size); |
963 | readText(res, buf); |
964 | return res; |
965 | } |
966 | |
967 | /// Read something from text format, but expect complete parse of given text |
968 | /// For example: 723145 -- ok, 213MB -- not ok |
969 | template <typename T> |
970 | inline T completeParse(const char * data, size_t size) |
971 | { |
972 | T res; |
973 | ReadBufferFromMemory buf(data, size); |
974 | readText(res, buf); |
975 | assertEOF(buf); |
976 | return res; |
977 | } |
978 | |
/// Same as completeParse(data, size), with the text taken from a String.
template <typename T>
inline T completeParse(const String & s)
{
    return completeParse<T>(s.data(), s.size());
}
984 | |
/// Same as completeParse(data, size) for a C string; the length is obtained with strlen, so data must be NUL-terminated.
template <typename T>
inline T completeParse(const char * data)
{
    return completeParse<T>(data, strlen(data));
}
990 | |
/// Same as parse(data, size) for a C string; the length is obtained with strlen, so data must be NUL-terminated.
template <typename T>
inline T parse(const char * data)
{
    return parse<T>(data, strlen(data));
}
996 | |
/// Same as parse(data, size), with the text taken from a String.
template <typename T>
inline T parse(const String & s)
{
    return parse<T>(s.data(), s.size());
}
1002 | |
1003 | |
1004 | /** Skip UTF-8 BOM if it is under cursor. |
1005 | * As BOM is usually located at start of stream, and buffer size is usually larger than three bytes, |
1006 | * the function expects, that all three bytes of BOM is fully in buffer (otherwise it don't skip anything). |
1007 | */ |
1008 | inline void skipBOMIfExists(ReadBuffer & buf) |
1009 | { |
1010 | if (!buf.eof() |
1011 | && buf.position() + 3 < buf.buffer().end() |
1012 | && buf.position()[0] == '\xEF' |
1013 | && buf.position()[1] == '\xBB' |
1014 | && buf.position()[2] == '\xBF') |
1015 | { |
1016 | buf.position() += 3; |
1017 | } |
1018 | } |
1019 | |
1020 | |
/// Skip to the character right after the next \n. If there is no \n in the stream, skip to the end.
void skipToNextLineOrEOF(ReadBuffer & buf);
1023 | |
/// Skip to the character right after the next unescaped \n. If there is no \n in the stream, skip to the end.
/// Does not throw on invalid escape sequences.
void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
1026 | |
1027 | template <class TReadBuffer, class... Types> |
1028 | std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types&&... args) |
1029 | { |
1030 | if (method == DB::CompressionMethod::Gzip) |
1031 | { |
1032 | auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...); |
1033 | return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method); |
1034 | } |
1035 | return std::make_unique<TReadBuffer>(args...); |
1036 | } |
1037 | |
/** Copies the data lying between the buffer's internal position (in.position())
  * and the current position (passed as an argument) into memory.
  */
void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current);
1042 | |
/** This function is the negation of eof().
  * In fact it returns whether data was loaded into the ReadBuffer's internal buffer or not,
  * and it saves the data from the buffer's position up to current if there is no pending data in the buffer.
  * Why is such a strange function needed? Suppose the buffer's internal position is in the middle
  * of the buffer and the current cursor is at the end of the buffer. Calling eof() calls next(),
  * which can fill the buffer with new data, so the data from the previous buffer state would be lost.
  */
bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current);
1051 | |
1052 | } |
1053 | |