| 1 | #pragma once |
| 2 | |
| 3 | #include <cmath> |
| 4 | #include <cstring> |
| 5 | #include <limits> |
| 6 | #include <algorithm> |
| 7 | #include <iterator> |
| 8 | |
| 9 | #include <type_traits> |
| 10 | |
| 11 | #include <common/DateLUT.h> |
| 12 | #include <common/LocalDate.h> |
| 13 | #include <common/LocalDateTime.h> |
| 14 | #include <common/StringRef.h> |
| 15 | #include <common/arithmeticOverflow.h> |
| 16 | |
| 17 | #include <Core/Types.h> |
| 18 | #include <Core/DecimalFunctions.h> |
| 19 | #include <Core/UUID.h> |
| 20 | |
| 21 | #include <Common/Exception.h> |
| 22 | #include <Common/StringUtils/StringUtils.h> |
| 23 | #include <Common/Arena.h> |
| 24 | #include <Common/UInt128.h> |
| 25 | #include <Common/intExp.h> |
| 26 | |
| 27 | #include <Formats/FormatSettings.h> |
| 28 | |
| 29 | #include <IO/CompressionMethod.h> |
| 30 | #include <IO/ReadBuffer.h> |
| 31 | #include <IO/ReadBufferFromMemory.h> |
| 32 | #include <IO/VarInt.h> |
| 33 | #include <IO/ZlibInflatingReadBuffer.h> |
| 34 | |
| 35 | #include <DataTypes/DataTypeDateTime.h> |
| 36 | |
| 37 | #ifdef __clang__ |
| 38 | #pragma clang diagnostic push |
| 39 | #pragma clang diagnostic ignored "-Wdouble-promotion" |
| 40 | #endif |
| 41 | |
| 42 | #include <double-conversion/double-conversion.h> |
| 43 | |
| 44 | #ifdef __clang__ |
| 45 | #pragma clang diagnostic pop |
| 46 | #endif |
| 47 | |
| 48 | |
/// 1 GiB. Default upper bound on the element count accepted by the length-prefixed
/// binary readers in this header (used as the default limit for both strings and vectors).
#define DEFAULT_MAX_STRING_SIZE (1ULL << 30)
| 51 | |
| 52 | |
| 53 | namespace DB |
| 54 | { |
| 55 | |
/// Error codes referenced by the helpers in this header.
/// They are only declared here (extern); the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int CANNOT_PARSE_DATE;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int CANNOT_PARSE_UUID;
    extern const int CANNOT_READ_ARRAY_FROM_TEXT;
    extern const int CANNOT_PARSE_NUMBER;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
| 65 | |
| 66 | /// Helper functions for formatted input. |
| 67 | |
/// Translates the character that follows a backslash into the character denoted by that escape sequence.
/// Characters without a special meaning are returned as is.
inline char parseEscapeSequence(char c)
{
    switch (c)
    {
        case 'a': return '\a';
        case 'b': return '\b';
        case 'e': return '\x1B';    /// \e is not standard in C and C++, although gcc and clang accept it.
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
        case '0': return '\0';
        default:  return c;
    }
}
| 94 | |
| 95 | |
/// The following function is located in VarInt.h:
/// inline void throwReadAfterEOF()
| 98 | |
| 99 | |
| 100 | inline void readChar(char & x, ReadBuffer & buf) |
| 101 | { |
| 102 | if (!buf.eof()) |
| 103 | { |
| 104 | x = *buf.position(); |
| 105 | ++buf.position(); |
| 106 | } |
| 107 | else |
| 108 | throwReadAfterEOF(); |
| 109 | } |
| 110 | |
| 111 | |
| 112 | /// Read POD-type in native format |
| 113 | template <typename T> |
| 114 | inline void readPODBinary(T & x, ReadBuffer & buf) |
| 115 | { |
| 116 | buf.readStrict(reinterpret_cast<char *>(&x), sizeof(x)); |
| 117 | } |
| 118 | |
/// Reads an integer in native binary format. Plain forwarder to readPODBinary.
template <typename T>
inline void readIntBinary(T & x, ReadBuffer & buf)
{
    readPODBinary(x, buf);
}

/// Reads a floating point number in native binary format. Plain forwarder to readPODBinary.
template <typename T>
inline void readFloatBinary(T & x, ReadBuffer & buf)
{
    readPODBinary(x, buf);
}
| 130 | |
| 131 | |
| 132 | inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE) |
| 133 | { |
| 134 | size_t size = 0; |
| 135 | readVarUInt(size, buf); |
| 136 | |
| 137 | if (size > MAX_STRING_SIZE) |
| 138 | throw Poco::Exception("Too large string size." ); |
| 139 | |
| 140 | s.resize(size); |
| 141 | buf.readStrict(s.data(), size); |
| 142 | } |
| 143 | |
| 144 | |
| 145 | inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf) |
| 146 | { |
| 147 | size_t size = 0; |
| 148 | readVarUInt(size, buf); |
| 149 | |
| 150 | char * data = arena.alloc(size); |
| 151 | buf.readStrict(data, size); |
| 152 | |
| 153 | return StringRef(data, size); |
| 154 | } |
| 155 | |
| 156 | |
| 157 | template <typename T> |
| 158 | void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE) |
| 159 | { |
| 160 | size_t size = 0; |
| 161 | readVarUInt(size, buf); |
| 162 | |
| 163 | if (size > MAX_VECTOR_SIZE) |
| 164 | throw Poco::Exception("Too large vector size." ); |
| 165 | |
| 166 | v.resize(size); |
| 167 | for (size_t i = 0; i < size; ++i) |
| 168 | readBinary(v[i], buf); |
| 169 | } |
| 170 | |
| 171 | |
/// Declarations only; the definitions are not part of this header.
/// NOTE(review): judging by the names, these presumably throw when the buffer content
/// does not match `s` / when the buffer is not at EOF - confirm against the implementation.
void assertString(const char * s, ReadBuffer & buf);
void assertEOF(ReadBuffer & buf);

/// Never returns (declared [[noreturn]]). Called by assertChar when the expected symbol is not found.
[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
| 176 | |
| 177 | inline void assertChar(char symbol, ReadBuffer & buf) |
| 178 | { |
| 179 | if (buf.eof() || *buf.position() != symbol) |
| 180 | { |
| 181 | char err[2] = {symbol, '\0'}; |
| 182 | throwAtAssertionFailed(err, buf); |
| 183 | } |
| 184 | ++buf.position(); |
| 185 | } |
| 186 | |
/// Convenience overload: forwards the null-terminated contents of `s` to assertString(const char *, ...).
inline void assertString(const String & s, ReadBuffer & buf)
{
    assertString(s.c_str(), buf);
}

/// Declared here, defined elsewhere.
/// NOTE(review): presumably the non-throwing counterpart of assertString (returns whether `s` was matched) - confirm.
bool checkString(const char * s, ReadBuffer & buf);

/// Convenience overload: forwards the null-terminated contents of `s` to checkString(const char *, ...).
inline bool checkString(const String & s, ReadBuffer & buf)
{
    return checkString(s.c_str(), buf);
}
| 197 | |
| 198 | inline bool checkChar(char c, ReadBuffer & buf) |
| 199 | { |
| 200 | if (buf.eof() || *buf.position() != c) |
| 201 | return false; |
| 202 | ++buf.position(); |
| 203 | return true; |
| 204 | } |
| 205 | |
/// Case-insensitive variant of checkString. Declared here, defined elsewhere.
bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf);

/// Convenience overload taking String; forwards s.c_str().
inline bool checkStringCaseInsensitive(const String & s, ReadBuffer & buf)
{
    return checkStringCaseInsensitive(s.c_str(), buf);
}

/// Case-insensitive variant of assertString. Declared here, defined elsewhere.
void assertStringCaseInsensitive(const char * s, ReadBuffer & buf);

/// Convenience overload taking String; forwards s.c_str().
inline void assertStringCaseInsensitive(const String & s, ReadBuffer & buf)
{
    return assertStringCaseInsensitive(s.c_str(), buf);
}
| 217 | |
/** Check that the next character in buf matches the first character of s.
  * If it does, check all the remaining characters of s and throw an exception if they do not match.
  * If it does not, return false and leave the position in the buffer unchanged.
  */
bool checkStringByFirstCharacterAndAssertTheRest(const char * s, ReadBuffer & buf);
bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const char * s, ReadBuffer & buf);

/// Convenience overload taking String; forwards s.c_str().
inline bool checkStringByFirstCharacterAndAssertTheRest(const String & s, ReadBuffer & buf)
{
    return checkStringByFirstCharacterAndAssertTheRest(s.c_str(), buf);
}

/// Convenience overload taking String; forwards s.c_str().
inline bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const String & s, ReadBuffer & buf)
{
    return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(s.c_str(), buf);
}
| 234 | |
| 235 | |
| 236 | inline void readBoolText(bool & x, ReadBuffer & buf) |
| 237 | { |
| 238 | char tmp = '0'; |
| 239 | readChar(tmp, buf); |
| 240 | x = tmp != '0'; |
| 241 | } |
| 242 | |
| 243 | inline void readBoolTextWord(bool & x, ReadBuffer & buf) |
| 244 | { |
| 245 | if (buf.eof()) |
| 246 | throwReadAfterEOF(); |
| 247 | |
| 248 | if (*buf.position() == 't') |
| 249 | { |
| 250 | assertString("true" , buf); |
| 251 | x = true; |
| 252 | } |
| 253 | else |
| 254 | { |
| 255 | assertString("false" , buf); |
| 256 | x = false; |
| 257 | } |
| 258 | } |
| 259 | |
/// Compile-time switch for readIntTextImpl and its wrappers:
/// whether accumulating the digits is checked for arithmetic overflow.
enum class ReadIntTextCheckOverflow
{
    DO_NOT_CHECK_OVERFLOW,  /// Digits are accumulated with plain (wrapping, unsigned) arithmetic.
    CHECK_OVERFLOW,         /// Accumulation is done with overflow-checking helpers; overflow is reported as a failure.
};
| 265 | |
| 266 | template <typename T, typename ReturnType = void, ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW> |
| 267 | ReturnType readIntTextImpl(T & x, ReadBuffer & buf) |
| 268 | { |
| 269 | static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; |
| 270 | |
| 271 | bool negative = false; |
| 272 | std::make_unsigned_t<T> res = 0; |
| 273 | if (buf.eof()) |
| 274 | { |
| 275 | if constexpr (throw_exception) |
| 276 | throwReadAfterEOF(); |
| 277 | else |
| 278 | return ReturnType(false); |
| 279 | } |
| 280 | |
| 281 | const size_t initial_pos = buf.count(); |
| 282 | while (!buf.eof()) |
| 283 | { |
| 284 | switch (*buf.position()) |
| 285 | { |
| 286 | case '+': |
| 287 | break; |
| 288 | case '-': |
| 289 | if constexpr (is_signed_v<T>) |
| 290 | negative = true; |
| 291 | else |
| 292 | { |
| 293 | if constexpr (throw_exception) |
| 294 | throw Exception("Unsigned type must not contain '-' symbol" , ErrorCodes::CANNOT_PARSE_NUMBER); |
| 295 | else |
| 296 | return ReturnType(false); |
| 297 | } |
| 298 | break; |
| 299 | case '0': [[fallthrough]]; |
| 300 | case '1': [[fallthrough]]; |
| 301 | case '2': [[fallthrough]]; |
| 302 | case '3': [[fallthrough]]; |
| 303 | case '4': [[fallthrough]]; |
| 304 | case '5': [[fallthrough]]; |
| 305 | case '6': [[fallthrough]]; |
| 306 | case '7': [[fallthrough]]; |
| 307 | case '8': [[fallthrough]]; |
| 308 | case '9': |
| 309 | if constexpr (check_overflow == ReadIntTextCheckOverflow::CHECK_OVERFLOW) |
| 310 | { |
| 311 | // perform relativelly slow overflow check only when number of decimal digits so far is close to the max for given type. |
| 312 | if (buf.count() - initial_pos >= std::numeric_limits<T>::max_digits10) |
| 313 | { |
| 314 | if (common::mulOverflow(res, static_cast<decltype(res)>(10), res) |
| 315 | || common::addOverflow(res, static_cast<decltype(res)>(*buf.position() - '0'), res)) |
| 316 | return ReturnType(false); |
| 317 | break; |
| 318 | } |
| 319 | } |
| 320 | res *= 10; |
| 321 | res += *buf.position() - '0'; |
| 322 | break; |
| 323 | default: |
| 324 | goto end; |
| 325 | } |
| 326 | ++buf.position(); |
| 327 | } |
| 328 | |
| 329 | end: |
| 330 | x = negative ? -res : res; |
| 331 | |
| 332 | return ReturnType(true); |
| 333 | } |
| 334 | |
/// Throwing variant: parses an integer into x via readIntTextImpl with ReturnType = void.
/// Overflow is not checked unless requested through the check_overflow template argument.
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
void readIntText(T & x, ReadBuffer & buf)
{
    readIntTextImpl<T, void, check_overflow>(x, buf);
}

/// Non-throwing variant: returns the bool result of readIntTextImpl instead of throwing.
/// Note that, unlike readIntText, overflow is checked by default.
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
bool tryReadIntText(T & x, ReadBuffer & buf)
{
    return readIntTextImpl<T, bool, check_overflow>(x, buf);
}

/// Reads the underlying integer of a Decimal (x.value) as plain integer text.
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
void readIntText(Decimal<T> & x, ReadBuffer & buf)
{
    readIntText<check_overflow>(x.value, buf);
}
| 352 | |
| 353 | /** More efficient variant (about 1.5 times on real dataset). |
| 354 | * Differs in following: |
| 355 | * - for numbers starting with zero, parsed only zero; |
| 356 | * - symbol '+' before number is not supported; |
| 357 | * - symbols :;<=>? are parsed as some numbers. |
| 358 | */ |
| 359 | template <typename T, bool throw_on_error = true> |
| 360 | void readIntTextUnsafe(T & x, ReadBuffer & buf) |
| 361 | { |
| 362 | bool negative = false; |
| 363 | std::make_unsigned_t<T> res = 0; |
| 364 | |
| 365 | auto on_error = [] |
| 366 | { |
| 367 | if (throw_on_error) |
| 368 | throwReadAfterEOF(); |
| 369 | }; |
| 370 | |
| 371 | if (unlikely(buf.eof())) |
| 372 | return on_error(); |
| 373 | |
| 374 | if (is_signed_v<T> && *buf.position() == '-') |
| 375 | { |
| 376 | ++buf.position(); |
| 377 | negative = true; |
| 378 | if (unlikely(buf.eof())) |
| 379 | return on_error(); |
| 380 | } |
| 381 | |
| 382 | if (*buf.position() == '0') /// There are many zeros in real datasets. |
| 383 | { |
| 384 | ++buf.position(); |
| 385 | x = 0; |
| 386 | return; |
| 387 | } |
| 388 | |
| 389 | while (!buf.eof()) |
| 390 | { |
| 391 | /// This check is suddenly faster than |
| 392 | /// unsigned char c = *buf.position() - '0'; |
| 393 | /// if (c < 10) |
| 394 | /// for unknown reason on Xeon E5645. |
| 395 | |
| 396 | if ((*buf.position() & 0xF0) == 0x30) /// It makes sense to have this condition inside loop. |
| 397 | { |
| 398 | res *= 10; |
| 399 | res += *buf.position() & 0x0F; |
| 400 | ++buf.position(); |
| 401 | } |
| 402 | else |
| 403 | break; |
| 404 | } |
| 405 | |
| 406 | /// See note about undefined behaviour above. |
| 407 | x = is_signed_v<T> && negative ? -res : res; |
| 408 | } |
| 409 | |
/// Non-throwing flavour of readIntTextUnsafe (instantiates it with throw_on_error = false).
/// Note that it returns void: on premature end of input x is simply left unassigned
/// and the caller gets no indication of the failure.
template <typename T>
void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
{
    return readIntTextUnsafe<T, false>(x, buf);
}
| 415 | |
| 416 | |
/// Floating point parsers; declared here, see readFloatText.h for the implementation.
template <typename T> void readFloatText(T & x, ReadBuffer & in);
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in);


/// The declarations below are implemented outside of this header.

/// Simple: everything until '\n' or '\t'.
void readString(String & s, ReadBuffer & buf);

void readEscapedString(String & s, ReadBuffer & buf);

void readQuotedString(String & s, ReadBuffer & buf);
void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);

void readDoubleQuotedString(String & s, ReadBuffer & buf);
void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);

void readJSONString(String & s, ReadBuffer & buf);

void readBackQuotedString(String & s, ReadBuffer & buf);
void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);

void readStringUntilEOF(String & s, ReadBuffer & buf);
void readEscapedStringUntilEOL(String & s, ReadBuffer & buf);


/** Read a string in CSV format.
  * Parsing rules:
  * - the string may be placed in quotes; the quotes may be single: ' if FormatSettings::CSV::allow_single_quotes is true
  *   or double: " if FormatSettings::CSV::allow_double_quotes is true;
  * - or the string may be unquoted - this is determined by the first character;
  * - if the string is unquoted, it is read until the next delimiter,
  *   or until the end of line (CR or LF),
  *   or until the end of stream;
  *   spaces and tabs at the beginning and at the end of an unquoted string are consumed but ignored
  *   (note that this behaviour differs from the RFC);
  * - if the string is in quotes, it is read until the closing quote,
  *   and a sequence of two consecutive quotes is parsed as a single quote inside the string.
  */
void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
| 455 | |
| 456 | |
/// The *Into variants read and append the result to an array of characters (`Vector`).
/// NullSink satisfies the same interface and can be passed when the data should only be skipped.
/// All of them are only declared here; the definitions are elsewhere.
template <typename Vector>
void readStringInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readNullTerminated(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readEscapedStringInto(Vector & s, ReadBuffer & buf);

template <bool enable_sql_style_quoting, typename Vector>
void readQuotedStringInto(Vector & s, ReadBuffer & buf);

template <bool enable_sql_style_quoting, typename Vector>
void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf);

template <bool enable_sql_style_quoting, typename Vector>
void readBackQuotedStringInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);

template <typename Vector>
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);

/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
template <typename Vector, typename ReturnType = void>
ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf);

/// Non-throwing wrapper: instantiates readJSONStringInto with ReturnType = bool.
template <typename Vector>
bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
{
    return readJSONStringInto<Vector, bool>(s, buf);
}
| 491 | |
/// This can be used as the `Vector` template parameter of the *Into functions when the data should just be skipped:
/// both operations accept their arguments and discard them.
struct NullSink
{
    void append(const char *, size_t) {}
    void push_back(char) {}
};
| 498 | |
/// Declarations only. Judging by the parameter names, the source is the 36-character textual form
/// and the destination receives 16 bytes; the second overload writes through a reverse iterator
/// (this is the overload used by readUUIDText).
void parseUUID(const UInt8 * src36, UInt8 * dst16);
void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);

/// Declaration only; defined elsewhere.
template <typename IteratorSrc, typename IteratorDst>
void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes);


/// Slow path of readDateTextImpl, used when fewer than 10 bytes are available in the current buffer.
/// Declaration only; defined elsewhere.
template <typename ReturnType>
ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf);
| 508 | |
| 509 | /// In YYYY-MM-DD format. |
| 510 | /// For convenience, Month and Day parts can have single digit instead of two digits. |
| 511 | /// Any separators other than '-' are supported. |
| 512 | template <typename ReturnType = void> |
| 513 | inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) |
| 514 | { |
| 515 | /// Optimistic path, when whole value is in buffer. |
| 516 | if (buf.position() + 10 <= buf.buffer().end()) |
| 517 | { |
| 518 | UInt16 year = (buf.position()[0] - '0') * 1000 + (buf.position()[1] - '0') * 100 + (buf.position()[2] - '0') * 10 + (buf.position()[3] - '0'); |
| 519 | buf.position() += 5; |
| 520 | |
| 521 | UInt8 month = buf.position()[0] - '0'; |
| 522 | if (isNumericASCII(buf.position()[1])) |
| 523 | { |
| 524 | month = month * 10 + buf.position()[1] - '0'; |
| 525 | buf.position() += 3; |
| 526 | } |
| 527 | else |
| 528 | buf.position() += 2; |
| 529 | |
| 530 | UInt8 day = buf.position()[0] - '0'; |
| 531 | if (isNumericASCII(buf.position()[1])) |
| 532 | { |
| 533 | day = day * 10 + buf.position()[1] - '0'; |
| 534 | buf.position() += 2; |
| 535 | } |
| 536 | else |
| 537 | buf.position() += 1; |
| 538 | |
| 539 | date = LocalDate(year, month, day); |
| 540 | return ReturnType(true); |
| 541 | } |
| 542 | else |
| 543 | return readDateTextFallback<ReturnType>(date, buf); |
| 544 | } |
| 545 | |
| 546 | template <typename ReturnType = void> |
| 547 | inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) |
| 548 | { |
| 549 | static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; |
| 550 | |
| 551 | LocalDate local_date; |
| 552 | |
| 553 | if constexpr (throw_exception) |
| 554 | readDateTextImpl<ReturnType>(local_date, buf); |
| 555 | else if (!readDateTextImpl<ReturnType>(local_date, buf)) |
| 556 | return false; |
| 557 | |
| 558 | date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); |
| 559 | return ReturnType(true); |
| 560 | } |
| 561 | |
| 562 | |
/// Throwing date readers: instantiate readDateTextImpl with ReturnType = void.
inline void readDateText(LocalDate & date, ReadBuffer & buf)
{
    readDateTextImpl<void>(date, buf);
}

inline void readDateText(DayNum & date, ReadBuffer & buf)
{
    readDateTextImpl<void>(date, buf);
}

/// Non-throwing date readers: instantiate readDateTextImpl with ReturnType = bool and return its result.
inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf)
{
    return readDateTextImpl<bool>(date, buf);
}

inline bool tryReadDateText(DayNum & date, ReadBuffer & buf)
{
    return readDateTextImpl<bool>(date, buf);
}
| 582 | |
| 583 | |
| 584 | inline void readUUIDText(UUID & uuid, ReadBuffer & buf) |
| 585 | { |
| 586 | char s[36]; |
| 587 | size_t size = buf.read(s, 36); |
| 588 | |
| 589 | if (size != 36) |
| 590 | { |
| 591 | s[size] = 0; |
| 592 | throw Exception(std::string("Cannot parse uuid " ) + s, ErrorCodes::CANNOT_PARSE_UUID); |
| 593 | } |
| 594 | |
| 595 | parseUUID(reinterpret_cast<const UInt8 *>(s), std::reverse_iterator<UInt8 *>(reinterpret_cast<UInt8 *>(&uuid) + 16)); |
| 596 | } |
| 597 | |
| 598 | |
/// Parses a value of type T from a memory range. Declaration only; the definition is not in this part of the file.
template <typename T>
inline T parse(const char * data, size_t size);

/// Convenience wrapper: parses a value of type T from the contents of a String.
template <typename T>
inline T parseFromString(const String & str)
{
    return parse<T>(str.data(), str.size());
}
| 607 | |
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wredundant-decls"
// Just don't mess with it. If this redundant redeclaration is removed, then ReaderHelpers.h has to be included.
// That leads to the inclusion of Arena.h, which has a problem with ASAN stuff being included properly and messing up
// a macro definition, which interferes with... You don't want to know, really.
UInt128 stringToUUID(const String & str);
#pragma GCC diagnostic pop
| 615 | |
/// Slow path of readDateTimeTextImpl, used when fewer than 19 bytes are available in the current buffer.
/// Declaration only; defined elsewhere.
template <typename ReturnType = void>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);
| 618 | |
| 619 | /** In YYYY-MM-DD hh:mm:ss format, according to specified time zone. |
| 620 | * As an exception, also supported parsing of unix timestamp in form of decimal number. |
| 621 | */ |
| 622 | template <typename ReturnType = void> |
| 623 | inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) |
| 624 | { |
| 625 | /** Read 10 characters, that could represent unix timestamp. |
| 626 | * Only unix timestamp of 5-10 characters is supported. |
| 627 | * Then look at 5th character. If it is a number - treat whole as unix timestamp. |
| 628 | * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss format. |
| 629 | */ |
| 630 | |
| 631 | /// Optimistic path, when whole value is in buffer. |
| 632 | const char * s = buf.position(); |
| 633 | if (s + 19 <= buf.buffer().end()) |
| 634 | { |
| 635 | if (s[4] < '0' || s[4] > '9') |
| 636 | { |
| 637 | UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); |
| 638 | UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); |
| 639 | UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); |
| 640 | |
| 641 | UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0'); |
| 642 | UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0'); |
| 643 | UInt8 second = (s[17] - '0') * 10 + (s[18] - '0'); |
| 644 | |
| 645 | if (unlikely(year == 0)) |
| 646 | datetime = 0; |
| 647 | else |
| 648 | datetime = date_lut.makeDateTime(year, month, day, hour, minute, second); |
| 649 | |
| 650 | buf.position() += 19; |
| 651 | return ReturnType(true); |
| 652 | } |
| 653 | else |
| 654 | /// Why not readIntTextUnsafe? Because for needs of AdFox, parsing of unix timestamp with leading zeros is supported: 000...NNNN. |
| 655 | return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf); |
| 656 | } |
| 657 | else |
| 658 | return readDateTimeTextFallback<ReturnType>(datetime, buf, date_lut); |
| 659 | } |
| 660 | |
| 661 | template <typename ReturnType> |
| 662 | inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) |
| 663 | { |
| 664 | time_t whole; |
| 665 | if (!readDateTimeTextImpl<bool>(whole, buf, date_lut)) |
| 666 | { |
| 667 | return ReturnType(false); |
| 668 | } |
| 669 | |
| 670 | DB::DecimalUtils::DecimalComponents<DateTime64::NativeType> c{static_cast<DateTime64::NativeType>(whole), 0}; |
| 671 | |
| 672 | if (!buf.eof() && *buf.position() == '.') |
| 673 | { |
| 674 | buf.ignore(1); // skip separator |
| 675 | const auto pos_before_fractional = buf.count(); |
| 676 | if (!tryReadIntText<ReadIntTextCheckOverflow::CHECK_OVERFLOW>(c.fractional, buf)) |
| 677 | { |
| 678 | return ReturnType(false); |
| 679 | } |
| 680 | |
| 681 | // Adjust fractional part to the scale, since decimalFromComponents knows nothing |
| 682 | // about convention of ommiting trailing zero on fractional part |
| 683 | // and assumes that fractional part value is less than 10^scale. |
| 684 | |
| 685 | // If scale is 3, but we read '12', promote fractional part to '120'. |
| 686 | // And vice versa: if we read '1234', denote it to '123'. |
| 687 | const auto fractional_length = static_cast<Int32>(buf.count() - pos_before_fractional); |
| 688 | if (const auto adjust_scale = static_cast<Int32>(scale) - fractional_length; adjust_scale > 0) |
| 689 | { |
| 690 | c.fractional *= common::exp10_i64(adjust_scale); |
| 691 | } |
| 692 | else if (adjust_scale < 0) |
| 693 | { |
| 694 | c.fractional /= common::exp10_i64(-1 * adjust_scale); |
| 695 | } |
| 696 | } |
| 697 | |
| 698 | datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(c, scale); |
| 699 | |
| 700 | return ReturnType(true); |
| 701 | } |
| 702 | |
/// Throwing readers: instantiate readDateTimeTextImpl with ReturnType = void.
/// The time zone defaults to DateLUT::instance().
inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    readDateTimeTextImpl<void>(datetime, buf, date_lut);
}

inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    readDateTimeTextImpl<void>(datetime64, scale, buf, date_lut);
}

/// Non-throwing readers: instantiate readDateTimeTextImpl with ReturnType = bool and return its result.
inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    return readDateTimeTextImpl<bool>(datetime, buf, date_lut);
}

inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
{
    return readDateTimeTextImpl<bool>(datetime64, scale, buf, date_lut);
}
| 722 | |
| 723 | inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) |
| 724 | { |
| 725 | char s[19]; |
| 726 | size_t size = buf.read(s, 19); |
| 727 | if (19 != size) |
| 728 | { |
| 729 | s[size] = 0; |
| 730 | throw Exception(std::string("Cannot parse datetime " ) + s, ErrorCodes::CANNOT_PARSE_DATETIME); |
| 731 | } |
| 732 | |
| 733 | datetime.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0')); |
| 734 | datetime.month((s[5] - '0') * 10 + (s[6] - '0')); |
| 735 | datetime.day((s[8] - '0') * 10 + (s[9] - '0')); |
| 736 | |
| 737 | datetime.hour((s[11] - '0') * 10 + (s[12] - '0')); |
| 738 | datetime.minute((s[14] - '0') * 10 + (s[15] - '0')); |
| 739 | datetime.second((s[17] - '0') * 10 + (s[18] - '0')); |
| 740 | } |
| 741 | |
| 742 | |
/// Generic methods to read a value in native binary format.
/// Arithmetic types (as determined by is_arithmetic_v) are read as raw bytes.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readBinary(T & x, ReadBuffer & buf) { readPODBinary(x, buf); }

/// Strings are length-prefixed (see readStringBinary); all the other types below are read as raw bytes.
inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
inline void readBinary(Int128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt256 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Decimal32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Decimal64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
| 756 | |
| 757 | |
/// Generic methods to read a value in text tab-separated format.
/// Integral types are parsed with readIntText (no overflow check by default).
template <typename T>
inline std::enable_if_t<is_integral_v<T>, void>
readText(T & x, ReadBuffer & buf) { readIntText(x, buf); }

/// Floating point types are parsed with readFloatText.
template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, void>
readText(T & x, ReadBuffer & buf) { readFloatText(x, buf); }

/// Each of the remaining overloads forwards to the dedicated reader for its type.
inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); }
[[noreturn]] inline void readText(UInt128 &, ReadBuffer &)
{
    /** Because UInt128 isn't a natural type (it has no arithmetic operators and is only used as an intermediary type for UUID),
      * execution should never arrive here. But because the DataTypeNumber class is used, at least a definition is required.
      * It always throws ILLEGAL_TYPE_OF_ARGUMENT.
      */
    throw Exception("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
| 779 | |
/// Generic methods to read a value in text format,
/// possibly in single quotes (only for data types that use quotes in the VALUES format of the INSERT statement in SQL).
/// Arithmetic types are not quoted: the call is forwarded to readText.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }

inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }

/// A date must be enclosed in single quotes; both quotes are mandatory (checked with assertChar).
inline void readQuoted(LocalDate & x, ReadBuffer & buf)
{
    assertChar('\'', buf);
    readDateText(x, buf);
    assertChar('\'', buf);
}

/// A date with time must be enclosed in single quotes; both quotes are mandatory (checked with assertChar).
inline void readQuoted(LocalDateTime & x, ReadBuffer & buf)
{
    assertChar('\'', buf);
    readDateTimeText(x, buf);
    assertChar('\'', buf);
}
| 801 | |
| 802 | |
/// Same as readQuoted, but in double quotes.
/// Arithmetic types are not quoted: the call is forwarded to readText.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }

inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }

/// A date must be enclosed in double quotes; both quotes are mandatory (checked with assertChar).
inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf)
{
    assertChar('"', buf);
    readDateText(x, buf);
    assertChar('"', buf);
}

/// A date with time must be enclosed in double quotes; both quotes are mandatory (checked with assertChar).
inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
{
    assertChar('"', buf);
    readDateTimeText(x, buf);
    assertChar('"', buf);
}
| 823 | |
| 824 | |
| 825 | /// CSV, for numbers, dates: quotes are optional, no special escaping rules. |
| 826 | template <typename T> |
| 827 | inline void readCSVSimple(T & x, ReadBuffer & buf) |
| 828 | { |
| 829 | if (buf.eof()) |
| 830 | throwReadAfterEOF(); |
| 831 | |
| 832 | char maybe_quote = *buf.position(); |
| 833 | |
| 834 | if (maybe_quote == '\'' || maybe_quote == '\"') |
| 835 | ++buf.position(); |
| 836 | |
| 837 | readText(x, buf); |
| 838 | |
| 839 | if (maybe_quote == '\'' || maybe_quote == '\"') |
| 840 | assertChar(maybe_quote, buf); |
| 841 | } |
| 842 | |
/// CSV readers. Arithmetic types, dates and UUIDs use readCSVSimple (optionally quoted, no escaping);
/// strings use readCSVString, which needs the CSV format settings.
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T>, void>
readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); }

inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
[[noreturn]] inline void readCSV(UInt128 &, ReadBuffer &)
{
    /** Because UInt128 isn't a natural type (it has no arithmetic operators and is only used as an intermediary type for UUID),
      * execution should never arrive here. But because the DataTypeNumber class is used, at least a definition is required.
      * It always throws ILLEGAL_TYPE_OF_ARGUMENT.
      */
    throw Exception("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
| 858 | |
| 859 | template <typename T> |
| 860 | void readBinary(std::vector<T> & x, ReadBuffer & buf) |
| 861 | { |
| 862 | size_t size = 0; |
| 863 | readVarUInt(size, buf); |
| 864 | |
| 865 | if (size > DEFAULT_MAX_STRING_SIZE) |
| 866 | throw Poco::Exception("Too large vector size." ); |
| 867 | |
| 868 | x.resize(size); |
| 869 | for (size_t i = 0; i < size; ++i) |
| 870 | readBinary(x[i], buf); |
| 871 | } |
| 872 | |
| 873 | template <typename T> |
| 874 | void readQuoted(std::vector<T> & x, ReadBuffer & buf) |
| 875 | { |
| 876 | bool first = true; |
| 877 | assertChar('[', buf); |
| 878 | while (!buf.eof() && *buf.position() != ']') |
| 879 | { |
| 880 | if (!first) |
| 881 | { |
| 882 | if (*buf.position() == ',') |
| 883 | ++buf.position(); |
| 884 | else |
| 885 | throw Exception("Cannot read array from text" , ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); |
| 886 | } |
| 887 | |
| 888 | first = false; |
| 889 | |
| 890 | x.push_back(T()); |
| 891 | readQuoted(x.back(), buf); |
| 892 | } |
| 893 | assertChar(']', buf); |
| 894 | } |
| 895 | |
| 896 | template <typename T> |
| 897 | void readDoubleQuoted(std::vector<T> & x, ReadBuffer & buf) |
| 898 | { |
| 899 | bool first = true; |
| 900 | assertChar('[', buf); |
| 901 | while (!buf.eof() && *buf.position() != ']') |
| 902 | { |
| 903 | if (!first) |
| 904 | { |
| 905 | if (*buf.position() == ',') |
| 906 | ++buf.position(); |
| 907 | else |
| 908 | throw Exception("Cannot read array from text" , ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); |
| 909 | } |
| 910 | |
| 911 | first = false; |
| 912 | |
| 913 | x.push_back(T()); |
| 914 | readDoubleQuoted(x.back(), buf); |
| 915 | } |
| 916 | assertChar(']', buf); |
| 917 | } |
| 918 | |
| 919 | template <typename T> |
| 920 | void readText(std::vector<T> & x, ReadBuffer & buf) |
| 921 | { |
| 922 | readQuoted(x, buf); |
| 923 | } |
| 924 | |
| 925 | |
| 926 | /// Skip whitespace characters. |
| 927 | inline void skipWhitespaceIfAny(ReadBuffer & buf) |
| 928 | { |
| 929 | while (!buf.eof() && isWhitespaceASCII(*buf.position())) |
| 930 | ++buf.position(); |
| 931 | } |
| 932 | |
/// Skips a JSON value in buf.
/// NOTE(review): name_of_field is presumably the name of the field being skipped (e.g. for error messages);
/// the definition is not in this header - confirm against the implementation.
void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field);
| 935 | |
| 936 | |
/** Read a serialized exception from buf into e.
  * During serialization/deserialization some information is lost:
  * the type is cut to the base class, 'message' is replaced by 'displayText', and the stack trace is appended to 'message'.
  * An additional message can be appended to the exception (for example, information about where it was received from).
  */
void readException(Exception & e, ReadBuffer & buf, const String & additional_message = "");
/// NOTE(review): judging by the name, reads a serialized exception from buf (see readException) and throws it;
/// the definition is not in this header - confirm against the implementation.
void readAndThrowException(ReadBuffer & buf, const String & additional_message = "");
| 944 | |
| 945 | |
| 946 | /** Helper function for implementation. |
| 947 | */ |
| 948 | template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T> |
| 949 | static inline const char * tryReadIntText(T & x, const char * pos, const char * end) |
| 950 | { |
| 951 | ReadBufferFromMemory in(pos, end - pos); |
| 952 | tryReadIntText<check_overflow>(x, in); |
| 953 | return pos + in.count(); |
| 954 | } |
| 955 | |
| 956 | |
| 957 | /// Convenient methods for reading something from string in text format. |
| 958 | template <typename T> |
| 959 | inline T parse(const char * data, size_t size) |
| 960 | { |
| 961 | T res; |
| 962 | ReadBufferFromMemory buf(data, size); |
| 963 | readText(res, buf); |
| 964 | return res; |
| 965 | } |
| 966 | |
| 967 | /// Read something from text format, but expect complete parse of given text |
| 968 | /// For example: 723145 -- ok, 213MB -- not ok |
| 969 | template <typename T> |
| 970 | inline T completeParse(const char * data, size_t size) |
| 971 | { |
| 972 | T res; |
| 973 | ReadBufferFromMemory buf(data, size); |
| 974 | readText(res, buf); |
| 975 | assertEOF(buf); |
| 976 | return res; |
| 977 | } |
| 978 | |
| 979 | template <typename T> |
| 980 | inline T completeParse(const String & s) |
| 981 | { |
| 982 | return completeParse<T>(s.data(), s.size()); |
| 983 | } |
| 984 | |
/// completeParse for a zero-terminated C string (length is taken with strlen).
template <typename T>
inline T completeParse(const char * data)
{
    const size_t length = strlen(data);
    return completeParse<T>(data, length);
}
| 990 | |
/// parse for a zero-terminated C string (length is taken with strlen).
template <typename T>
inline T parse(const char * data)
{
    const size_t length = strlen(data);
    return parse<T>(data, length);
}
| 996 | |
| 997 | template <typename T> |
| 998 | inline T parse(const String & s) |
| 999 | { |
| 1000 | return parse<T>(s.data(), s.size()); |
| 1001 | } |
| 1002 | |
| 1003 | |
| 1004 | /** Skip UTF-8 BOM if it is under cursor. |
| 1005 | * As BOM is usually located at start of stream, and buffer size is usually larger than three bytes, |
| 1006 | * the function expects, that all three bytes of BOM is fully in buffer (otherwise it don't skip anything). |
| 1007 | */ |
| 1008 | inline void skipBOMIfExists(ReadBuffer & buf) |
| 1009 | { |
| 1010 | if (!buf.eof() |
| 1011 | && buf.position() + 3 < buf.buffer().end() |
| 1012 | && buf.position()[0] == '\xEF' |
| 1013 | && buf.position()[1] == '\xBB' |
| 1014 | && buf.position()[2] == '\xBF') |
| 1015 | { |
| 1016 | buf.position() += 3; |
| 1017 | } |
| 1018 | } |
| 1019 | |
| 1020 | |
/// Skip to the character right after the next \n. If there is no \n in the stream, skip to the end.
void skipToNextLineOrEOF(ReadBuffer & buf);
| 1023 | |
/// Skip to the character right after the next unescaped \n. If there is no \n in the stream, skip to the end.
/// Does not throw on invalid escape sequences.
void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
| 1026 | |
| 1027 | template <class TReadBuffer, class... Types> |
| 1028 | std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types&&... args) |
| 1029 | { |
| 1030 | if (method == DB::CompressionMethod::Gzip) |
| 1031 | { |
| 1032 | auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...); |
| 1033 | return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method); |
| 1034 | } |
| 1035 | return std::make_unique<TReadBuffer>(args...); |
| 1036 | } |
| 1037 | |
/** This function just copies the data between the buffer's internal position (in.position())
  * and the current position (passed as `current`) into memory.
  */
void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current);
| 1042 | |
/** This function is the opposite of eof().
  * In fact, it returns whether data was loaded into the ReadBuffer's internal buffer or not,
  * and it saves the data from the buffer's position up to `current` if there is no pending data in the buffer.
  * Why do we need this strange function? Suppose the buffer's internal position is in the middle
  * of the buffer and the current cursor is at the end of the buffer. Calling eof() calls next(),
  * which can fill the buffer with new data, so the data from the previous buffer state would be lost.
  */
bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current);
| 1051 | |
| 1052 | } |
| 1053 | |