ReadHelpers.h source code [ClickHouse/dbms/src/IO/ReadHelpers.h]

1	#pragma once
2
3	#include <cmath>
4	#include <cstring>
5	#include <limits>
6	#include <algorithm>
7	#include <iterator>
8
9	#include <type_traits>
10
11	#include <common/DateLUT.h>
12	#include <common/LocalDate.h>
13	#include <common/LocalDateTime.h>
14	#include <common/StringRef.h>
15	#include <common/arithmeticOverflow.h>
16
17	#include <Core/Types.h>
18	#include <Core/DecimalFunctions.h>
19	#include <Core/UUID.h>
20
21	#include <Common/Exception.h>
22	#include <Common/StringUtils/StringUtils.h>
23	#include <Common/Arena.h>
24	#include <Common/UInt128.h>
25	#include <Common/intExp.h>
26
27	#include <Formats/FormatSettings.h>
28
29	#include <IO/CompressionMethod.h>
30	#include <IO/ReadBuffer.h>
31	#include <IO/ReadBufferFromMemory.h>
32	#include <IO/VarInt.h>
33	#include <IO/ZlibInflatingReadBuffer.h>
34
35	#include <DataTypes/DataTypeDateTime.h>
36
37	#ifdef __clang__
38	#pragma clang diagnostic push
39	#pragma clang diagnostic ignored "-Wdouble-promotion"
40	#endif
41
42	#include <double-conversion/double-conversion.h>
43
44	#ifdef __clang__
45	#pragma clang diagnostic pop
46	#endif
47
48
49	/// 1 GiB
50	#define DEFAULT_MAX_STRING_SIZE (1ULL << 30)
51
52
53	namespace DB
54	{
55
56	namespace ErrorCodes
57	{
58	extern const int CANNOT_PARSE_DATE;
59	extern const int CANNOT_PARSE_DATETIME;
60	extern const int CANNOT_PARSE_UUID;
61	extern const int CANNOT_READ_ARRAY_FROM_TEXT;
62	extern const int CANNOT_PARSE_NUMBER;
63	extern const int ILLEGAL_TYPE_OF_ARGUMENT;
64	}
65
66	/// Helper functions for formatted input.
67
/// Maps the character that follows a backslash to the character the escape sequence denotes.
/// Any character that is not one of the recognised escape letters is returned unchanged.
inline char parseEscapeSequence(char c)
{
    switch (c)
    {
        case 'a': return '\a';
        case 'b': return '\b';
        case 'e': return '\x1B'; /// \e escape sequence is non standard for C and C++ but supported by gcc and clang.
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
        case '0': return '\0';
        default:  break;
    }

    /// Not a known escape letter: the character stands for itself.
    return c;
}
94
95
96	/// These functions are located in VarInt.h
97	/// inline void throwReadAfterEOF()
98
99
100	inline void readChar(char & x, ReadBuffer & buf)
101	{
102	if (!buf.eof())
103	{
104	x = *buf.position();
105	++buf.position();
106	}
107	else
108	throwReadAfterEOF();
109	}
110
111
112	/// Read POD-type in native format
113	template <typename T>
114	inline void readPODBinary(T & x, ReadBuffer & buf)
115	{
116	buf.readStrict(reinterpret_cast<char >(&x), sizeof*(x));
117	}
118
119	template <typename T>
120	inline void readIntBinary(T & x, ReadBuffer & buf)
121	{
122	readPODBinary(x, buf);
123	}
124
125	template <typename T>
126	inline void readFloatBinary(T & x, ReadBuffer & buf)
127	{
128	readPODBinary(x, buf);
129	}
130
131
132	inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE)
133	{
134	size_t size = `0`;
135	readVarUInt(size, buf);
136
137	if (size > MAX_STRING_SIZE)
138	throw Poco::Exception ("Too large string size.");
139
140	s.resize(size);
141	buf.readStrict(s.data(), size);
142	}
143
144
145	inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf)
146	{
147	size_t size = `0`;
148	readVarUInt(size, buf);
149
150	char * data = arena.alloc(size);
151	buf.readStrict(data, size);
152
153	return StringRef (data, size);
154	}
155
156
157	template <typename T>
158	void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE)
159	{
160	size_t size = `0`;
161	readVarUInt(size, buf);
162
163	if (size > MAX_VECTOR_SIZE)
164	throw Poco::Exception ("Too large vector size.");
165
166	v.resize(size);
167	for (size_t i = `0`; i < size; ++i)
168	readBinary(v[i], buf);
169	}
170
171
172	void assertString(const char * s, ReadBuffer & buf);
173	void assertEOF(ReadBuffer & buf);
174
175	[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
176
177	inline void assertChar(char symbol, ReadBuffer & buf)
178	{
179	if (buf.eof() \|\| *buf.position() != symbol)
180	{
181	char err[`2`] = {symbol, `'\0'`};
182	throwAtAssertionFailed(err, buf);
183	}
184	++buf.position();
185	}
186
187	inline void assertString(const String & s, ReadBuffer & buf)
188	{
189	assertString(s.c_str(), buf);
190	}
191
192	bool checkString(const char * s, ReadBuffer & buf);
193	inline bool checkString(const String & s, ReadBuffer & buf)
194	{
195	return checkString(s.c_str(), buf);
196	}
197
198	inline bool checkChar(char c, ReadBuffer & buf)
199	{
200	if (buf.eof() \|\| *buf.position() != c)
201	return false;
202	++buf.position();
203	return true;
204	}
205
206	bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf);
207	inline bool checkStringCaseInsensitive(const String & s, ReadBuffer & buf)
208	{
209	return checkStringCaseInsensitive(s.c_str(), buf);
210	}
211
212	void assertStringCaseInsensitive(const char * s, ReadBuffer & buf);
213	inline void assertStringCaseInsensitive(const String & s, ReadBuffer & buf)
214	{
215	return assertStringCaseInsensitive(s.c_str(), buf);
216	}
217
218	/* Check that next character in buf matches first character of s.*
219	* If true, then check all characters in s and throw exception if it doesn't match.
220	* If false, then return false, and leave position in buffer unchanged.
221	*/
222	bool checkStringByFirstCharacterAndAssertTheRest(const char * s, ReadBuffer & buf);
223	bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const char * s, ReadBuffer & buf);
224
225	inline bool checkStringByFirstCharacterAndAssertTheRest(const String & s, ReadBuffer & buf)
226	{
227	return checkStringByFirstCharacterAndAssertTheRest(s.c_str(), buf);
228	}
229
230	inline bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const String & s, ReadBuffer & buf)
231	{
232	return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(s.c_str(), buf);
233	}
234
235
236	inline void readBoolText(bool & x, ReadBuffer & buf)
237	{
238	char tmp = `'0'`;
239	readChar(tmp, buf);
240	x = tmp != `'0'`;
241	}
242
243	inline void readBoolTextWord(bool & x, ReadBuffer & buf)
244	{
245	if (buf.eof())
246	throwReadAfterEOF();
247
248	if (*buf.position() == `'t'`)
249	{
250	assertString("true", buf);
251	x = true;
252	}
253	else
254	{
255	assertString("false", buf);
256	x = false;
257	}
258	}
259
/// Selects whether the text-to-integer parsers verify that the accumulated value still fits the result type.
enum class ReadIntTextCheckOverflow
{
    /// Digits are accumulated without any overflow detection.
    DO_NOT_CHECK_OVERFLOW,
    /// Overflow of the accumulator is detected and reported by the parser.
    CHECK_OVERFLOW,
};
265
266	template <typename T, typename ReturnType = void, ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW>
267	ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
268	{
269	static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
270
271	bool negative = false;
272	std::make_unsigned_t<T> res = `0`;
273	if (buf.eof())
274	{
275	if constexpr (throw_exception)
276	throwReadAfterEOF();
277	else
278	return ReturnType(false);
279	}
280
281	const size_t initial_pos = buf.count();
282	while (!buf.eof())
283	{
284	switch (*buf.position())
285	{
286	case `'+'`:
287	break;
288	case `'-'`:
289	if constexpr (is_signed_v<T>)
290	negative = true;
291	else
292	{
293	if constexpr (throw_exception)
294	throw Exception ("Unsigned type must not contain '-' symbol", ErrorCodes::CANNOT_PARSE_NUMBER);
295	else
296	return ReturnType(false);
297	}
298	break;
299	case `'0'`: [[fallthrough]];
300	case `'1'`: [[fallthrough]];
301	case `'2'`: [[fallthrough]];
302	case `'3'`: [[fallthrough]];
303	case `'4'`: [[fallthrough]];
304	case `'5'`: [[fallthrough]];
305	case `'6'`: [[fallthrough]];
306	case `'7'`: [[fallthrough]];
307	case `'8'`: [[fallthrough]];
308	case `'9'`:
309	if constexpr (check_overflow == ReadIntTextCheckOverflow::CHECK_OVERFLOW)
310	{
311	// perform relativelly slow overflow check only when number of decimal digits so far is close to the max for given type.
312	if (buf.count() - initial_pos >= std::numeric_limits<T>::max_digits10)
313	{
314	if (common::mulOverflow(res, static_cast<decltype(res)>(`10`), res)
315	\|\| common::addOverflow(res, static_cast<decltype(res)>(*buf.position() - `'0'`), res))
316	return ReturnType(false);
317	break;
318	}
319	}
320	res *= `10`;
321	res += *buf.position() - `'0'`;
322	break;
323	default:
324	goto end;
325	}
326	++buf.position();
327	}
328
329	end:
330	x = negative ? -res : res;
331
332	return ReturnType(true);
333	}
334
335	template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
336	void readIntText(T & x, ReadBuffer & buf)
337	{
338	readIntTextImpl<T, void, check_overflow>(x, buf);
339	}
340
341	template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
342	bool tryReadIntText(T & x, ReadBuffer & buf)
343	{
344	return readIntTextImpl<T, bool, check_overflow>(x, buf);
345	}
346
347	template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::DO_NOT_CHECK_OVERFLOW, typename T>
348	void readIntText(Decimal<T> & x, ReadBuffer & buf)
349	{
350	readIntText<check_overflow>(x.value, buf);
351	}
352
353	/* More efficient variant (about 1.5 times on real dataset).*
354	* Differs in following:
355	* - for numbers starting with zero, parsed only zero;
356	* - symbol '+' before number is not supported;
357	* - symbols :;<=>? are parsed as some numbers.
358	*/
359	template <typename T, bool throw_on_error = true>
360	void readIntTextUnsafe(T & x, ReadBuffer & buf)
361	{
362	bool negative = false;
363	std::make_unsigned_t<T> res = `0`;
364
365	auto on_error = []
366	{
367	if (throw_on_error)
368	throwReadAfterEOF();
369	};
370
371	if (unlikely(buf.eof()))
372	return on_error();
373
374	if (is_signed_v<T> && *buf.position() == `'-'`)
375	{
376	++buf.position();
377	negative = true;
378	if (unlikely(buf.eof()))
379	return on_error();
380	}
381
382	if (buf.position() == `'0'`) /// There are many zeros in real datasets.*
383	{
384	++buf.position();
385	x = `0`;
386	return;
387	}
388
389	while (!buf.eof())
390	{
391	/// This check is suddenly faster than
392	/// unsigned char c = buf.position() - '0';*
393	/// if (c < 10)
394	/// for unknown reason on Xeon E5645.
395
396	if ((buf.position() & `0xF0`) == `0x30`) /// It makes sense to have this condition inside loop.*
397	{
398	res *= `10`;
399	res += *buf.position() & `0x0F`;
400	++buf.position();
401	}
402	else
403	break;
404	}
405
406	/// See note about undefined behaviour above.
407	x = is_signed_v<T> && negative ? -res : res;
408	}
409
410	template <typename T>
411	void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
412	{
413	return readIntTextUnsafe<T, false>(x, buf);
414	}
415
416
417	/// Look at readFloatText.h
418	template <typename T> void readFloatText(T & x, ReadBuffer & in);
419	template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in);
420
421
422	/// simple: all until '\n' or '\t'
423	void readString(String & s, ReadBuffer & buf);
424
425	void readEscapedString(String & s, ReadBuffer & buf);
426
427	void readQuotedString(String & s, ReadBuffer & buf);
428	void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
429
430	void readDoubleQuotedString(String & s, ReadBuffer & buf);
431	void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
432
433	void readJSONString(String & s, ReadBuffer & buf);
434
435	void readBackQuotedString(String & s, ReadBuffer & buf);
436	void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
437
438	void readStringUntilEOF(String & s, ReadBuffer & buf);
439	void readEscapedStringUntilEOL(String & s, ReadBuffer & buf);
440
441
/** Read string in CSV format.
  * Parsing rules:
  * - the string may be placed in quotes; quotes may be single: ' if FormatSettings::CSV::allow_single_quotes is true
  *   or double: " if FormatSettings::CSV::allow_double_quotes is true;
  * - or the string may be unquoted - this is determined by the first character;
  * - if the string is unquoted, then it is read until the next delimiter,
  *   or until end of line (CR or LF),
  *   or until end of stream;
  *   spaces and tabs at the beginning and end of an unquoted string are consumed but ignored (note that this behaviour differs from RFC).
  * - if the string is in quotes, then it is read until the closing quote,
  *   but sequences of two consecutive quotes are parsed as a single quote inside the string;
  */
454	void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
455
456
457	/// Read and append result to array of characters.
458	template <typename Vector>
459	void readStringInto(Vector & s, ReadBuffer & buf);
460
461	template <typename Vector>
462	void readNullTerminated(Vector & s, ReadBuffer & buf);
463
464	template <typename Vector>
465	void readEscapedStringInto(Vector & s, ReadBuffer & buf);
466
467	template <bool enable_sql_style_quoting, typename Vector>
468	void readQuotedStringInto(Vector & s, ReadBuffer & buf);
469
470	template <bool enable_sql_style_quoting, typename Vector>
471	void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf);
472
473	template <bool enable_sql_style_quoting, typename Vector>
474	void readBackQuotedStringInto(Vector & s, ReadBuffer & buf);
475
476	template <typename Vector>
477	void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);
478
479	template <typename Vector>
480	void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
481
482	/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
483	template <typename Vector, typename ReturnType = void>
484	ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf);
485
486	template <typename Vector>
487	bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
488	{
489	return readJSONStringInto<Vector, bool>(s, buf);
490	}
491
/// This could be used as template parameter for functions above, if you want to just skip data.
/// Both operations accept their arguments and discard them.
struct NullSink
{
    /// Discards a range of characters.
    void append(const char *, size_t)
    {
    }

    /// Discards a single character.
    void push_back(char)
    {
    }
};
498
499	void parseUUID(const UInt8 * src36, UInt8 * dst16);
500	void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16);
501
502	template <typename IteratorSrc, typename IteratorDst>
503	void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes);
504
505
506	template <typename ReturnType>
507	ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf);
508
509	/// In YYYY-MM-DD format.
510	/// For convenience, Month and Day parts can have single digit instead of two digits.
511	/// Any separators other than '-' are supported.
512	template <typename ReturnType = void>
513	inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
514	{
515	/// Optimistic path, when whole value is in buffer.
516	if (buf.position() + `10` <= buf.buffer().end())
517	{
518	UInt16 year = (buf.position()[`0`] - `'0'`) * `1000` + (buf.position()[`1`] - `'0'`) * `100` + (buf.position()[`2`] - `'0'`) * `10` + (buf.position()[`3`] - `'0'`);
519	buf.position() += `5`;
520
521	UInt8 month = buf.position()[`0`] - `'0'`;
522	if (isNumericASCII(buf.position()[`1`]))
523	{
524	month = month * `10` + buf.position()[`1`] - `'0'`;
525	buf.position() += `3`;
526	}
527	else
528	buf.position() += `2`;
529
530	UInt8 day = buf.position()[`0`] - `'0'`;
531	if (isNumericASCII(buf.position()[`1`]))
532	{
533	day = day * `10` + buf.position()[`1`] - `'0'`;
534	buf.position() += `2`;
535	}
536	else
537	buf.position() += `1`;
538
539	date = LocalDate (year, month, day);
540	return ReturnType(true);
541	}
542	else
543	return readDateTextFallback<ReturnType>(date, buf);
544	}
545
546	template <typename ReturnType = void>
547	inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf)
548	{
549	static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
550
551	LocalDate local_date;
552
553	if constexpr (throw_exception)
554	readDateTextImpl<ReturnType>(local_date, buf);
555	else if (!readDateTextImpl<ReturnType>(local_date, buf))
556	return false;
557
558	date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day());
559	return ReturnType(true);
560	}
561
562
563	inline void readDateText(LocalDate & date, ReadBuffer & buf)
564	{
565	readDateTextImpl<void>(date, buf);
566	}
567
568	inline void readDateText(DayNum & date, ReadBuffer & buf)
569	{
570	readDateTextImpl<void>(date, buf);
571	}
572
573	inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf)
574	{
575	return readDateTextImpl<bool>(date, buf);
576	}
577
578	inline bool tryReadDateText(DayNum & date, ReadBuffer & buf)
579	{
580	return readDateTextImpl<bool>(date, buf);
581	}
582
583
584	inline void readUUIDText(UUID & uuid, ReadBuffer & buf)
585	{
586	char s[`36`];
587	size_t size = buf.read(s, `36`);
588
589	if (size != `36`)
590	{
591	s[size] = `0`;
592	throw Exception (std::string ("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID);
593	}
594
595	parseUUID(reinterpret_cast<const UInt8 >(s), std::reverse_iterator<UInt8 >(reinterpret_cast<UInt8 *>(&uuid) + `16`));
596	}
597
598
599	template <typename T>
600	inline T parse(const char * data, size_t size);
601
602	template <typename T>
603	inline T parseFromString(const String & str)
604	{
605	return parse<T>(str.data(), str.size());
606	}
607
608	#pragma GCC diagnostic push
609	#pragma GCC diagnostic ignored "-Wredundant-decls"
// Just don't mess with it. If the redundant redeclaration is removed then ReaderHelpers.h should be included.
// This leads to Arena.h inclusion which has a problem with ASAN stuff included properly and messing macro definition
// which interferes with... You don't want to know, really.
613	UInt128 stringToUUID(const String & str);
614	#pragma GCC diagnostic pop
615
616	template <typename ReturnType = void>
617	ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);
618
619	/* In YYYY-MM-DD hh:mm:ss format, according to specified time zone.*
620	* As an exception, also supported parsing of unix timestamp in form of decimal number.
621	*/
622	template <typename ReturnType = void>
623	inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
624	{
625	/* Read 10 characters, that could represent unix timestamp.*
626	* Only unix timestamp of 5-10 characters is supported.
627	* Then look at 5th character. If it is a number - treat whole as unix timestamp.
628	* If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss format.
629	*/
630
631	/// Optimistic path, when whole value is in buffer.
632	const char * s = buf.position();
633	if (s + `19` <= buf.buffer().end())
634	{
635	if (s[`4`] < `'0'` \|\| s[`4`] > `'9'`)
636	{
637	UInt16 year = (s[`0`] - `'0'`) * `1000` + (s[`1`] - `'0'`) * `100` + (s[`2`] - `'0'`) * `10` + (s[`3`] - `'0'`);
638	UInt8 month = (s[`5`] - `'0'`) * `10` + (s[`6`] - `'0'`);
639	UInt8 day = (s[`8`] - `'0'`) * `10` + (s[`9`] - `'0'`);
640
641	UInt8 hour = (s[`11`] - `'0'`) * `10` + (s[`12`] - `'0'`);
642	UInt8 minute = (s[`14`] - `'0'`) * `10` + (s[`15`] - `'0'`);
643	UInt8 second = (s[`17`] - `'0'`) * `10` + (s[`18`] - `'0'`);
644
645	if (unlikely(year == `0`))
646	datetime = `0`;
647	else
648	datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
649
650	buf.position() += `19`;
651	return ReturnType(true);
652	}
653	else
654	/// Why not readIntTextUnsafe? Because for needs of AdFox, parsing of unix timestamp with leading zeros is supported: 000...NNNN.
655	return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf);
656	}
657	else
658	return readDateTimeTextFallback<ReturnType>(datetime, buf, date_lut);
659	}
660
661	template <typename ReturnType>
662	inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut)
663	{
664	time_t whole;
665	if (!readDateTimeTextImpl<bool>(whole, buf, date_lut))
666	{
667	return ReturnType(false);
668	}
669
670	DB::DecimalUtils::DecimalComponents<DateTime64::NativeType> c{static_cast<DateTime64::NativeType>(whole), `0`};
671
672	if (!buf.eof() && *buf.position() == `'.'`)
673	{
674	buf.ignore(`1`); // skip separator
675	const auto pos_before_fractional = buf.count();
676	if (!tryReadIntText<ReadIntTextCheckOverflow::CHECK_OVERFLOW>(c.fractional, buf))
677	{
678	return ReturnType(false);
679	}
680
681	// Adjust fractional part to the scale, since decimalFromComponents knows nothing
682	// about convention of ommiting trailing zero on fractional part
683	// and assumes that fractional part value is less than 10^scale.
684
685	// If scale is 3, but we read '12', promote fractional part to '120'.
686	// And vice versa: if we read '1234', denote it to '123'.
687	const auto fractional_length = static_cast<Int32>(buf.count() - pos_before_fractional);
688	if (const auto adjust_scale = static_cast<Int32>(scale) - fractional_length; adjust_scale > `0`)
689	{
690	c.fractional *= common::exp10_i64(adjust_scale);
691	}
692	else if (adjust_scale < `0`)
693	{
694	c.fractional /= common::exp10_i64(-`1` * adjust_scale);
695	}
696	}
697
698	datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(c, scale);
699
700	return ReturnType(true);
701	}
702
703	inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
704	{
705	readDateTimeTextImpl<void>(datetime, buf, date_lut);
706	}
707
708	inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
709	{
710	readDateTimeTextImpl<void>(datetime64, scale, buf, date_lut);
711	}
712
713	inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
714	{
715	return readDateTimeTextImpl<bool>(datetime, buf, date_lut);
716	}
717
718	inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
719	{
720	return readDateTimeTextImpl<bool>(datetime64, scale, buf, date_lut);
721	}
722
723	inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf)
724	{
725	char s[`19`];
726	size_t size = buf.read(s, `19`);
727	if (`19` != size)
728	{
729	s[size] = `0`;
730	throw Exception (std::string ("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
731	}
732
733	datetime.year((s[`0`] - `'0'`) * `1000` + (s[`1`] - `'0'`) * `100` + (s[`2`] - `'0'`) * `10` + (s[`3`] - `'0'`));
734	datetime.month((s[`5`] - `'0'`) * `10` + (s[`6`] - `'0'`));
735	datetime.day((s[`8`] - `'0'`) * `10` + (s[`9`] - `'0'`));
736
737	datetime.hour((s[`11`] - `'0'`) * `10` + (s[`12`] - `'0'`));
738	datetime.minute((s[`14`] - `'0'`) * `10` + (s[`15`] - `'0'`));
739	datetime.second((s[`17`] - `'0'`) * `10` + (s[`18`] - `'0'`));
740	}
741
742
743	/// Generic methods to read value in native binary format.
744	template <typename T>
745	inline std::enable_if_t<is_arithmetic_v<T>, void>
746	readBinary(T & x, ReadBuffer & buf) { readPODBinary(x, buf); }
747
748	inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
749	inline void readBinary(Int128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
750	inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
751	inline void readBinary(UInt256 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
752	inline void readBinary(Decimal32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
753	inline void readBinary(Decimal64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
754	inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
755	inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
756
757
758	/// Generic methods to read value in text tab-separated format.
759	template <typename T>
760	inline std::enable_if_t<is_integral_v<T>, void>
761	readText(T & x, ReadBuffer & buf) { readIntText(x, buf); }
762
763	template <typename T>
764	inline std::enable_if_t<std::is_floating_point_v<T>, void>
765	readText(T & x, ReadBuffer & buf) { readFloatText(x, buf); }
766
767	inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
768	inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
769	inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
770	inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
771	inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); }
772	[[noreturn]] inline void readText(UInt128 &, ReadBuffer &)
773	{
774	/* Because UInt128 isn't a natural type, without arithmetic operator and only use as an intermediary type -for UUID-*
775	* it should never arrive here. But because we used the DataTypeNumber class we should have at least a definition of it.
776	*/
777	throw Exception ("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
778	}
779
780	/// Generic methods to read value in text format,
781	/// possibly in single quotes (only for data types that use quotes in VALUES format of INSERT statement in SQL).
782	template <typename T>
783	inline std::enable_if_t<is_arithmetic_v<T>, void>
784	readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }
785
786	inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }
787
788	inline void readQuoted(LocalDate & x, ReadBuffer & buf)
789	{
790	assertChar(`'\''`, buf);
791	readDateText(x, buf);
792	assertChar(`'\''`, buf);
793	}
794
795	inline void readQuoted(LocalDateTime & x, ReadBuffer & buf)
796	{
797	assertChar(`'\''`, buf);
798	readDateTimeText(x, buf);
799	assertChar(`'\''`, buf);
800	}
801
802
803	/// Same as above, but in double quotes.
804	template <typename T>
805	inline std::enable_if_t<is_arithmetic_v<T>, void>
806	readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }
807
808	inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }
809
810	inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf)
811	{
812	assertChar(`'"'`, buf);
813	readDateText(x, buf);
814	assertChar(`'"'`, buf);
815	}
816
817	inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
818	{
819	assertChar(`'"'`, buf);
820	readDateTimeText(x, buf);
821	assertChar(`'"'`, buf);
822	}
823
824
825	/// CSV, for numbers, dates: quotes are optional, no special escaping rules.
826	template <typename T>
827	inline void readCSVSimple(T & x, ReadBuffer & buf)
828	{
829	if (buf.eof())
830	throwReadAfterEOF();
831
832	char maybe_quote = *buf.position();
833
834	if (maybe_quote == `'\''` \|\| maybe_quote == `'\"'`)
835	++buf.position();
836
837	readText(x, buf);
838
839	if (maybe_quote == `'\''` \|\| maybe_quote == `'\"'`)
840	assertChar(maybe_quote, buf);
841	}
842
843	template <typename T>
844	inline std::enable_if_t<is_arithmetic_v<T>, void>
845	readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
846
847	inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
848	inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
849	inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
850	inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
851	[[noreturn]] inline void readCSV(UInt128 &, ReadBuffer &)
852	{
853	/* Because UInt128 isn't a natural type, without arithmetic operator and only use as an intermediary type -for UUID-*
854	* it should never arrive here. But because we used the DataTypeNumber class we should have at least a definition of it.
855	*/
856	throw Exception ("UInt128 cannot be read as a text", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
857	}
858
859	template <typename T>
860	void readBinary(std::vector<T> & x, ReadBuffer & buf)
861	{
862	size_t size = `0`;
863	readVarUInt(size, buf);
864
865	if (size > DEFAULT_MAX_STRING_SIZE)
866	throw Poco::Exception ("Too large vector size.");
867
868	x.resize(size);
869	for (size_t i = `0`; i < size; ++i)
870	readBinary(x[i], buf);
871	}
872
873	template <typename T>
874	void readQuoted(std::vector<T> & x, ReadBuffer & buf)
875	{
876	bool first = true;
877	assertChar(`'['`, buf);
878	while (!buf.eof() && *buf.position() != `']'`)
879	{
880	if (!first)
881	{
882	if (*buf.position() == `','`)
883	++buf.position();
884	else
885	throw Exception ("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
886	}
887
888	first = false;
889
890	x.push_back(T());
891	readQuoted(x.back(), buf);
892	}
893	assertChar(`']'`, buf);
894	}
895
896	template <typename T>
897	void readDoubleQuoted(std::vector<T> & x, ReadBuffer & buf)
898	{
899	bool first = true;
900	assertChar(`'['`, buf);
901	while (!buf.eof() && *buf.position() != `']'`)
902	{
903	if (!first)
904	{
905	if (*buf.position() == `','`)
906	++buf.position();
907	else
908	throw Exception ("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
909	}
910
911	first = false;
912
913	x.push_back(T());
914	readDoubleQuoted(x.back(), buf);
915	}
916	assertChar(`']'`, buf);
917	}
918
919	template <typename T>
920	void readText(std::vector<T> & x, ReadBuffer & buf)
921	{
922	readQuoted(x, buf);
923	}
924
925
926	/// Skip whitespace characters.
927	inline void skipWhitespaceIfAny(ReadBuffer & buf)
928	{
929	while (!buf.eof() && isWhitespaceASCII(*buf.position()))
930	++buf.position();
931	}
932
933	/// Skips json value.
934	void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field);
935
936
937	/* Read serialized exception.*
938	* During serialization/deserialization some information is lost
939	* (type is cut to base class, 'message' replaced by 'displayText', and stack trace is appended to 'message')
940	* Some additional message could be appended to exception (example: you could add information about from where it was received).
941	*/
942	void readException(Exception & e, ReadBuffer & buf, const String & additional_message = "");
943	void readAndThrowException(ReadBuffer & buf, const String & additional_message = "");
944
945
946	/* Helper function for implementation.*
947	*/
948	template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
949	static inline const char * tryReadIntText(T & x, const char * pos, const char * end)
950	{
951	ReadBufferFromMemory in(pos, end - pos);
952	tryReadIntText<check_overflow>(x, in);
953	return pos + in.count();
954	}
955
956
957	/// Convenient methods for reading something from string in text format.
958	template <typename T>
959	inline T parse(const char * data, size_t size)
960	{
961	T res;
962	ReadBufferFromMemory buf(data, size);
963	readText(res, buf);
964	return res;
965	}
966
967	/// Read something from text format, but expect complete parse of given text
968	/// For example: 723145 -- ok, 213MB -- not ok
969	template <typename T>
970	inline T completeParse(const char * data, size_t size)
971	{
972	T res;
973	ReadBufferFromMemory buf(data, size);
974	readText(res, buf);
975	assertEOF(buf);
976	return res;
977	}
978
979	template <typename T>
980	inline T completeParse(const String & s)
981	{
982	return completeParse<T>(s.data(), s.size());
983	}
984
985	template <typename T>
986	inline T completeParse(const char * data)
987	{
988	return completeParse<T>(data, strlen(data));
989	}
990
991	template <typename T>
992	inline T parse(const char * data)
993	{
994	return parse<T>(data, strlen(data));
995	}
996
997	template <typename T>
998	inline T parse(const String & s)
999	{
1000	return parse<T>(s.data(), s.size());
1001	}
1002
1003
1004	/* Skip UTF-8 BOM if it is under cursor.*
1005	* As BOM is usually located at start of stream, and buffer size is usually larger than three bytes,
1006	* the function expects, that all three bytes of BOM is fully in buffer (otherwise it don't skip anything).
1007	*/
1008	inline void skipBOMIfExists(ReadBuffer & buf)
1009	{
1010	if (!buf.eof()
1011	&& buf.position() + `3` < buf.buffer().end()
1012	&& buf.position()[`0`] == `'\xEF'`
1013	&& buf.position()[`1`] == `'\xBB'`
1014	&& buf.position()[`2`] == `'\xBF'`)
1015	{
1016	buf.position() += `3`;
1017	}
1018	}
1019
1020
1021	/// Skip to next character after next \n. If no \n in stream, skip to end.
1022	void skipToNextLineOrEOF(ReadBuffer & buf);
1023
1024	/// Skip to next character after next unescaped \n. If no \n in stream, skip to end. Does not throw on invalid escape sequences.
1025	void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
1026
1027	template <class TReadBuffer, class... Types>
1028	std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types&&... args)
1029	{
1030	if (method == DB::CompressionMethod::Gzip)
1031	{
1032	auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...);
1033	return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method);
1034	}
1035	return std::make_unique<TReadBuffer>(args...);
1036	}
1037
1038	/* This function just copies the data from buffer's internal position (in.position())*
1039	* to current position (from arguments) into memory.
1040	*/
1041	void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current);
1042
/** This function is the negation of eof().
  * In fact it returns whether the data was loaded into the ReadBuffer's internal buffer or not,
  * and saves the data from the buffer's position up to `current` if there is no pending data in the buffer.
  * Why do we have to use this strange function? Consider the case when the buffer's internal position is in the middle
  * of our buffer and the current cursor is at the end of the buffer. When we call eof(), it calls next().
  * That call can fill the buffer with new data, so we would lose the data from the previous buffer state.
  */
1050	bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current);
1051
1052	}
1053

Browse the source code of ClickHouse/dbms/src/IO/ReadHelpers.h