ReadHelpers.cpp source code [ClickHouse/dbms/src/IO/ReadHelpers.cpp]

1	#include <Core/Defines.h>
2	#include <Common/hex.h>
3	#include <Common/PODArray.h>
4	#include <Common/StringUtils/StringUtils.h>
5	#include <Common/memcpySmall.h>
6	#include <Formats/FormatSettings.h>
7	#include <IO/WriteHelpers.h>
8	#include <IO/WriteBufferFromString.h>
9	#include <IO/readFloatText.h>
10	#include <IO/Operators.h>
11	#include <common/find_symbols.h>
12	#include <stdlib.h>
13
14	#ifdef __SSE2__
15	#include <emmintrin.h>
16	#endif
17
18	namespace DB
19	{
20
21	namespace ErrorCodes
22	{
23	extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
24	extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
25	extern const int CANNOT_PARSE_QUOTED_STRING;
26	extern const int CANNOT_PARSE_DATETIME;
27	extern const int CANNOT_PARSE_DATE;
28	extern const int INCORRECT_DATA;
29	}
30
/// Decodes `num_bytes` bytes of hexadecimal text: each output byte is assembled from two
/// consecutive input characters, the first one being the high nibble.
/// The input characters are passed to unhex() as is; nothing is validated here.
template <typename IteratorSrc, typename IteratorDst>
void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes)
{
    for (size_t out_idx = 0; out_idx < num_bytes; ++out_idx)
    {
        const size_t in_idx = 2 * out_idx;
        dst[out_idx] = UInt8(unhex(src[in_idx])) * 16 + UInt8(unhex(src[in_idx + 1]));
    }
}
42
43	void parseUUID(const UInt8 * src36, UInt8 * dst16)
44	{
45	/// If string is not like UUID - implementation specific behaviour.
46
47	parseHex(&src36[`0`], &dst16[`0`], `4`);
48	parseHex(&src36[`9`], &dst16[`4`], `2`);
49	parseHex(&src36[`14`], &dst16[`6`], `2`);
50	parseHex(&src36[`19`], &dst16[`8`], `2`);
51	parseHex(&src36[`24`], &dst16[`10`], `6`);
52	}
53
54	/* Function used when byte ordering is important when parsing uuid*
55	* ex: When we create an UUID type
56	*/
57	void parseUUID(const UInt8 * src36, std::reverse_iterator<UInt8 *> dst16)
58	{
59	/// If string is not like UUID - implementation specific behaviour.
60
61	/// FIXME This code looks like trash.
62	parseHex(&src36[`0`], dst16 + `8`, `4`);
63	parseHex(&src36[`9`], dst16 + `12`, `2`);
64	parseHex(&src36[`14`], dst16 + `14`, `2`);
65	parseHex(&src36[`19`], dst16, `2`);
66	parseHex(&src36[`24`], dst16 + `2`, `6`);
67	}
68
69	UInt128 stringToUUID(const String & str)
70	{
71	return parseFromString<UUID>(str);
72	}
73
74	void NO_INLINE throwAtAssertionFailed(const char * s, ReadBuffer & buf)
75	{
76	WriteBufferFromOwnString out;
77	out << "Cannot parse input: expected " << escape << s;
78
79	if (buf.eof())
80	out << " at end of stream.";
81	else
82	out << " before: " << escape << String (buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position()));
83
84	throw Exception (out.str(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
85	}
86
87
88	bool checkString(const char * s, ReadBuffer & buf)
89	{
90	for (; *s; ++s)
91	{
92	if (buf.eof() \|\| buf.position() != s)
93	return false;
94	++buf.position();
95	}
96	return true;
97	}
98
99
100	bool checkStringCaseInsensitive(const char * s, ReadBuffer & buf)
101	{
102	for (; *s; ++s)
103	{
104	if (buf.eof())
105	return false;
106
107	char c = *buf.position();
108	if (!equalsCaseInsensitive(*s, c))
109	return false;
110
111	++buf.position();
112	}
113	return true;
114	}
115
116
117	void assertString(const char * s, ReadBuffer & buf)
118	{
119	if (!checkString(s, buf))
120	throwAtAssertionFailed(s, buf);
121	}
122
123
124	void assertEOF(ReadBuffer & buf)
125	{
126	if (!buf.eof())
127	throwAtAssertionFailed("eof", buf);
128	}
129
130
131	void assertStringCaseInsensitive(const char * s, ReadBuffer & buf)
132	{
133	if (!checkStringCaseInsensitive(s, buf))
134	throwAtAssertionFailed(s, buf);
135	}
136
137
138	bool checkStringByFirstCharacterAndAssertTheRest(const char * s, ReadBuffer & buf)
139	{
140	if (buf.eof() \|\| buf.position() != s)
141	return false;
142
143	assertString(s, buf);
144	return true;
145	}
146
147	bool checkStringByFirstCharacterAndAssertTheRestCaseInsensitive(const char * s, ReadBuffer & buf)
148	{
149	if (buf.eof())
150	return false;
151
152	char c = *buf.position();
153	if (!equalsCaseInsensitive(*s, c))
154	return false;
155
156	assertStringCaseInsensitive(s, buf);
157	return true;
158	}
159
160
161	template <typename T>
162	static void appendToStringOrVector(T & s, ReadBuffer & rb, const char * end)
163	{
164	s.append(rb.position(), end - rb.position());
165	}
166
167	template <>
168	inline void appendToStringOrVector(PaddedPODArray<UInt8> & s, ReadBuffer & rb, const char * end)
169	{
170	if (rb.isPadded())
171	s.insertSmallAllowReadWriteOverflow15(rb.position(), end);
172	else
173	s.insert(rb.position(), end);
174	}
175
176	template <>
177	inline void appendToStringOrVector(PODArray<char> & s, ReadBuffer & rb, const char * end)
178	{
179	s.insert(rb.position(), end);
180	}
181
182	template <typename Vector>
183	void readStringInto(Vector & s, ReadBuffer & buf)
184	{
185	while (!buf.eof())
186	{
187	char * next_pos = find_first_symbols<`'\t'`, `'\n'`>(buf.position(), buf.buffer().end());
188
189	appendToStringOrVector(s, buf, next_pos);
190	buf.position() = next_pos;
191
192	if (buf.hasPendingData())
193	return;
194	}
195	}
196
197	template <typename Vector>
198	void readNullTerminated(Vector & s, ReadBuffer & buf)
199	{
200	while (!buf.eof())
201	{
202	char * next_pos = find_first_symbols<`'\0'`>(buf.position(), buf.buffer().end());
203
204	appendToStringOrVector(s, buf, next_pos);
205	buf.position() = next_pos;
206
207	if (buf.hasPendingData())
208	break;
209	}
210	buf.ignore();
211	}
212
213	template void readNullTerminated<PODArray<char>>(PODArray<char> & s, ReadBuffer & buf);
214	template void readNullTerminated<String>(String & s, ReadBuffer & buf);
215
216	void readString(String & s, ReadBuffer & buf)
217	{
218	s.clear();
219	readStringInto(s, buf);
220	}
221
222	template void readStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
223
224
225	template <typename Vector>
226	void readStringUntilEOFInto(Vector & s, ReadBuffer & buf)
227	{
228	while (!buf.eof())
229	{
230	appendToStringOrVector(s, buf, buf.buffer().end());
231	buf.position() = buf.buffer().end();
232
233	if (buf.hasPendingData())
234	return;
235	}
236	}
237
238
239	void readStringUntilEOF(String & s, ReadBuffer & buf)
240	{
241	s.clear();
242	readStringUntilEOFInto(s, buf);
243	}
244
245	template <typename Vector>
246	void readEscapedStringUntilEOLInto(Vector & s, ReadBuffer & buf)
247	{
248	while (!buf.eof())
249	{
250	char * next_pos = find_first_symbols<`'\n'`, `'\\'`>(buf.position(), buf.buffer().end());
251
252	appendToStringOrVector(s, buf, next_pos);
253	buf.position() = next_pos;
254
255	if (!buf.hasPendingData())
256	continue;
257
258	if (*buf.position() == `'\n'`)
259	return;
260
261	if (*buf.position() == `'\\'`)
262	parseComplexEscapeSequence(s, buf);
263	}
264	}
265
266
267	void readEscapedStringUntilEOL(String & s, ReadBuffer & buf)
268	{
269	s.clear();
270	readEscapedStringUntilEOLInto(s, buf);
271	}
272
273	template void readStringUntilEOFInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
274
275
276	/* Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters).*
277	* It is assumed that the cursor is located on the `\` symbol
278	*/
279	template <typename Vector>
280	static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
281	{
282	++buf.position();
283	if (buf.eof())
284	throw Exception ("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
285
286	if (*buf.position() == `'x'`)
287	{
288	++buf.position();
289	/// escape sequence of the form \xAA
290	char hex_code[`2`];
291	readPODBinary(hex_code, buf);
292	s.push_back(unhex2(hex_code));
293	}
294	else if (*buf.position() == `'N'`)
295	{
296	/// Support for NULLs: \N sequence must be parsed as empty string.
297	++buf.position();
298	}
299	else
300	{
301	/// The usual escape sequence of a single character.
302	s.push_back(parseEscapeSequence(*buf.position()));
303	++buf.position();
304	}
305	}
306
307
308	template <typename Vector, typename ReturnType>
309	static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
310	{
311	static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
312
313	auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]])
314	{
315	if constexpr (throw_exception)
316	throw Exception (message, code);
317	return ReturnType(false);
318	};
319
320	++buf.position();
321	if (buf.eof())
322	return error("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
323
324	switch (*buf.position())
325	{
326	case `'"'`:
327	s.push_back(`'"'`);
328	break;
329	case `'\\'`:
330	s.push_back(`'\\'`);
331	break;
332	case `'/'`:
333	s.push_back(`'/'`);
334	break;
335	case `'b'`:
336	s.push_back(`'\b'`);
337	break;
338	case `'f'`:
339	s.push_back(`'\f'`);
340	break;
341	case `'n'`:
342	s.push_back(`'\n'`);
343	break;
344	case `'r'`:
345	s.push_back(`'\r'`);
346	break;
347	case `'t'`:
348	s.push_back(`'\t'`);
349	break;
350	case `'u'`:
351	{
352	++buf.position();
353
354	char hex_code[`4`];
355	if (`4` != buf.read(hex_code, `4`))
356	return error("Cannot parse escape sequence: less than four bytes after \\u", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
357
358	/// \u0000 - special case
359	if (`0` == memcmp(hex_code, "0000", `4`))
360	{
361	s.push_back(`0`);
362	return ReturnType(true);
363	}
364
365	UInt16 code_point = unhex4(hex_code);
366
367	if (code_point <= `0x7F`)
368	{
369	s.push_back(code_point);
370	}
371	else if (code_point <= `0x07FF`)
372	{
373	s.push_back(((code_point >> `6`) & `0x1F`) \| `0xC0`);
374	s.push_back((code_point & `0x3F`) \| `0x80`);
375	}
376	else
377	{
378	/// Surrogate pair.
379	if (code_point >= `0xD800` && code_point <= `0xDBFF`)
380	{
381	if (!checkString("\\u", buf))
382	return error("Cannot parse escape sequence: missing second part of surrogate pair", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
383
384	char second_hex_code[`4`];
385	if (`4` != buf.read(second_hex_code, `4`))
386	return error("Cannot parse escape sequence: less than four bytes after \\u of second part of surrogate pair",
387	ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
388
389	UInt16 second_code_point = unhex4(second_hex_code);
390
391	if (second_code_point >= `0xDC00` && second_code_point <= `0xDFFF`)
392	{
393	UInt32 full_code_point = `0x10000` + (code_point - `0xD800`) * `1024` + (second_code_point - `0xDC00`);
394
395	s.push_back(((full_code_point >> `18`) & `0x07`) \| `0xF0`);
396	s.push_back(((full_code_point >> `12`) & `0x3F`) \| `0x80`);
397	s.push_back(((full_code_point >> `6`) & `0x3F`) \| `0x80`);
398	s.push_back((full_code_point & `0x3F`) \| `0x80`);
399	}
400	else
401	return error("Incorrect surrogate pair of unicode escape sequences in JSON", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
402	}
403	else
404	{
405	s.push_back(((code_point >> `12`) & `0x0F`) \| `0xE0`);
406	s.push_back(((code_point >> `6`) & `0x3F`) \| `0x80`);
407	s.push_back((code_point & `0x3F`) \| `0x80`);
408	}
409	}
410
411	return ReturnType(true);
412	}
413	default:
414	s.push_back(*buf.position());
415	break;
416	}
417
418	++buf.position();
419	return ReturnType(true);
420	}
421
422
423	template <typename Vector>
424	void readEscapedStringInto(Vector & s, ReadBuffer & buf)
425	{
426	while (!buf.eof())
427	{
428	char * next_pos = find_first_symbols<`'\t'`, `'\n'`, `'\\'`>(buf.position(), buf.buffer().end());
429
430	appendToStringOrVector(s, buf, next_pos);
431	buf.position() = next_pos;
432
433	if (!buf.hasPendingData())
434	continue;
435
436	if (buf.position() == `'\t'` \|\| buf.position() == `'\n'`)
437	return;
438
439	if (*buf.position() == `'\\'`)
440	parseComplexEscapeSequence(s, buf);
441	}
442	}
443
444	void readEscapedString(String & s, ReadBuffer & buf)
445	{
446	s.clear();
447	readEscapedStringInto(s, buf);
448	}
449
450	template void readEscapedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
451	template void readEscapedStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
452
453
454	/* If enable_sql_style_quoting == true,*
455	* strings like 'abc''def' will be parsed as abc'def.
456	* Please note, that even with SQL style quoting enabled,
457	* backslash escape sequences are also parsed,
458	* that could be slightly confusing.
459	*/
460	template <char quote, bool enable_sql_style_quoting, typename Vector>
461	static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
462	{
463	if (buf.eof() \|\| *buf.position() != quote)
464	throw Exception ("Cannot parse quoted string: expected opening quote",
465	ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
466	++buf.position();
467
468	while (!buf.eof())
469	{
470	char * next_pos = find_first_symbols<`'\\'`, quote>(buf.position(), buf.buffer().end());
471
472	appendToStringOrVector(s, buf, next_pos);
473	buf.position() = next_pos;
474
475	if (!buf.hasPendingData())
476	continue;
477
478	if (*buf.position() == quote)
479	{
480	++buf.position();
481
482	if (enable_sql_style_quoting && !buf.eof() && *buf.position() == quote)
483	{
484	s.push_back(quote);
485	++buf.position();
486	continue;
487	}
488
489	return;
490	}
491
492	if (*buf.position() == `'\\'`)
493	parseComplexEscapeSequence(s, buf);
494	}
495
496	throw Exception ("Cannot parse quoted string: expected closing quote",
497	ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
498	}
499
500	template <bool enable_sql_style_quoting, typename Vector>
501	void readQuotedStringInto(Vector & s, ReadBuffer & buf)
502	{
503	readAnyQuotedStringInto<`'\''`, enable_sql_style_quoting>(s, buf);
504	}
505
506	template <bool enable_sql_style_quoting, typename Vector>
507	void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
508	{
509	readAnyQuotedStringInto<`'"'`, enable_sql_style_quoting>(s, buf);
510	}
511
512	template <bool enable_sql_style_quoting, typename Vector>
513	void readBackQuotedStringInto(Vector & s, ReadBuffer & buf)
514	{
515	readAnyQuotedStringInto<'`', enable_sql_style_quoting>(s, buf);
516	}
517
518
519	void readQuotedString(String & s, ReadBuffer & buf)
520	{
521	s.clear();
522	readQuotedStringInto<false>(s, buf);
523	}
524
525	void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
526	{
527	s.clear();
528	readQuotedStringInto<true>(s, buf);
529	}
530
531
532	template void readQuotedStringInto<true>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
533	template void readDoubleQuotedStringInto<false>(NullSink & s, ReadBuffer & buf);
534
535	void readDoubleQuotedString(String & s, ReadBuffer & buf)
536	{
537	s.clear();
538	readDoubleQuotedStringInto<false>(s, buf);
539	}
540
541	void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
542	{
543	s.clear();
544	readDoubleQuotedStringInto<true>(s, buf);
545	}
546
547	void readBackQuotedString(String & s, ReadBuffer & buf)
548	{
549	s.clear();
550	readBackQuotedStringInto<false>(s, buf);
551	}
552
553	void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
554	{
555	s.clear();
556	readBackQuotedStringInto<true>(s, buf);
557	}
558
559
560	template <typename Vector>
561	void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
562	{
563	if (buf.eof())
564	throwReadAfterEOF();
565
566	const char delimiter = settings.delimiter;
567	const char maybe_quote = *buf.position();
568
569	/// Emptiness and not even in quotation marks.
570	if (maybe_quote == delimiter)
571	return;
572
573	if ((settings.allow_single_quotes && maybe_quote == `'\''`) \|\| (settings.allow_double_quotes && maybe_quote == `'"'`))
574	{
575	++buf.position();
576
577	/// The quoted case. We are looking for the next quotation mark.
578	while (!buf.eof())
579	{
580	char * next_pos = reinterpret_cast<char *>(memchr(buf.position(), maybe_quote, buf.buffer().end() - buf.position()));
581
582	if (nullptr == next_pos)
583	next_pos = buf.buffer().end();
584
585	appendToStringOrVector(s, buf, next_pos);
586	buf.position() = next_pos;
587
588	if (!buf.hasPendingData())
589	continue;
590
591	/// Now there is a quotation mark under the cursor. Is there any following?
592	++buf.position();
593	if (buf.eof())
594	return;
595
596	if (*buf.position() == maybe_quote)
597	{
598	s.push_back(maybe_quote);
599	++buf.position();
600	continue;
601	}
602
603	return;
604	}
605	}
606	else
607	{
608	/// Unquoted case. Look for delimiter or \r or \n.
609	while (!buf.eof())
610	{
611	char * next_pos = buf.position();
612
613	[&]()
614	{
615	#ifdef __SSE2__
616	auto rc = _mm_set1_epi8(`'\r'`);
617	auto nc = _mm_set1_epi8(`'\n'`);
618	auto dc = _mm_set1_epi8(delimiter);
619	for (; next_pos + `15` < buf.buffer().end(); next_pos += `16`)
620	{
621	__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(next_pos));
622	auto eq = _mm_or_si128(_mm_or_si128(_mm_cmpeq_epi8(bytes, rc), _mm_cmpeq_epi8(bytes, nc)), _mm_cmpeq_epi8(bytes, dc));
623	uint16_t bit_mask = _mm_movemask_epi8(eq);
624	if (bit_mask)
625	{
626	next_pos += __builtin_ctz(bit_mask);
627	return;
628	}
629	}
630	#endif
631	while (next_pos < buf.buffer().end()
632	&& next_pos != delimiter && next_pos != `'\r'` && *next_pos != `'\n'`)
633	++next_pos;
634	}();
635
636
637	appendToStringOrVector(s, buf, next_pos);
638	buf.position() = next_pos;
639
640	if (!buf.hasPendingData())
641	continue;
642
643	/* CSV format can contain insignificant spaces and tabs.*
644	* Usually the task of skipping them is for the calling code.
645	* But in this case, it will be difficult to do this, so remove the trailing whitespace by yourself.
646	*/
647	size_t size = s.size();
648	while (size > `0`
649	&& (s[size - `1`] == `' '` \|\| s[size - `1`] == `'\t'`))
650	--size;
651
652	s.resize(size);
653	return;
654	}
655	}
656	}
657
658	void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
659	{
660	s.clear();
661	readCSVStringInto(s, buf, settings);
662	}
663
664	template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
665
666
667	template <typename Vector, typename ReturnType>
668	ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf)
669	{
670	static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
671
672	auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]])
673	{
674	if constexpr (throw_exception)
675	throw Exception (message, code);
676	return ReturnType(false);
677	};
678
679	if (buf.eof() \|\| *buf.position() != `'"'`)
680	return error("Cannot parse JSON string: expected opening quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
681	++buf.position();
682
683	while (!buf.eof())
684	{
685	char * next_pos = find_first_symbols<`'\\'`, `'"'`>(buf.position(), buf.buffer().end());
686
687	appendToStringOrVector(s, buf, next_pos);
688	buf.position() = next_pos;
689
690	if (!buf.hasPendingData())
691	continue;
692
693	if (*buf.position() == `'"'`)
694	{
695	++buf.position();
696	return ReturnType(true);
697	}
698
699	if (*buf.position() == `'\\'`)
700	parseJSONEscapeSequence<Vector, ReturnType>(s, buf);
701	}
702
703	return error("Cannot parse JSON string: expected closing quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
704	}
705
706	void readJSONString(String & s, ReadBuffer & buf)
707	{
708	s.clear();
709	readJSONStringInto(s, buf);
710	}
711
712	template void readJSONStringInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
713	template bool readJSONStringInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
714	template void readJSONStringInto<NullSink>(NullSink & s, ReadBuffer & buf);
715	template void readJSONStringInto<String>(String & s, ReadBuffer & buf);
716
717
718	template <typename ReturnType>
719	ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
720	{
721	static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
722
723	auto error = []
724	{
725	if constexpr (throw_exception)
726	throw Exception ("Cannot parse date: value is too short", ErrorCodes::CANNOT_PARSE_DATE);
727	return ReturnType(false);
728	};
729
730	auto ignore_delimiter = [&]
731	{
732	if (!buf.eof())
733	{
734	++buf.position();
735	return true;
736	}
737	else
738	return false;
739	};
740
741	auto append_digit = [&](auto & x)
742	{
743	if (!buf.eof() && isNumericASCII(*buf.position()))
744	{
745	x = x * `10` + (*buf.position() - `'0'`);
746	++buf.position();
747	return true;
748	}
749	else
750	return false;
751	};
752
753	UInt16 year = `0`;
754	if (!append_digit(year)
755	\|\| !append_digit(year)
756	\|\| !append_digit(year)
757	\|\| !append_digit(year))
758	return error();
759
760	if (!ignore_delimiter())
761	return error();
762
763	UInt8 month = `0`;
764	if (!append_digit(month))
765	return error();
766	append_digit(month);
767
768	if (!ignore_delimiter())
769	return error();
770
771	UInt8 day = `0`;
772	if (!append_digit(day))
773	return error();
774	append_digit(day);
775
776	date = LocalDate (year, month, day);
777	return ReturnType(true);
778	}
779
780	template void readDateTextFallback<void>(LocalDate &, ReadBuffer &);
781	template bool readDateTextFallback<bool>(LocalDate &, ReadBuffer &);
782
783
784	template <typename ReturnType>
785	ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
786	{
787	static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
788
789	static constexpr auto DATE_TIME_BROKEN_DOWN_LENGTH = `19`;
790	static constexpr auto UNIX_TIMESTAMP_MAX_LENGTH = `10`;
791
792	char s[DATE_TIME_BROKEN_DOWN_LENGTH];
793	char * s_pos = s;
794
795	/// A piece similar to unix timestamp.
796	while (s_pos < s + UNIX_TIMESTAMP_MAX_LENGTH && !buf.eof() && isNumericASCII(*buf.position()))
797	{
798	s_pos = buf.position();
799	++s_pos;
800	++buf.position();
801	}
802
803	/// 2015-01-01 01:02:03
804	if (s_pos == s + `4` && !buf.eof() && (buf.position() < `'0'` \|\| buf.position() > `'9'`))
805	{
806	const size_t remaining_size = DATE_TIME_BROKEN_DOWN_LENGTH - (s_pos - s);
807	size_t size = buf.read(s_pos, remaining_size);
808	if (remaining_size != size)
809	{
810	s_pos[size] = `0`;
811
812	if constexpr (throw_exception)
813	throw Exception (std::string ("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
814	else
815	return false;
816	}
817
818	UInt16 year = (s[`0`] - `'0'`) * `1000` + (s[`1`] - `'0'`) * `100` + (s[`2`] - `'0'`) * `10` + (s[`3`] - `'0'`);
819	UInt8 month = (s[`5`] - `'0'`) * `10` + (s[`6`] - `'0'`);
820	UInt8 day = (s[`8`] - `'0'`) * `10` + (s[`9`] - `'0'`);
821
822	UInt8 hour = (s[`11`] - `'0'`) * `10` + (s[`12`] - `'0'`);
823	UInt8 minute = (s[`14`] - `'0'`) * `10` + (s[`15`] - `'0'`);
824	UInt8 second = (s[`17`] - `'0'`) * `10` + (s[`18`] - `'0'`);
825
826	if (unlikely(year == `0`))
827	datetime = `0`;
828	else
829	datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
830	}
831	else
832	{
833	/// Only unix timestamp of 5-10 characters is supported. For consistency. See readDateTimeTextImpl.
834	if (s_pos - s >= `5` && s_pos - s <= `10`)
835	{
836	/// Not very efficient.
837	datetime = `0`;
838	for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos)
839	datetime = datetime * `10` + *digit_pos - `'0'`;
840	}
841	else
842	{
843	if constexpr (throw_exception)
844	throw Exception ("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME);
845	else
846	return false;
847	}
848	}
849
850	return ReturnType(true);
851	}
852
853	template void readDateTimeTextFallback<void>(time_t &, ReadBuffer &, const DateLUTImpl &);
854	template bool readDateTimeTextFallback<bool>(time_t &, ReadBuffer &, const DateLUTImpl &);
855
856
857	void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field)
858	{
859	if (buf.eof())
860	throw Exception ("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
861	else if (buf.position() == `'"'`) /// skip double-quoted string*
862	{
863	NullSink sink;
864	readJSONStringInto(sink, buf);
865	}
866	else if (isNumericASCII(buf.position()) \|\| buf.position() == `'-'` \|\| buf.position() == `'+'` \|\| buf.position() == `'.'`) /// skip number
867	{
868	if (*buf.position() == `'+'`)
869	++buf.position();
870
871	double v;
872	if (!tryReadFloatText(v, buf))
873	throw Exception ("Expected a number field for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
874	}
875	else if (buf.position() == `'n'`) /// skip null*
876	{
877	assertString("null", buf);
878	}
879	else if (buf.position() == `'t'`) /// skip true*
880	{
881	assertString("true", buf);
882	}
883	else if (buf.position() == `'f'`) /// skip false*
884	{
885	assertString("false", buf);
886	}
887	else if (*buf.position() == `'['`)
888	{
889	++buf.position();
890	skipWhitespaceIfAny(buf);
891
892	if (!buf.eof() && buf.position() == `']'`) /// skip empty array*
893	{
894	++buf.position();
895	return;
896	}
897
898	while (true)
899	{
900	skipJSONField(buf, name_of_field);
901	skipWhitespaceIfAny(buf);
902
903	if (!buf.eof() && *buf.position() == `','`)
904	{
905	++buf.position();
906	skipWhitespaceIfAny(buf);
907	}
908	else if (!buf.eof() && *buf.position() == `']'`)
909	{
910	++buf.position();
911	break;
912	}
913	else
914	throw Exception ("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
915	}
916	}
917	else if (buf.position() == `'{'`) /// skip whole object*
918	{
919	++buf.position();
920	skipWhitespaceIfAny(buf);
921
922	while (!buf.eof() && *buf.position() != `'}'`)
923	{
924	// field name
925	if (*buf.position() == `'"'`)
926	{
927	NullSink sink;
928	readJSONStringInto(sink, buf);
929	}
930	else
931	throw Exception ("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
932
933	// ':'
934	skipWhitespaceIfAny(buf);
935	if (buf.eof() \|\| !(*buf.position() == `':'`))
936	throw Exception ("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
937	++buf.position();
938	skipWhitespaceIfAny(buf);
939
940	skipJSONField(buf, name_of_field);
941	skipWhitespaceIfAny(buf);
942
943	// optional ','
944	if (!buf.eof() && *buf.position() == `','`)
945	{
946	++buf.position();
947	skipWhitespaceIfAny(buf);
948	}
949	}
950
951	if (buf.eof())
952	throw Exception ("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
953	++buf.position();
954	}
955	else
956	{
957	throw Exception ("Unexpected symbol '" + std::string (*buf.position(), `1`) + "' for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA);
958	}
959	}
960
961
962	void readException(Exception & e, ReadBuffer & buf, const String & additional_message)
963	{
964	int code = `0`;
965	String name;
966	String message;
967	String stack_trace;
968	bool has_nested = false;
969
970	readBinary(code, buf);
971	readBinary(name, buf);
972	readBinary(message, buf);
973	readBinary(stack_trace, buf);
974	readBinary(has_nested, buf);
975
976	WriteBufferFromOwnString out;
977
978	if (!additional_message.empty())
979	out << additional_message << ". ";
980
981	if (name != "DB::Exception")
982	out << name << ". ";
983
984	out << message << ".";
985
986	if (!stack_trace.empty())
987	out << " Stack trace:\n\n" << stack_trace;
988
989	if (has_nested)
990	{
991	Exception nested;
992	readException(nested, buf);
993	e = Exception (out.str(), nested, code);
994	}
995	else
996	e = Exception (out.str(), code);
997	}
998
999	void readAndThrowException(ReadBuffer & buf, const String & additional_message)
1000	{
1001	Exception e;
1002	readException(e, buf, additional_message);
1003	e.rethrow();
1004	}
1005
1006
1007	void skipToNextLineOrEOF(ReadBuffer & buf)
1008	{
1009	while (!buf.eof())
1010	{
1011	char * next_pos = find_first_symbols<`'\n'`>(buf.position(), buf.buffer().end());
1012	buf.position() = next_pos;
1013
1014	if (!buf.hasPendingData())
1015	continue;
1016
1017	if (*buf.position() == `'\n'`)
1018	{
1019	++buf.position();
1020	return;
1021	}
1022	}
1023	}
1024
1025
1026	void skipToUnescapedNextLineOrEOF(ReadBuffer & buf)
1027	{
1028	while (!buf.eof())
1029	{
1030	char * next_pos = find_first_symbols<`'\n'`, `'\\'`>(buf.position(), buf.buffer().end());
1031	buf.position() = next_pos;
1032
1033	if (!buf.hasPendingData())
1034	continue;
1035
1036	if (*buf.position() == `'\n'`)
1037	{
1038	++buf.position();
1039	return;
1040	}
1041
1042	if (*buf.position() == `'\\'`)
1043	{
1044	++buf.position();
1045	if (buf.eof())
1046	return;
1047
1048	/// Skip escaped character. We do not consider escape sequences with more than one character after backslash (\x01).
1049	/// It's ok for the purpose of this function, because we are interested only in \n and \\.
1050	++buf.position();
1051	continue;
1052	}
1053	}
1054	}
1055
1056	void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
1057	{
1058	assert(current >= in.position());
1059	assert(current <= in.buffer().end());
1060
1061	const int old_bytes = memory.size();
1062	const int additional_bytes = current - in.position();
1063	const int new_bytes = old_bytes + additional_bytes;
1064	/// There are no new bytes to add to memory.
1065	/// No need to do extra stuff.
1066	if (new_bytes == `0`)
1067	return;
1068	memory.resize(new_bytes);
1069	memcpy(memory.data() + old_bytes, in.position(), additional_bytes);
1070	in.position() = current;
1071	}
1072
1073	bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
1074	{
1075	assert(current <= in.buffer().end());
1076
1077	if (current < in.buffer().end())
1078	return true;
1079
1080	saveUpToPosition(in, memory, current);
1081	bool loaded_more = !in.eof();
1082	assert(in.position() == in.buffer().begin());
1083	current = in.position();
1084	return loaded_more;
1085	}
1086
1087	}
1088

Browse the source code of ClickHouse/dbms/src/IO/ReadHelpers.cpp