json_stream_parser.cc source code [Velox/build/_deps/protobuf-src/src/google/protobuf/util/internal/json_stream_parser.cc]

1	// Protocol Buffers - Google's data interchange format
2	// Copyright 2008 Google Inc. All rights reserved.
3	// https://developers.google.com/protocol-buffers/
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are
7	// met:
8	//
9	//     * Redistributions of source code must retain the above copyright
10	// notice, this list of conditions and the following disclaimer.
11	//     * Redistributions in binary form must reproduce the above
12	// copyright notice, this list of conditions and the following disclaimer
13	// in the documentation and/or other materials provided with the
14	// distribution.
15	//     * Neither the name of Google Inc. nor the names of its
16	// contributors may be used to endorse or promote products derived from
17	// this software without specific prior written permission.
18	//
19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31	#include <google/protobuf/util/internal/json_stream_parser.h>
32
33	#include <algorithm>
34	#include <cctype>
35	#include <cmath>
36	#include <memory>
37	#include <stack>
38	#include <string>
39
40	#include <google/protobuf/stubs/common.h>
41	#include <google/protobuf/stubs/logging.h>
42	#include <google/protobuf/stubs/strutil.h>
43	#include <google/protobuf/stubs/status.h>
44	#include <google/protobuf/util/internal/object_writer.h>
45	#include <google/protobuf/util/internal/json_escaping.h>
46
47
48	namespace google {
49	namespace protobuf {
50	namespace util {
51
52	namespace converter {
53
54	// Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X)
55	static const int kUnicodeEscapedLength = `6`;
56
57	static const int kDefaultMaxRecursionDepth = `100`;
58
59	// These cannot be constexpr for portability with VS2015.
60	static const StringPiece kKeywordTrue = "true";
61	static const StringPiece kKeywordFalse = "false";
62	static const StringPiece kKeywordNull = "null";
63
// Returns true if `c` is an ASCII letter, an underscore or a dollar sign,
// i.e. a character that may begin an unquoted key.
inline bool IsLetter(char c) {
  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') ||
         (c == '$');
}
68
69	inline bool IsAlphanumeric(char c) {
70	return IsLetter(c) \|\| (`'0'` <= c && c <= `'9'`);
71	}
72
73	// Indicates a character may not be part of an unquoted key.
74	inline bool IsKeySeparator(char c) {
75	return (ascii_isspace(c) \|\| c == `'"'` \|\| c == `'\''` \|\| c == `'{'` \|\|
76	c == `'}'` \|\| c == `'['` \|\| c == `']'` \|\| c == `':'` \|\| c == `','`);
77	}
78
79	inline void ReplaceInvalidCodePoints(StringPiece str,
80	const std::string& replacement,
81	std::string* dst) {
82	while (!str.empty()) {
83	int n_valid_bytes = internal::UTF8SpnStructurallyValid(str);
84	StringPiece valid_part = str.substr(pos: `0`, n: n_valid_bytes);
85	StrAppend(dest: dst, a: valid_part);
86
87	if (n_valid_bytes == str.size()) {
88	break;
89	}
90
91	// Append replacement value.
92	StrAppend(dest: dst, a: replacement);
93
94	// Move past valid bytes + one invalid byte.
95	str.remove_prefix(n: n_valid_bytes + `1`);
96	}
97	}
98
99	static bool ConsumeKey(StringPiece* input, StringPiece* key) {
100	if (input->empty() \|\| !IsLetter(c: (input)[`0`])) return* false;
101	int len = `1`;
102	for (; len < input->size(); ++len) {
103	if (!IsAlphanumeric(c: (*input)[len])) {
104	break;
105	}
106	}
107	*key = StringPiece(input->data(), len);
108	*input = StringPiece(input->data() + len, input->size() - len);
109	return true;
110	}
111
112	// Same as 'ConsumeKey', but allows a widened set of key characters.
113	static bool ConsumeKeyPermissive(StringPiece* input,
114	StringPiece* key) {
115	if (input->empty() \|\| !IsLetter(c: (input)[`0`])) return* false;
116	int len = `1`;
117	for (; len < input->size(); ++len) {
118	if (IsKeySeparator(c: (*input)[len])) {
119	break;
120	}
121	}
122	*key = StringPiece(input->data(), len);
123	*input = StringPiece(input->data() + len, input->size() - len);
124	return true;
125	}
126
127	static bool MatchKey(StringPiece input) {
128	return !input.empty() && IsLetter(c: input [`0`]);
129	}
130
131	JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
132	: ow_(ow),
133	stack_(),
134	leftover_(),
135	json_(),
136	p_(),
137	key_(),
138	key_storage_(),
139	finishing_(false),
140	seen_non_whitespace_(false),
141	allow_no_root_element_(false),
142	parsed_(),
143	parsed_storage_(),
144	string_open_(`0`),
145	chunk_storage_(),
146	coerce_to_utf8_(false),
147	utf8_replacement_character_(" "),
148	allow_empty_null_(false),
149	allow_permissive_key_naming_(false),
150	loose_float_number_conversion_(false),
151	recursion_depth_(`0`),
152	max_recursion_depth_(kDefaultMaxRecursionDepth) {
153	// Initialize the stack with a single value to be parsed.
154	stack_.push(x: VALUE);
155	}
156
157	JsonStreamParser::~JsonStreamParser() {}
158
159
160	util::Status JsonStreamParser::Parse(StringPiece json) {
161	StringPiece chunk = json;
162	// If we have leftovers from a previous chunk, append the new chunk to it
163	// and create a new StringPiece pointing at the string's data. This could
164	// be large but we rely on the chunks to be small, assuming they are
165	// fragments of a Cord.
166	if (!leftover_.empty()) {
167	// Don't point chunk to leftover_ because leftover_ will be updated in
168	// ParseChunk(chunk).
169	chunk_storage_.swap(s&: leftover_);
170	StrAppend(dest: &chunk_storage_, a: json);
171	chunk = StringPiece(chunk_storage_);
172	}
173
174	// Find the structurally valid UTF8 prefix and parse only that.
175	int n = internal::UTF8SpnStructurallyValid(str: chunk);
176	if (n > `0`) {
177	util::Status status = ParseChunk(chunk: chunk.substr(pos: `0`, n));
178
179	// Any leftover characters are stashed in leftover_ for later parsing when
180	// there is more data available.
181	StrAppend(dest: &leftover_, a: chunk.substr(pos: n));
182	return status;
183	} else {
184	leftover_.assign(s: chunk.data(), n: chunk.size());
185	return util::Status();
186	}
187	}
188
189	util::Status JsonStreamParser::FinishParse() {
190	// If we do not expect anything and there is nothing left to parse we're all
191	// done.
192	if (stack_.empty() && leftover_.empty()) {
193	return util::Status();
194	}
195
196	// Lifetime needs to last until RunParser returns, so keep this variable
197	// outside of the coerce_to_utf8 block.
198	std::unique_ptr<std::string> scratch;
199
200	bool is_valid_utf8 = internal::IsStructurallyValidUTF8(str: leftover_);
201	if (coerce_to_utf8_ && !is_valid_utf8) {
202	scratch.reset(p: new std::string);
203	scratch ->reserve(res_arg: leftover_.size() * utf8_replacement_character_.size());
204	ReplaceInvalidCodePoints(str: leftover_, replacement: utf8_replacement_character_,
205	dst: scratch.get());
206	p_ = json_ = *scratch;
207	} else {
208	p_ = json_ = leftover_;
209	if (!is_valid_utf8) {
210	return ReportFailure(message: "Encountered non UTF-8 code points.",
211	parse_code: ParseErrorType::NON_UTF_8);
212	}
213	}
214
215	// Parse the remainder in finishing mode, which reports errors for things like
216	// unterminated strings or unknown tokens that would normally be retried.
217	finishing_ = true;
218	util::Status result = RunParser();
219	if (result.ok()) {
220	SkipWhitespace();
221	if (!p_.empty()) {
222	result =
223	ReportFailure(message: "Parsing terminated before end of input.",
224	parse_code: ParseErrorType::PARSING_TERMINATED_BEFORE_END_OF_INPUT);
225	}
226	}
227	return result;
228	}
229
230	util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
231	// Do not do any work if the chunk is empty.
232	if (chunk.empty()) return util::Status();
233
234	p_ = json_ = chunk;
235
236	finishing_ = false;
237	util::Status result = RunParser();
238	if (!result.ok()) return result;
239
240	SkipWhitespace();
241	if (p_.empty()) {
242	// If we parsed everything we had, clear the leftover.
243	leftover_.clear();
244	} else {
245	// If we do not expect anything i.e. stack is empty, and we have non-empty
246	// string left to parse, we report an error.
247	if (stack_.empty()) {
248	return ReportFailure(
249	message: "Parsing terminated before end of input.",
250	parse_code: ParseErrorType::PARSING_TERMINATED_BEFORE_END_OF_INPUT);
251	}
252	// If we expect future data i.e. stack is non-empty, and we have some
253	// unparsed data left, we save it for later parse.
254	leftover_ = std::string (p_);
255	}
256	return util::Status();
257	}
258
259	bool JsonStreamParser::IsInputAllWhiteSpaces(TokenType type) {
260	// Conclude the whole input is full of white spaces by:
261	// - it is at the finishing stage
262	// - we have run out of the input data
263	// - haven't seen non-whitespace char so far
264	if (finishing_ && p_.empty() && type == UNKNOWN && !seen_non_whitespace_) {
265	return true;
266	}
267	return false;
268	}
269
270	util::Status JsonStreamParser::RunParser() {
271	while (!stack_.empty()) {
272	ParseType type = stack_.top();
273	TokenType t = (string_open_ == `0`) ? GetNextTokenType() : BEGIN_STRING;
274	stack_.pop();
275	util::Status result;
276	switch (type) {
277	case VALUE:
278	if (allow_no_root_element_ && IsInputAllWhiteSpaces(type: t)) {
279	return util::Status();
280	}
281	result = ParseValue(type: t);
282	break;
283
284	case OBJ_MID:
285	result = ParseObjectMid(type: t);
286	break;
287
288	case ENTRY:
289	result = ParseEntry(type: t);
290	break;
291
292	case ENTRY_MID:
293	result = ParseEntryMid(type: t);
294	break;
295
296	case ARRAY_VALUE:
297	result = ParseArrayValue(type: t);
298	break;
299
300	case ARRAY_MID:
301	result = ParseArrayMid(type: t);
302	break;
303
304	default:
305	result =
306	util::InternalError(message: StrCat(a: "Unknown parse type: ", b: type));
307	break;
308	}
309	if (!result.ok()) {
310	// If we were cancelled, save our state and try again later.
311	if (!finishing_ && util::IsCancelled(status: result)) {
312	stack_.push(x: type);
313	// If we have a key we still need to render, make sure to save off the
314	// contents in our own storage.
315	if (!key_.empty() && key_storage_.empty()) {
316	StrAppend(dest: &key_storage_, a: key_);
317	key_ = StringPiece(key_storage_);
318	}
319	result = util::Status();
320	}
321	return result;
322	}
323	}
324	return util::Status();
325	}
326
327	util::Status JsonStreamParser::ParseValue(TokenType type) {
328	switch (type) {
329	case BEGIN_OBJECT:
330	return HandleBeginObject();
331	case BEGIN_ARRAY:
332	return HandleBeginArray();
333	case BEGIN_STRING:
334	return ParseString();
335	case BEGIN_NUMBER:
336	return ParseNumber();
337	case BEGIN_TRUE:
338	return ParseTrue();
339	case BEGIN_FALSE:
340	return ParseFalse();
341	case BEGIN_NULL:
342	return ParseNull();
343	case UNKNOWN:
344	return ReportUnknown(message: "Expected a value.", parse_code: ParseErrorType::EXPECTED_VALUE);
345	default: {
346	// Special case for having been cut off while parsing, wait for more data.
347	// This handles things like 'fals' being at the end of the string, we
348	// don't know if the next char would be e, completing it, or something
349	// else, making it invalid.
350	if (!finishing_ && p_.length() < kKeywordFalse.length()) {
351	return util::CancelledError(message: "");
352	}
353
354	if (allow_empty_null_ && IsEmptyNullAllowed(type)) {
355	return ParseEmptyNull();
356	}
357	return ReportFailure(message: "Unexpected token.",
358	parse_code: ParseErrorType::UNEXPECTED_TOKEN);
359	}
360	}
361	}
362
363	util::Status JsonStreamParser::ParseString() {
364	util::Status result = ParseStringHelper();
365	if (result.ok()) {
366	ow_->RenderString(name: key_, value: parsed_);
367	key_ = StringPiece();
368	parsed_ = StringPiece();
369	parsed_storage_.clear();
370	}
371	return result;
372	}
373
374	util::Status JsonStreamParser::ParseStringHelper() {
375	// If we haven't seen the start quote, grab it and remember it for later.
376	if (string_open_ == `0`) {
377	string_open_ = *p_.data();
378	GOOGLE_DCHECK(string_open_ == `'\"'` \|\| string_open_ == `'\''`);
379	Advance();
380	}
381	// Track where we last copied data from so we can minimize copying.
382	const char* last = p_.data();
383	while (!p_.empty()) {
384	const char* data = p_.data();
385	if (*data == `'\\'`) {
386	// We're about to handle an escape, copy all bytes from last to data.
387	if (last < data) {
388	parsed_storage_.append(s: last, n: data - last);
389	}
390	// If we ran out of string after the \, cancel or report an error
391	// depending on if we expect more data later.
392	if (p_.length() == `1`) {
393	if (!finishing_) {
394	return util::CancelledError(message: "");
395	}
396	return ReportFailure(message: "Closing quote expected in string.",
397	parse_code: ParseErrorType::EXPECTED_CLOSING_QUOTE);
398	}
399	// Parse a unicode escape if we found \u in the string.
400	if (data[`1`] == `'u'`) {
401	util::Status result = ParseUnicodeEscape();
402	if (!result.ok()) {
403	return result;
404	}
405	// Move last pointer past the unicode escape and continue.
406	last = p_.data();
407	continue;
408	}
409	// Handle the standard set of backslash-escaped characters.
410	switch (data[`1`]) {
411	case `'b'`:
412	parsed_storage_.push_back(c: `'\b'`);
413	break;
414	case `'f'`:
415	parsed_storage_.push_back(c: `'\f'`);
416	break;
417	case `'n'`:
418	parsed_storage_.push_back(c: `'\n'`);
419	break;
420	case `'r'`:
421	parsed_storage_.push_back(c: `'\r'`);
422	break;
423	case `'t'`:
424	parsed_storage_.push_back(c: `'\t'`);
425	break;
426	case `'v'`:
427	parsed_storage_.push_back(c: `'\v'`);
428	break;
429	default:
430	parsed_storage_.push_back(c: data[`1`]);
431	}
432	// We handled two characters, so advance past them and continue.
433	p_.remove_prefix(n: `2`);
434	last = p_.data();
435	continue;
436	}
437	// If we found the closing quote note it, advance past it, and return.
438	if (*data == string_open_) {
439	// If we didn't copy anything, reuse the input buffer.
440	if (parsed_storage_.empty()) {
441	parsed_ = StringPiece(last, data - last);
442	} else {
443	if (last < data) {
444	parsed_storage_.append(s: last, n: data - last);
445	}
446	parsed_ = StringPiece(parsed_storage_);
447	}
448	// Clear the quote char so next time we try to parse a string we'll
449	// start fresh.
450	string_open_ = `0`;
451	Advance();
452	return util::Status();
453	}
454	// Normal character, just advance past it.
455	Advance();
456	}
457	// If we ran out of characters, copy over what we have so far.
458	if (last < p_.data()) {
459	parsed_storage_.append(s: last, n: p_.data() - last);
460	}
461	// If we didn't find the closing quote but we expect more data, cancel for now
462	if (!finishing_) {
463	return util::CancelledError(message: "");
464	}
465	// End of string reached without a closing quote, report an error.
466	string_open_ = `0`;
467	return ReportFailure(message: "Closing quote expected in string.",
468	parse_code: ParseErrorType::EXPECTED_CLOSING_QUOTE);
469	}
470
471	// Converts a unicode escaped character to a decimal value stored in a char32
472	// for use in UTF8 encoding utility. We assume that str begins with \uhhhh and
473	// convert that from the hex number to a decimal value.
474	//
475	// There are some security exploits with UTF-8 that we should be careful of:
476	// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
477	// - http://sites/intl-eng/design-guide/core-application
478	util::Status JsonStreamParser::ParseUnicodeEscape() {
479	if (p_.length() < kUnicodeEscapedLength) {
480	if (!finishing_) {
481	return util::CancelledError(message: "");
482	}
483	return ReportFailure(message: "Illegal hex string.",
484	parse_code: ParseErrorType::ILLEGAL_HEX_STRING);
485	}
486	GOOGLE_DCHECK_EQ(`'\\'`, p_.data()[`0`]);
487	GOOGLE_DCHECK_EQ(`'u'`, p_.data()[`1`]);
488	uint32_t code = `0`;
489	for (int i = `2`; i < kUnicodeEscapedLength; ++i) {
490	if (!isxdigit(p_.data()[i])) {
491	return ReportFailure(message: "Invalid escape sequence.",
492	parse_code: ParseErrorType::INVALID_ESCAPE_SEQUENCE);
493	}
494	code = (code << `4`) + hex_digit_to_int(c: p_.data()[i]);
495	}
496	if (code >= JsonEscaping::kMinHighSurrogate &&
497	code <= JsonEscaping::kMaxHighSurrogate) {
498	if (p_.length() < `2` * kUnicodeEscapedLength) {
499	if (!finishing_) {
500	return util::CancelledError(message: "");
501	}
502	if (!coerce_to_utf8_) {
503	return ReportFailure(message: "Missing low surrogate.",
504	parse_code: ParseErrorType::MISSING_LOW_SURROGATE);
505	}
506	} else if (p_.data()[kUnicodeEscapedLength] == `'\\'` &&
507	p_.data()[kUnicodeEscapedLength + `1`] == `'u'`) {
508	uint32_t low_code = `0`;
509	for (int i = kUnicodeEscapedLength + `2`; i < `2` * kUnicodeEscapedLength;
510	++i) {
511	if (!isxdigit(p_.data()[i])) {
512	return ReportFailure(message: "Invalid escape sequence.",
513	parse_code: ParseErrorType::INVALID_ESCAPE_SEQUENCE);
514	}
515	low_code = (low_code << `4`) + hex_digit_to_int(c: p_.data()[i]);
516	}
517	if (low_code >= JsonEscaping::kMinLowSurrogate &&
518	low_code <= JsonEscaping::kMaxLowSurrogate) {
519	// Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
520	code = (((code & `0x3FF`) << `10`) \| (low_code & `0x3FF`)) +
521	JsonEscaping::kMinSupplementaryCodePoint;
522	// Advance past the first code unit escape.
523	p_.remove_prefix(n: kUnicodeEscapedLength);
524	} else if (!coerce_to_utf8_) {
525	return ReportFailure(message: "Invalid low surrogate.",
526	parse_code: ParseErrorType::INVALID_LOW_SURROGATE);
527	}
528	} else if (!coerce_to_utf8_) {
529	return ReportFailure(message: "Missing low surrogate.",
530	parse_code: ParseErrorType::MISSING_LOW_SURROGATE);
531	}
532	}
533	if (!coerce_to_utf8_ && !IsValidCodePoint(code_point: code)) {
534	return ReportFailure(message: "Invalid unicode code point.",
535	parse_code: ParseErrorType::INVALID_UNICODE);
536	}
537	char buf[UTFmax];
538	int len = EncodeAsUTF8Char(code_point: code, output: buf);
539	// Advance past the [final] code unit escape.
540	p_.remove_prefix(n: kUnicodeEscapedLength);
541	parsed_storage_.append(s: buf, n: len);
542	return util::Status();
543	}
544
545	util::Status JsonStreamParser::ParseNumber() {
546	NumberResult number;
547	util::Status result = ParseNumberHelper(result: &number);
548	if (result.ok()) {
549	switch (number.type) {
550	case NumberResult::DOUBLE:
551	ow_->RenderDouble(name: key_, value: number.double_val);
552	key_ = StringPiece();
553	break;
554
555	case NumberResult::INT:
556	ow_->RenderInt64(name: key_, value: number.int_val);
557	key_ = StringPiece();
558	break;
559
560	case NumberResult::UINT:
561	ow_->RenderUint64(name: key_, value: number.uint_val);
562	key_ = StringPiece();
563	break;
564
565	default:
566	return ReportFailure(message: "Unable to parse number.",
567	parse_code: ParseErrorType::UNABLE_TO_PARSE_NUMBER);
568	}
569	}
570	return result;
571	}
572
573	util::Status JsonStreamParser::ParseDoubleHelper(const std::string& number,
574	NumberResult* result) {
575	if (!safe_strtod(str: number, value: &result->double_val)) {
576	return ReportFailure(message: "Unable to parse number.",
577	parse_code: ParseErrorType::UNABLE_TO_PARSE_NUMBER);
578	}
579	if (!loose_float_number_conversion_ && !std::isfinite(x: result->double_val)) {
580	return ReportFailure(message: "Number exceeds the range of double.",
581	parse_code: ParseErrorType::NUMBER_EXCEEDS_RANGE_DOUBLE);
582	}
583	result->type = NumberResult::DOUBLE;
584	return util::Status();
585	}
586
587	util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
588	const char* data = p_.data();
589	int length = p_.length();
590
591	// Look for the first non-numeric character, or the end of the string.
592	int index = `0`;
593	bool floating = false;
594	bool negative = data[index] == `'-'`;
595	// Find the first character that cannot be part of the number. Along the way
596	// detect if the number needs to be parsed as a double.
597	// Note that this restricts numbers to the JSON specification, so for example
598	// we do not support hex or octal notations.
599	for (; index < length; ++index) {
600	char c = data[index];
601	if (isdigit(c)) continue;
602	if (c == `'.'` \|\| c == `'e'` \|\| c == `'E'`) {
603	floating = true;
604	continue;
605	}
606	if (c == `'+'` \|\| c == `'-'` \|\| c == `'x'`) continue;
607	// Not a valid number character, break out.
608	break;
609	}
610
611	// If the entire input is a valid number, and we may have more content in the
612	// future, we abort for now and resume when we know more.
613	if (index == length && !finishing_) {
614	return util::CancelledError(message: "");
615	}
616
617	// Create a string containing just the number, so we can use safe_strtoX
618	std::string number = std::string (p_.substr(pos: `0`, n: index));
619
620	// Floating point number, parse as a double.
621	if (floating) {
622	util::Status status = ParseDoubleHelper(number, result);
623	if (status.ok()) {
624	p_.remove_prefix(n: index);
625	}
626	return status;
627	}
628
629	// Positive non-floating point number, parse as a uint64_t.
630	if (!negative) {
631	// Octal/Hex numbers are not valid JSON values.
632	if (number.length() >= `2` && number [`0`] == `'0'`) {
633	return ReportFailure(
634	message: "Octal/hex numbers are not valid JSON values.",
635	parse_code: ParseErrorType::OCTAL_OR_HEX_ARE_NOT_VALID_JSON_VALUES);
636	}
637	if (safe_strtou64(str: number, value: &result->uint_val)) {
638	result->type = NumberResult::UINT;
639	p_.remove_prefix(n: index);
640	return util::Status();
641	} else {
642	// If the value is too large, parse it as double.
643	util::Status status = ParseDoubleHelper(number, result);
644	if (status.ok()) {
645	p_.remove_prefix(n: index);
646	}
647	return status;
648	}
649	}
650
651	// Octal/Hex numbers are not valid JSON values.
652	if (number.length() >= `3` && number [`1`] == `'0'`) {
653	return ReportFailure(
654	message: "Octal/hex numbers are not valid JSON values.",
655	parse_code: ParseErrorType::OCTAL_OR_HEX_ARE_NOT_VALID_JSON_VALUES);
656	}
657	// Negative non-floating point number, parse as an int64_t.
658	if (safe_strto64(str: number, value: &result->int_val)) {
659	result->type = NumberResult::INT;
660	p_.remove_prefix(n: index);
661	return util::Status();
662	} else {
663	// If the value is too large, parse it as double.
664	util::Status status = ParseDoubleHelper(number, result);
665	if (status.ok()) {
666	p_.remove_prefix(n: index);
667	}
668	return status;
669	}
670	}
671
672	util::Status JsonStreamParser::HandleBeginObject() {
673	GOOGLE_DCHECK_EQ(`'{'`, *p_.data());
674	Advance();
675	ow_->StartObject(name: key_);
676	auto status = IncrementRecursionDepth(key: key_);
677	if (!status.ok()) {
678	return status;
679	}
680	key_ = StringPiece();
681	stack_.push(x: ENTRY);
682	return util::Status();
683	}
684
685	util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
686	if (type == UNKNOWN) {
687	return ReportUnknown(message: "Expected , or } after key:value pair.",
688	parse_code: ParseErrorType::EXPECTED_COMMA_OR_BRACES);
689	}
690
691	// Object is complete, advance past the comma and render the EndObject.
692	if (type == END_OBJECT) {
693	Advance();
694	ow_->EndObject();
695	--recursion_depth_;
696	return util::Status();
697	}
698	// Found a comma, advance past it and get ready for an entry.
699	if (type == VALUE_SEPARATOR) {
700	Advance();
701	stack_.push(x: ENTRY);
702	return util::Status();
703	}
704	// Illegal token after key:value pair.
705	return ReportFailure(message: "Expected , or } after key:value pair.",
706	parse_code: ParseErrorType::EXPECTED_COMMA_OR_BRACES);
707	}
708
709	util::Status JsonStreamParser::ParseEntry(TokenType type) {
710	if (type == UNKNOWN) {
711	return ReportUnknown(message: "Expected an object key or }.",
712	parse_code: ParseErrorType::EXPECTED_OBJECT_KEY_OR_BRACES);
713	}
714
715	// Close the object and return. This allows for trailing commas.
716	if (type == END_OBJECT) {
717	ow_->EndObject();
718	Advance();
719	--recursion_depth_;
720	return util::Status();
721	}
722
723	util::Status result;
724	if (type == BEGIN_STRING) {
725	// Key is a string (standard JSON), parse it and store the string.
726	result = ParseStringHelper();
727	if (result.ok()) {
728	key_storage_.clear();
729	if (!parsed_storage_.empty()) {
730	parsed_storage_.swap(s&: key_storage_);
731	key_ = StringPiece(key_storage_);
732	} else {
733	key_ = parsed_;
734	}
735	parsed_ = StringPiece();
736	}
737	} else if (type == BEGIN_KEY) {
738	// Key is a bare key (back compat), create a StringPiece pointing to it.
739	result = ParseKey();
740	} else if (type == BEGIN_NULL \|\| type == BEGIN_TRUE \|\| type == BEGIN_FALSE) {
741	// Key may be a bare key that begins with a reserved word.
742	result = ParseKey();
743	if (result.ok() && (key_ == kKeywordNull \|\| key_ == kKeywordTrue \|\|
744	key_ == kKeywordFalse)) {
745	result = ReportFailure(message: "Expected an object key or }.",
746	parse_code: ParseErrorType::EXPECTED_OBJECT_KEY_OR_BRACES);
747	}
748	} else {
749	// Unknown key type, report an error.
750	result = ReportFailure(message: "Expected an object key or }.",
751	parse_code: ParseErrorType::EXPECTED_OBJECT_KEY_OR_BRACES);
752	}
753	// On success we next expect an entry mid ':' then an object mid ',' or '}'
754	if (result.ok()) {
755	stack_.push(x: OBJ_MID);
756	stack_.push(x: ENTRY_MID);
757	}
758	return result;
759	}
760
761	util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
762	if (type == UNKNOWN) {
763	return ReportUnknown(message: "Expected : between key:value pair.",
764	parse_code: ParseErrorType::EXPECTED_COLON);
765	}
766	if (type == ENTRY_SEPARATOR) {
767	Advance();
768	stack_.push(x: VALUE);
769	return util::Status();
770	}
771	return ReportFailure(message: "Expected : between key:value pair.",
772	parse_code: ParseErrorType::EXPECTED_COLON);
773	}
774
775	util::Status JsonStreamParser::HandleBeginArray() {
776	GOOGLE_DCHECK_EQ(`'['`, *p_.data());
777	Advance();
778	ow_->StartList(name: key_);
779	key_ = StringPiece();
780	stack_.push(x: ARRAY_VALUE);
781	return util::Status();
782	}
783
784	util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
785	if (type == UNKNOWN) {
786	return ReportUnknown(message: "Expected a value or ] within an array.",
787	parse_code: ParseErrorType::EXPECTED_VALUE_OR_BRACKET);
788	}
789
790	if (type == END_ARRAY) {
791	ow_->EndList();
792	Advance();
793	return util::Status();
794	}
795
796	// The ParseValue call may push something onto the stack so we need to make
797	// sure an ARRAY_MID is after it, so we push it on now. Also, the parsing of
798	// empty-null array value is relying on this ARRAY_MID token.
799	stack_.push(x: ARRAY_MID);
800	util::Status result = ParseValue(type);
801	if (util::IsCancelled(status: result)) {
802	// If we were cancelled, pop back off the ARRAY_MID so we don't try to
803	// push it on again when we try over.
804	stack_.pop();
805	}
806	return result;
807	}
808
809	util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
810	if (type == UNKNOWN) {
811	return ReportUnknown(message: "Expected , or ] after array value.",
812	parse_code: ParseErrorType::EXPECTED_COMMA_OR_BRACKET);
813	}
814
815	if (type == END_ARRAY) {
816	ow_->EndList();
817	Advance();
818	return util::Status();
819	}
820
821	// Found a comma, advance past it and expect an array value next.
822	if (type == VALUE_SEPARATOR) {
823	Advance();
824	stack_.push(x: ARRAY_VALUE);
825	return util::Status();
826	}
827	// Illegal token after array value.
828	return ReportFailure(message: "Expected , or ] after array value.",
829	parse_code: ParseErrorType::EXPECTED_COMMA_OR_BRACKET);
830	}
831
832	util::Status JsonStreamParser::ParseTrue() {
833	ow_->RenderBool(name: key_, value: true);
834	key_ = StringPiece();
835	p_.remove_prefix(n: kKeywordTrue.length());
836	return util::Status();
837	}
838
839	util::Status JsonStreamParser::ParseFalse() {
840	ow_->RenderBool(name: key_, value: false);
841	key_ = StringPiece();
842	p_.remove_prefix(n: kKeywordFalse.length());
843	return util::Status();
844	}
845
846	util::Status JsonStreamParser::ParseNull() {
847	ow_->RenderNull(name: key_);
848	key_ = StringPiece();
849	p_.remove_prefix(n: kKeywordNull.length());
850	return util::Status();
851	}
852
853	util::Status JsonStreamParser::ParseEmptyNull() {
854	ow_->RenderNull(name: key_);
855	key_ = StringPiece();
856	return util::Status();
857	}
858
859	bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) {
860	if (stack_.empty()) return false;
861	return (stack_.top() == ARRAY_MID && type == VALUE_SEPARATOR) \|\|
862	stack_.top() == OBJ_MID;
863	}
864
865	util::Status JsonStreamParser::ReportFailure(StringPiece message,
866	ParseErrorType parse_code) {
867	(void)parse_code; // Parameter is used in Google-internal code.
868	static const int kContextLength = `20`;
869	const char* p_start = p_.data();
870	const char* json_start = json_.data();
871	const char* begin = std::max(p_start - kContextLength, json_start);
872	const char* end =
873	std::min(p_start + kContextLength, json_start + json_.size());
874	StringPiece segment(begin, end - begin);
875	std::string location(p_start - begin, `' '`);
876	location.push_back(c: `'^'`);
877	auto status = util::InvalidArgumentError(
878	message: StrCat(a: message, b: "\n", c: segment, d: "\n", e: location));
879	return status;
880	}
881
882	util::Status JsonStreamParser::ReportUnknown(StringPiece message,
883	ParseErrorType parse_code) {
884	// If we aren't finishing the parse, cancel parsing and try later.
885	if (!finishing_) {
886	return util::CancelledError(message: "");
887	}
888	if (p_.empty()) {
889	return ReportFailure(message: StrCat(a: "Unexpected end of string. ", b: message),
890	parse_code);
891	}
892	return ReportFailure(message, parse_code);
893	}
894
895	util::Status JsonStreamParser::IncrementRecursionDepth(
896	StringPiece key) const {
897	if (++recursion_depth_ > max_recursion_depth_) {
898	return util::InvalidArgumentError(message: StrCat(
899	a: "Message too deep. Max recursion depth reached for key '", b: key, c: "'"));
900	}
901	return util::Status();
902	}
903
904	void JsonStreamParser::SkipWhitespace() {
905	while (!p_.empty() && ascii_isspace(c: *p_.data())) {
906	Advance();
907	}
908	if (!p_.empty() && !ascii_isspace(c: *p_.data())) {
909	seen_non_whitespace_ = true;
910	}
911	}
912
913	void JsonStreamParser::Advance() {
914	// Advance by moving one UTF8 character while making sure we don't go beyond
915	// the length of StringPiece.
916	p_.remove_prefix(n: std::min<int>(
917	p_.length(), UTF8FirstLetterNumBytes(src: p_.data(), len: p_.length())));
918	}
919
920	util::Status JsonStreamParser::ParseKey() {
921	StringPiece original = p_;
922
923	if (allow_permissive_key_naming_) {
924	if (!ConsumeKeyPermissive(input: &p_, key: &key_)) {
925	return ReportFailure(message: "Invalid key or variable name.",
926	parse_code: ParseErrorType::INVALID_KEY_OR_VARIABLE_NAME);
927	}
928	} else {
929	if (!ConsumeKey(input: &p_, key: &key_)) {
930	return ReportFailure(message: "Invalid key or variable name.",
931	parse_code: ParseErrorType::INVALID_KEY_OR_VARIABLE_NAME);
932	}
933	}
934
935	// If we consumed everything but expect more data, reset p_ and cancel since
936	// we can't know if the key was complete or not.
937	if (!finishing_ && p_.empty()) {
938	p_ = original;
939	return util::CancelledError(message: "");
940	}
941	// Since we aren't using the key storage, clear it out.
942	key_storage_.clear();
943	return util::Status();
944	}
945
946	JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
947	SkipWhitespace();
948
949	int size = p_.size();
950	if (size == `0`) {
951	// If we ran out of data, report unknown and we'll place the previous parse
952	// type onto the stack and try again when we have more data.
953	return UNKNOWN;
954	}
955	// TODO(sven): Split this method based on context since different contexts
956	// support different tokens. Would slightly speed up processing?
957	const char* data = p_.data();
958	StringPiece data_view = StringPiece(data, size);
959	if (data == `'\"'` \|\| data == `'\''`) return BEGIN_STRING;
960	if (data == `'-'` \|\| (`'0'` <= data && *data <= `'9'`)) {
961	return BEGIN_NUMBER;
962	}
963	if (size >= kKeywordTrue.length() &&
964	HasPrefixString(str: data_view, prefix: kKeywordTrue)) {
965	return BEGIN_TRUE;
966	}
967	if (size >= kKeywordFalse.length() &&
968	HasPrefixString(str: data_view, prefix: kKeywordFalse)) {
969	return BEGIN_FALSE;
970	}
971	if (size >= kKeywordNull.length() &&
972	HasPrefixString(str: data_view, prefix: kKeywordNull)) {
973	return BEGIN_NULL;
974	}
975	if (data == `'{'`) return* BEGIN_OBJECT;
976	if (data == `'}'`) return* END_OBJECT;
977	if (data == `'['`) return* BEGIN_ARRAY;
978	if (data == `']'`) return* END_ARRAY;
979	if (data == `':'`) return* ENTRY_SEPARATOR;
980	if (data == `','`) return* VALUE_SEPARATOR;
981	if (MatchKey(input: p_)) {
982	return BEGIN_KEY;
983	}
984
985	// We don't know that we necessarily have an invalid token here, just that we
986	// can't parse what we have so far. So we don't report an error and just
987	// return UNKNOWN so we can try again later when we have more data, or if we
988	// finish and we have leftovers.
989	return UNKNOWN;
990	}
991
992	} // namespace converter
993	} // namespace util
994	} // namespace protobuf
995	} // namespace google
996

Browse the source code of Velox/build/_deps/protobuf-src/src/google/protobuf/util/internal/json_stream_parser.cc