json.cpp source code [folly/json.cpp]

1	/*
2	* Copyright 2011-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	#include <folly/json.h>
17
18	#include <algorithm>
19	#include <functional>
20	#include <iterator>
21	#include <type_traits>
22
23	#include <boost/algorithm/string.hpp>
24
25	#include <folly/Conv.h>
26	#include <folly/Portability.h>
27	#include <folly/Range.h>
28	#include <folly/String.h>
29	#include <folly/Unicode.h>
30	#include <folly/lang/Bits.h>
31	#include <folly/portability/Constexpr.h>
32
33	namespace folly {
34
35	//////////////////////////////////////////////////////////////////////
36
37	namespace json {
38	namespace {
39
40	struct Printer {
41	explicit Printer(
42	std::string& out,
43	unsigned* indentLevel,
44	serialization_opts const* opts)
45	: out_(out), indentLevel_(indentLevel), opts_(*opts) {}
46
47	void operator()(dynamic const& v) const {
48	switch (v.type()) {
49	case dynamic::DOUBLE:
50	if (!opts_.allow_nan_inf &&
51	(std::isnan(v.asDouble()) \|\| std::isinf(v.asDouble()))) {
52	throw std::runtime_error (
53	"folly::toJson: JSON object value was a "
54	"NaN or INF");
55	}
56	toAppend(
57	v.asDouble(), &out_, opts_.double_mode, opts_.double_num_digits);
58	break;
59	case dynamic::INT64: {
60	auto intval = v.asInt();
61	if (opts_.javascript_safe) {
62	// Use folly::to to check that this integer can be represented
63	// as a double without loss of precision.
64	intval = int64_t(to<double>(intval));
65	}
66	toAppend(intval, &out_);
67	break;
68	}
69	case dynamic::BOOL:
70	out_ += v.asBool() ? "true" : "false";
71	break;
72	case dynamic::NULLT:
73	out_ += "null";
74	break;
75	case dynamic::STRING:
76	escapeString(v.asString(), out_, opts_);
77	break;
78	case dynamic::OBJECT:
79	printObject(v);
80	break;
81	case dynamic::ARRAY:
82	printArray(v);
83	break;
84	default:
85	CHECK(`0`) << "Bad type " << v.type();
86	}
87	}
88
89	private:
90	void printKV(const std::pair<const dynamic, dynamic>& p) const {
91	if (!opts_.allow_non_string_keys && !p.first.isString()) {
92	throw std::runtime_error (
93	"folly::toJson: JSON object key was not a "
94	"string");
95	}
96	(*this)(p.first);
97	mapColon();
98	(*this)(p.second);
99	}
100
101	template <typename Iterator>
102	void printKVPairs(Iterator begin, Iterator end) const {
103	printKV(*begin);
104	for (++begin; begin != end; ++begin) {
105	out_ += `','`;
106	newline();
107	printKV(*begin);
108	}
109	}
110
111	void printObject(dynamic const& o) const {
112	if (o.empty()) {
113	out_ += "{}";
114	return;
115	}
116
117	out_ += `'{'`;
118	indent();
119	newline();
120	if (opts_.sort_keys \|\| opts_.sort_keys_by) {
121	using ref = std::reference_wrapper<decltype(o.items())::value_type const>;
122	std::vector<ref> refs(o.items().begin(), o.items().end());
123
124	using SortByRef = FunctionRef<bool(dynamic const&, dynamic const&)>;
125	auto const& sort_keys_by = opts_.sort_keys_by
126	? SortByRef (opts_.sort_keys_by)
127	: SortByRef (std::less<dynamic>());
128	std::sort(refs.begin(), refs.end(), [&](ref a, ref b) {
129	// Only compare keys. No ordering among identical keys.
130	return sort_keys_by (a.get().first, b.get().first);
131	});
132	printKVPairs(refs.cbegin(), refs.cend());
133	} else {
134	printKVPairs(o.items().begin(), o.items().end());
135	}
136	outdent();
137	newline();
138	out_ += `'}'`;
139	}
140
141	void printArray(dynamic const& a) const {
142	if (a.empty()) {
143	out_ += "[]";
144	return;
145	}
146
147	out_ += `'['`;
148	indent();
149	newline();
150	(*this)(a [`0`]);
151	for (auto& val : range(std::next(a.begin()), a.end())) {
152	out_ += `','`;
153	newline();
154	(*this)(val);
155	}
156	outdent();
157	newline();
158	out_ += `']'`;
159	}
160
161	private:
162	void outdent() const {
163	if (indentLevel_) {
164	--*indentLevel_;
165	}
166	}
167
168	void indent() const {
169	if (indentLevel_) {
170	++*indentLevel_;
171	}
172	}
173
174	void newline() const {
175	if (indentLevel_) {
176	out_ += to<std::string>(`'\n'`, std::string (indentLevel_ `2`, `' '`));
177	}
178	}
179
180	void mapColon() const {
181	out_ += indentLevel_ ? ": " : ":";
182	}
183
184	private:
185	std::string& out_;
186	unsigned* const indentLevel_;
187	serialization_opts const& opts_;
188	};
189
190	//////////////////////////////////////////////////////////////////////
191
192	struct FOLLY_EXPORT ParseError : std::runtime_error {
193	explicit ParseError(
194	unsigned int line,
195	std::string const& context,
196	std::string const& expected)
197	: std::runtime_error (to<std::string>(
198	"json parse error on line ",
199	line,
200	!context.empty() ? to<std::string>(" near `", context, `'\''`) : "",
201	": ",
202	expected)) {}
203	};
204
205	// Wraps our input buffer with some helper functions.
206	struct Input {
207	explicit Input(StringPiece range, json::serialization_opts const* opts)
208	: range_(range), opts_(*opts), lineNum_(`0`) {
209	storeCurrent();
210	}
211
212	Input(Input const&) = delete;
213	Input& operator=(Input const&) = delete;
214
215	char const* begin() const {
216	return range_.begin();
217	}
218
219	// Parse ahead for as long as the supplied predicate is satisfied,
220	// returning a range of what was skipped.
221	template <class Predicate>
222	StringPiece skipWhile(const Predicate& p) {
223	std::size_t skipped = `0`;
224	for (; skipped < range_.size(); ++skipped) {
225	if (!p(range_[skipped])) {
226	break;
227	}
228	if (range_[skipped] == `'\n'`) {
229	++lineNum_;
230	}
231	}
232	auto ret = range_.subpiece(`0`, skipped);
233	range_.advance(skipped);
234	storeCurrent();
235	return ret;
236	}
237
238	StringPiece skipDigits() {
239	return skipWhile([](char c) { return c >= `'0'` && c <= `'9'`; });
240	}
241
242	StringPiece skipMinusAndDigits() {
243	bool firstChar = true;
244	return skipWhile([&firstChar](char c) {
245	bool result = (c >= `'0'` && c <= `'9'`) \|\| (firstChar && c == `'-'`);
246	firstChar = false;
247	return result;
248	});
249	}
250
251	void skipWhitespace() {
252	unsigned index = `0`;
253	while (true) {
254	while (index < range_.size() && range_[index] == `' '`) {
255	index++;
256	}
257	if (index < range_.size()) {
258	if (range_[index] == `'\n'`) {
259	index++;
260	++lineNum_;
261	continue;
262	}
263	if (range_[index] == `'\t'` \|\| range_[index] == `'\r'`) {
264	index++;
265	continue;
266	}
267	}
268	break;
269	}
270	range_.advance(index);
271	storeCurrent();
272	}
273
274	void expect(char c) {
275	if (**this != c) {
276	throw ParseError (
277	lineNum_, context(), to<std::string>("expected '", c, `'\''`));
278	}
279	++*this;
280	}
281
282	std::size_t size() const {
283	return range_.size();
284	}
285
286	int operator() const* {
287	return current_;
288	}
289
290	void operator++() {
291	range_.pop_front();
292	storeCurrent();
293	}
294
295	template <class T>
296	T extract() {
297	try {
298	return to<T>(&range_);
299	} catch (std::exception const& e) {
300	error(e.what());
301	}
302	}
303
304	bool consume(StringPiece str) {
305	if (boost::starts_with(range_, str)) {
306	range_.advance(str.size());
307	storeCurrent();
308	return true;
309	}
310	return false;
311	}
312
313	std::string context() const {
314	return range_.subpiece(`0`, `16` / arbitrary /).toString();
315	}
316
317	dynamic error(char const* what) const {
318	throw ParseError (lineNum_, context(), what);
319	}
320
321	json::serialization_opts const& getOpts() {
322	return opts_;
323	}
324
325	void incrementRecursionLevel() {
326	if (currentRecursionLevel_ > opts_.recursion_limit) {
327	error("recursion limit exceeded");
328	}
329	currentRecursionLevel_++;
330	}
331
332	void decrementRecursionLevel() {
333	currentRecursionLevel_--;
334	}
335
336	private:
337	void storeCurrent() {
338	current_ = range_.empty() ? EOF : range_.front();
339	}
340
341	private:
342	StringPiece range_;
343	json::serialization_opts const& opts_;
344	unsigned lineNum_;
345	int current_;
346	unsigned int currentRecursionLevel_{`0`};
347	};
348
349	class RecursionGuard {
350	public:
351	explicit RecursionGuard(Input& in) : in_(in) {
352	in_.incrementRecursionLevel();
353	}
354
355	~RecursionGuard() {
356	in_.decrementRecursionLevel();
357	}
358
359	private:
360	Input& in_;
361	};
362
363	dynamic parseValue(Input& in);
364	std::string parseString(Input& in);
365	dynamic parseNumber(Input& in);
366
367	dynamic parseObject(Input& in) {
368	DCHECK_EQ(*in, `'{'`);
369	++in;
370
371	dynamic ret = dynamic::object;
372
373	in.skipWhitespace();
374	if (*in == `'}'`) {
375	++in;
376	return ret;
377	}
378
379	for (;;) {
380	if (in.getOpts().allow_trailing_comma && *in == `'}'`) {
381	break;
382	}
383	if (in == `'\"'`) { // string*
384	auto key = parseString(in);
385	in.skipWhitespace();
386	in.expect(`':'`);
387	in.skipWhitespace();
388	ret.insert(std::move(key), parseValue(in));
389	} else if (!in.getOpts().allow_non_string_keys) {
390	in.error("expected string for object key name");
391	} else {
392	auto key = parseValue(in);
393	in.skipWhitespace();
394	in.expect(`':'`);
395	in.skipWhitespace();
396	ret.insert(std::move(key), parseValue(in));
397	}
398
399	in.skipWhitespace();
400	if (*in != `','`) {
401	break;
402	}
403	++in;
404	in.skipWhitespace();
405	}
406	in.expect(`'}'`);
407
408	return ret;
409	}
410
411	dynamic parseArray(Input& in) {
412	DCHECK_EQ(*in, `'['`);
413	++in;
414
415	dynamic ret = dynamic::array;
416
417	in.skipWhitespace();
418	if (*in == `']'`) {
419	++in;
420	return ret;
421	}
422
423	for (;;) {
424	if (in.getOpts().allow_trailing_comma && *in == `']'`) {
425	break;
426	}
427	ret.push_back(parseValue(in));
428	in.skipWhitespace();
429	if (*in != `','`) {
430	break;
431	}
432	++in;
433	in.skipWhitespace();
434	}
435	in.expect(`']'`);
436
437	return ret;
438	}
439
440	dynamic parseNumber(Input& in) {
441	bool const negative = (*in == `'-'`);
442	if (negative && in.consume("-Infinity")) {
443	if (in.getOpts().parse_numbers_as_strings) {
444	return "-Infinity";
445	} else {
446	return -std::numeric_limits<double>::infinity();
447	}
448	}
449
450	auto integral = in.skipMinusAndDigits();
451	if (negative && integral.size() < `2`) {
452	in.error("expected digits after `-'");
453	}
454
455	auto const wasE = in == `'e'` \|\| in == `'E'`;
456
457	constexpr const char* maxInt = "9223372036854775807";
458	constexpr const char* minInt = "-9223372036854775808";
459	constexpr auto maxIntLen = constexpr_strlen(maxInt);
460	constexpr auto minIntLen = constexpr_strlen(minInt);
461
462	if (*in != `'.'` && !wasE && in.getOpts().parse_numbers_as_strings) {
463	return integral;
464	}
465
466	if (*in != `'.'` && !wasE) {
467	if (LIKELY(!in.getOpts().double_fallback \|\| integral.size() < maxIntLen) \|\|
468	(!negative && integral.size() == maxIntLen && integral <= maxInt) \|\|
469	(negative && integral.size() == minIntLen && integral <= minInt)) {
470	auto val = to<int64_t>(integral);
471	in.skipWhitespace();
472	return val;
473	} else {
474	auto val = to<double>(integral);
475	in.skipWhitespace();
476	return val;
477	}
478	}
479
480	auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
481	if (in == `'e'` \|\| in == `'E'`) {
482	++in;
483	if (in == `'+'` \|\| in == `'-'`) {
484	++in;
485	}
486	auto expPart = in.skipDigits();
487	end = expPart.end();
488	}
489	auto fullNum = range(integral.begin(), end);
490	if (in.getOpts().parse_numbers_as_strings) {
491	return fullNum;
492	}
493	auto val = to<double>(fullNum);
494	return val;
495	}
496
497	std::string decodeUnicodeEscape(Input& in) {
498	auto hexVal = [&](int c) -> uint16_t {
499	// clang-format off
500	return uint16_t(
501	c >= `'0'` && c <= `'9'` ? c - `'0'` :
502	c >= `'a'` && c <= `'f'` ? c - `'a'` + `10` :
503	c >= `'A'` && c <= `'F'` ? c - `'A'` + `10` :
504	(in.error("invalid hex digit"), `0`));
505	// clang-format on
506	};
507
508	auto readHex = [&]() -> uint16_t {
509	if (in.size() < `4`) {
510	in.error("expected 4 hex digits");
511	}
512
513	uint16_t ret = uint16_t(hexVal (in) `4096`);
514	++in;
515	ret += hexVal (in) `256`;
516	++in;
517	ret += hexVal (in) `16`;
518	++in;
519	ret += hexVal (*in);
520	++in;
521	return ret;
522	};
523
524	/*
525	* If the value encoded is in the surrogate pair range, we need to
526	* make sure there is another escape that we can use also.
527	*/
528	uint32_t codePoint = readHex ();
529	if (codePoint >= `0xd800` && codePoint <= `0xdbff`) {
530	if (!in.consume("\\u")) {
531	in.error(
532	"expected another unicode escape for second half of "
533	"surrogate pair");
534	}
535	uint16_t second = readHex ();
536	if (second >= `0xdc00` && second <= `0xdfff`) {
537	codePoint = `0x10000` + ((codePoint & `0x3ff`) << `10`) + (second & `0x3ff`);
538	} else {
539	in.error("second character in surrogate pair is invalid");
540	}
541	} else if (codePoint >= `0xdc00` && codePoint <= `0xdfff`) {
542	in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
543	}
544
545	return codePointToUtf8(codePoint);
546	}
547
548	std::string parseString(Input& in) {
549	DCHECK_EQ(*in, `'\"'`);
550	++in;
551
552	std::string ret;
553	for (;;) {
554	auto range = in.skipWhile([](char c) { return c != `'\"'` && c != `'\\'`; });
555	ret.append(range.begin(), range.end());
556
557	if (*in == `'\"'`) {
558	++in;
559	break;
560	}
561	if (*in == `'\\'`) {
562	++in;
563	switch (*in) {
564	// clang-format off
565	case `'\"'`: ret.push_back(`'\"'`); ++in; break;
566	case `'\\'`: ret.push_back(`'\\'`); ++in; break;
567	case `'/'`: ret.push_back(`'/'`); ++in; break;
568	case `'b'`: ret.push_back(`'\b'`); ++in; break;
569	case `'f'`: ret.push_back(`'\f'`); ++in; break;
570	case `'n'`: ret.push_back(`'\n'`); ++in; break;
571	case `'r'`: ret.push_back(`'\r'`); ++in; break;
572	case `'t'`: ret.push_back(`'\t'`); ++in; break;
573	case `'u'`: ++in; ret += decodeUnicodeEscape(in); break;
574	// clang-format on
575	default:
576	in.error(
577	to<std::string>("unknown escape ", *in, " in string").c_str());
578	}
579	continue;
580	}
581	if (*in == EOF) {
582	in.error("unterminated string");
583	}
584	if (!*in) {
585	/*
586	* Apparently we're actually supposed to ban all control
587	* characters from strings. This seems unnecessarily
588	* restrictive, so we're only banning zero bytes. (Since the
589	* string is presumed to be UTF-8 encoded it's fine to just
590	* check this way.)
591	*/
592	in.error("null byte in string");
593	}
594
595	ret.push_back(char(*in));
596	++in;
597	}
598
599	return ret;
600	}
601
602	dynamic parseValue(Input& in) {
603	RecursionGuard guard(in);
604
605	in.skipWhitespace();
606	// clang-format off
607	return
608	*in == `'['` ? parseArray(in) :
609	*in == `'{'` ? parseObject(in) :
610	*in == `'\"'` ? parseString(in) :
611	(in == `'-'` \|\| (in >= `'0'` && *in <= `'9'`)) ? parseNumber(in) :
612	in.consume("true") ? true :
613	in.consume("false") ? false :
614	in.consume("null") ? nullptr :
615	in.consume("Infinity") ?
616	(in.getOpts().parse_numbers_as_strings ? (dynamic)"Infinity" :
617	(dynamic)std::numeric_limits<double>::infinity()) :
618	in.consume("NaN") ?
619	(in.getOpts().parse_numbers_as_strings ? (dynamic)"NaN" :
620	(dynamic)std::numeric_limits<double>::quiet_NaN()) :
621	in.error("expected json value");
622	// clang-format on
623	}
624
625	} // namespace
626
627	//////////////////////////////////////////////////////////////////////
628
629	std::array<uint64_t, `2`> buildExtraAsciiToEscapeBitmap(StringPiece chars) {
630	std::array<uint64_t, `2`> escapes{{`0`, `0`}};
631	for (auto b : ByteRange (chars)) {
632	if (b >= `0x20` && b < `0x80`) {
633	escapes [b / `64`] \|= uint64_t(`1`) << (b % `64`);
634	}
635	}
636	return escapes;
637	}
638
639	std::string serialize(dynamic const& dyn, serialization_opts const& opts) {
640	std::string ret;
641	unsigned indentLevel = `0`;
642	Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
643	p (dyn);
644	return ret;
645	}
646
647	// Fast path to determine the longest prefix that can be left
648	// unescaped in a string of sizeof(T) bytes packed in an integer of
649	// type T.
650	template <bool EnableExtraAsciiEscapes, class T>
651	size_t firstEscapableInWord(T s, const serialization_opts& opts) {
652	static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
653	static constexpr T kOnes = ~T() / `255`; // 0x...0101
654	static constexpr T kMsbs = kOnes * `0x80`; // 0x...8080
655
656	// Sets the MSB of bytes < b. Precondition: b < 128.
657	auto isLess = [](T w, uint8_t b) {
658	// A byte is < b iff subtracting b underflows, so we check that
659	// the MSB wasn't set before and it's set after the subtraction.
660	return (w - kOnes * b) & ~w & kMsbs;
661	};
662
663	auto isChar = [&](uint8_t c) {
664	// A byte is == c iff it is 0 if xored with c.
665	return isLess(s ^ (kOnes * c), `1`);
666	};
667
668	// The following masks have the MSB set for each byte of the word
669	// that satisfies the corresponding condition.
670	auto isHigh = s & kMsbs; // >= 128
671	auto isLow = isLess(s, `0x20`); // <= 0x1f
672	auto needsEscape = isHigh \| isLow \| isChar(`'\\'`) \| isChar(`'"'`);
673
674	if / constexpr / (EnableExtraAsciiEscapes) {
675	// Deal with optional bitmap for unicode escapes. Escapes can optionally be
676	// set for ascii characters 32 - 127, so the inner loop may run up to 96
677	// times. However, for the case where 0 or a handful of bits are set,
678	// looping will be minimal through use of findFirstSet.
679	for (size_t i = `0`; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
680	const auto offset = i * `64`;
681	// Clear first 32 characters if this is the first index, since those are
682	// always escaped.
683	auto bitmap = opts.extra_ascii_to_escape_bitmap [i] &
684	(i == `0` ? uint64_t(-`1`) << `32` : ~`0UL`);
685	while (bitmap) {
686	auto bit = folly::findFirstSet(bitmap);
687	needsEscape \|= isChar(offset + bit - `1`);
688	bitmap &= bitmap - `1`;
689	}
690	}
691	}
692
693	if (!needsEscape) {
694	return sizeof(T);
695	}
696
697	if (folly::kIsLittleEndian) {
698	return folly::findFirstSet(needsEscape) / `8` - `1`;
699	} else {
700	return sizeof(T) - folly::findLastSet(needsEscape) / `8`;
701	}
702	}
703
704	// Escape a string so that it is legal to print it in JSON text.
705	template <bool EnableExtraAsciiEscapes>
706	void escapeStringImpl(
707	StringPiece input,
708	std::string& out,
709	const serialization_opts& opts) {
710	auto hexDigit = [](uint8_t c) -> char {
711	return c < `10` ? c + `'0'` : c - `10` + `'a'`;
712	};
713
714	out.push_back(`'\"'`);
715
716	auto* p = reinterpret_cast<const unsigned char*>(input.begin());
717	auto* q = reinterpret_cast<const unsigned char*>(input.begin());
718	auto* e = reinterpret_cast<const unsigned char*>(input.end());
719
720	while (p < e) {
721	// Find the longest prefix that does not need escaping, and copy
722	// it literally into the output string.
723	auto firstEsc = p;
724	while (firstEsc < e) {
725	auto avail = e - firstEsc;
726	uint64_t word = `0`;
727	if (avail >= `8`) {
728	word = folly::loadUnaligned<uint64_t>(firstEsc);
729	} else {
730	word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
731	}
732	auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
733	DCHECK_LE(prefix, avail);
734	firstEsc += prefix;
735	if (prefix < `8`) {
736	break;
737	}
738	}
739	if (firstEsc > p) {
740	out.append(reinterpret_cast<const char*>(p), firstEsc - p);
741	p = firstEsc;
742	// We can't be in the middle of a multibyte sequence, so we can reset q.
743	q = p;
744	if (p == e) {
745	break;
746	}
747	}
748
749	// Handle the next byte that may need escaping.
750
751	// Since non-ascii encoding inherently does utf8 validation
752	// we explicitly validate utf8 only if non-ascii encoding is disabled.
753	if ((opts.validate_utf8 \|\| opts.skip_invalid_utf8) &&
754	!opts.encode_non_ascii) {
755	// To achieve better spatial and temporal coherence
756	// we do utf8 validation progressively along with the
757	// string-escaping instead of two separate passes.
758
759	// As the encoding progresses, q will stay at or ahead of p.
760	CHECK_GE(q, p);
761
762	// As p catches up with q, move q forward.
763	if (q == p) {
764	// calling utf8_decode has the side effect of
765	// checking that utf8 encodings are valid
766	char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
767	if (opts.skip_invalid_utf8 && v == U`'\ufffd'`) {
768	out.append(u8"\ufffd");
769	p = q;
770	continue;
771	}
772	}
773	}
774
775	auto encodeUnicode = opts.encode_non_ascii && (*p & `0x80`);
776	if / constexpr / (EnableExtraAsciiEscapes) {
777	encodeUnicode = encodeUnicode \|\|
778	(p >= `0x20` && p < `0x80` &&
779	(opts.extra_ascii_to_escape_bitmap [*p / `64`] &
780	(uint64_t(`1`) << (*p % `64`))));
781	}
782
783	if (encodeUnicode) {
784	// note that this if condition captures utf8 chars
785	// with value > 127, so size > 1 byte (or they are whitelisted for
786	// Unicode encoding).
787	// NOTE: char32_t / char16_t are both unsigned.
788	char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
789	auto writeHex = [&](char16_t v) {
790	char buf[] = "\\u\0\0\0\0";
791	buf[`2`] = hexDigit((v >> `12`) & `0x0f`);
792	buf[`3`] = hexDigit((v >> `8`) & `0x0f`);
793	buf[`4`] = hexDigit((v >> `4`) & `0x0f`);
794	buf[`5`] = hexDigit(v & `0x0f`);
795	out.append(buf, `6`);
796	};
797	// From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
798	if (cp < `0x10000u`) {
799	// If the code point is in the Basic Multilingual Plane (U+0000 through
800	// U+FFFF), then it may be represented as a six-character sequence:
801	// a reverse solidus, followed by the lowercase letter u, followed by
802	// four hexadecimal digits that encode the code point.
803	writeHex(static_cast<char16_t>(cp));
804	} else {
805	// To escape a code point that is not in the Basic Multilingual Plane,
806	// the character may be represented as a twelve-character sequence,
807	// encoding the UTF-16 surrogate pair corresponding to the code point.
808	writeHex(static_cast<char16_t>(
809	`0xd800u` + (((cp - `0x10000u`) >> `10`) & `0x3ffu`)));
810	writeHex(static_cast<char16_t>(`0xdc00u` + ((cp - `0x10000u`) & `0x3ffu`)));
811	}
812	} else if (p == `'\\'` \|\| p == `'\"'`) {
813	char buf[] = "\\\0";
814	buf[`1`] = char(*p++);
815	out.append(buf, `2`);
816	} else if (*p <= `0x1f`) {
817	switch (*p) {
818	// clang-format off
819	case `'\b'`: out.append("\\b"); p++; break;
820	case `'\f'`: out.append("\\f"); p++; break;
821	case `'\n'`: out.append("\\n"); p++; break;
822	case `'\r'`: out.append("\\r"); p++; break;
823	case `'\t'`: out.append("\\t"); p++; break;
824	// clang-format on
825	default:
826	// Note that this if condition captures non readable chars
827	// with value < 32, so size = 1 byte (e.g control chars).
828	char buf[] = "\\u00\0\0";
829	buf[`4`] = hexDigit(uint8_t((*p & `0xf0`) >> `4`));
830	buf[`5`] = hexDigit(uint8_t(*p & `0xf`));
831	out.append(buf, `6`);
832	p++;
833	}
834	} else {
835	out.push_back(char(*p++));
836	}
837	}
838
839	out.push_back(`'\"'`);
840	}
841
842	void escapeString(
843	StringPiece input,
844	std::string& out,
845	const serialization_opts& opts) {
846	if (FOLLY_UNLIKELY(
847	opts.extra_ascii_to_escape_bitmap[`0`] \|\|
848	opts.extra_ascii_to_escape_bitmap[`1`])) {
849	escapeStringImpl<true>(input, out, opts);
850	} else {
851	escapeStringImpl<false>(input, out, opts);
852	}
853	}
854
855	std::string stripComments(StringPiece jsonC) {
856	std::string result;
857	enum class State {
858	None,
859	InString,
860	InlineComment,
861	LineComment
862	} state = State::None;
863
864	for (size_t i = `0`; i < jsonC.size(); ++i) {
865	auto s = jsonC.subpiece(i);
866	switch (state) {
867	case State::None:
868	if (s.startsWith("/*")) {
869	state = State::InlineComment;
870	++i;
871	continue;
872	} else if (s.startsWith("//")) {
873	state = State::LineComment;
874	++i;
875	continue;
876	} else if (s [`0`] == `'\"'`) {
877	state = State::InString;
878	}
879	result.push_back(s [`0`]);
880	break;
881	case State::InString:
882	if (s [`0`] == `'\\'`) {
883	if (UNLIKELY(s.size() == `1`)) {
884	throw std::logic_error ("Invalid JSONC: string is not terminated");
885	}
886	result.push_back(s [`0`]);
887	result.push_back(s [`1`]);
888	++i;
889	continue;
890	} else if (s [`0`] == `'\"'`) {
891	state = State::None;
892	}
893	result.push_back(s [`0`]);
894	break;
895	case State::InlineComment:
896	if (s.startsWith("*/")) {
897	state = State::None;
898	++i;
899	}
900	break;
901	case State::LineComment:
902	if (s [`0`] == `'\n'`) {
903	// skip the line break. It doesn't matter.
904	state = State::None;
905	}
906	break;
907	default:
908	throw std::logic_error ("Unknown comment state");
909	}
910	}
911	return result;
912	}
913
914	} // namespace json
915
916	//////////////////////////////////////////////////////////////////////
917
918	dynamic parseJson(StringPiece range) {
919	return parseJson(range, json::serialization_opts ());
920	}
921
922	dynamic parseJson(StringPiece range, json::serialization_opts const& opts) {
923	json::Input in(range, &opts);
924
925	auto ret = parseValue(in);
926	in.skipWhitespace();
927	if (in.size() && *in != `'\0'`) {
928	in.error("parsing didn't consume all input");
929	}
930	return ret;
931	}
932
933	std::string toJson(dynamic const& dyn) {
934	return json::serialize(dyn, json::serialization_opts ());
935	}
936
937	std::string toPrettyJson(dynamic const& dyn) {
938	json::serialization_opts opts;
939	opts.pretty_formatting = true;
940	return json::serialize(dyn, opts);
941	}
942
943	//////////////////////////////////////////////////////////////////////
944	// dynamic::print_as_pseudo_json() is implemented here for header
945	// ordering reasons (most of the dynamic implementation is in
946	// dynamic-inl.h, which we don't want to include json.h).
947
948	void dynamic::print_as_pseudo_json(std::ostream& out) const {
949	json::serialization_opts opts;
950	opts.allow_non_string_keys = true;
951	opts.allow_nan_inf = true;
952	out << json::serialize(*this, opts);
953	}
954
955	void PrintTo(const dynamic& dyn, std::ostream* os) {
956	json::serialization_opts opts;
957	opts.allow_nan_inf = true;
958	opts.allow_non_string_keys = true;
959	opts.pretty_formatting = true;
960	opts.sort_keys = true;
961	*os << json::serialize(dyn, opts);
962	}
963
964	//////////////////////////////////////////////////////////////////////
965
966	} // namespace folly
967

Browse the source code of folly/json.cpp