1/*
2 * Copyright 2011-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#include <folly/json.h>
17
18#include <algorithm>
19#include <functional>
20#include <iterator>
21#include <type_traits>
22
23#include <boost/algorithm/string.hpp>
24
25#include <folly/Conv.h>
26#include <folly/Portability.h>
27#include <folly/Range.h>
28#include <folly/String.h>
29#include <folly/Unicode.h>
30#include <folly/lang/Bits.h>
31#include <folly/portability/Constexpr.h>
32
33namespace folly {
34
35//////////////////////////////////////////////////////////////////////
36
37namespace json {
38namespace {
39
40struct Printer {
41 explicit Printer(
42 std::string& out,
43 unsigned* indentLevel,
44 serialization_opts const* opts)
45 : out_(out), indentLevel_(indentLevel), opts_(*opts) {}
46
47 void operator()(dynamic const& v) const {
48 switch (v.type()) {
49 case dynamic::DOUBLE:
50 if (!opts_.allow_nan_inf &&
51 (std::isnan(v.asDouble()) || std::isinf(v.asDouble()))) {
52 throw std::runtime_error(
53 "folly::toJson: JSON object value was a "
54 "NaN or INF");
55 }
56 toAppend(
57 v.asDouble(), &out_, opts_.double_mode, opts_.double_num_digits);
58 break;
59 case dynamic::INT64: {
60 auto intval = v.asInt();
61 if (opts_.javascript_safe) {
62 // Use folly::to to check that this integer can be represented
63 // as a double without loss of precision.
64 intval = int64_t(to<double>(intval));
65 }
66 toAppend(intval, &out_);
67 break;
68 }
69 case dynamic::BOOL:
70 out_ += v.asBool() ? "true" : "false";
71 break;
72 case dynamic::NULLT:
73 out_ += "null";
74 break;
75 case dynamic::STRING:
76 escapeString(v.asString(), out_, opts_);
77 break;
78 case dynamic::OBJECT:
79 printObject(v);
80 break;
81 case dynamic::ARRAY:
82 printArray(v);
83 break;
84 default:
85 CHECK(0) << "Bad type " << v.type();
86 }
87 }
88
89 private:
90 void printKV(const std::pair<const dynamic, dynamic>& p) const {
91 if (!opts_.allow_non_string_keys && !p.first.isString()) {
92 throw std::runtime_error(
93 "folly::toJson: JSON object key was not a "
94 "string");
95 }
96 (*this)(p.first);
97 mapColon();
98 (*this)(p.second);
99 }
100
101 template <typename Iterator>
102 void printKVPairs(Iterator begin, Iterator end) const {
103 printKV(*begin);
104 for (++begin; begin != end; ++begin) {
105 out_ += ',';
106 newline();
107 printKV(*begin);
108 }
109 }
110
111 void printObject(dynamic const& o) const {
112 if (o.empty()) {
113 out_ += "{}";
114 return;
115 }
116
117 out_ += '{';
118 indent();
119 newline();
120 if (opts_.sort_keys || opts_.sort_keys_by) {
121 using ref = std::reference_wrapper<decltype(o.items())::value_type const>;
122 std::vector<ref> refs(o.items().begin(), o.items().end());
123
124 using SortByRef = FunctionRef<bool(dynamic const&, dynamic const&)>;
125 auto const& sort_keys_by = opts_.sort_keys_by
126 ? SortByRef(opts_.sort_keys_by)
127 : SortByRef(std::less<dynamic>());
128 std::sort(refs.begin(), refs.end(), [&](ref a, ref b) {
129 // Only compare keys. No ordering among identical keys.
130 return sort_keys_by(a.get().first, b.get().first);
131 });
132 printKVPairs(refs.cbegin(), refs.cend());
133 } else {
134 printKVPairs(o.items().begin(), o.items().end());
135 }
136 outdent();
137 newline();
138 out_ += '}';
139 }
140
141 void printArray(dynamic const& a) const {
142 if (a.empty()) {
143 out_ += "[]";
144 return;
145 }
146
147 out_ += '[';
148 indent();
149 newline();
150 (*this)(a[0]);
151 for (auto& val : range(std::next(a.begin()), a.end())) {
152 out_ += ',';
153 newline();
154 (*this)(val);
155 }
156 outdent();
157 newline();
158 out_ += ']';
159 }
160
161 private:
162 void outdent() const {
163 if (indentLevel_) {
164 --*indentLevel_;
165 }
166 }
167
168 void indent() const {
169 if (indentLevel_) {
170 ++*indentLevel_;
171 }
172 }
173
174 void newline() const {
175 if (indentLevel_) {
176 out_ += to<std::string>('\n', std::string(*indentLevel_ * 2, ' '));
177 }
178 }
179
180 void mapColon() const {
181 out_ += indentLevel_ ? ": " : ":";
182 }
183
184 private:
185 std::string& out_;
186 unsigned* const indentLevel_;
187 serialization_opts const& opts_;
188};
189
190//////////////////////////////////////////////////////////////////////
191
192struct FOLLY_EXPORT ParseError : std::runtime_error {
193 explicit ParseError(
194 unsigned int line,
195 std::string const& context,
196 std::string const& expected)
197 : std::runtime_error(to<std::string>(
198 "json parse error on line ",
199 line,
200 !context.empty() ? to<std::string>(" near `", context, '\'') : "",
201 ": ",
202 expected)) {}
203};
204
205// Wraps our input buffer with some helper functions.
206struct Input {
207 explicit Input(StringPiece range, json::serialization_opts const* opts)
208 : range_(range), opts_(*opts), lineNum_(0) {
209 storeCurrent();
210 }
211
212 Input(Input const&) = delete;
213 Input& operator=(Input const&) = delete;
214
215 char const* begin() const {
216 return range_.begin();
217 }
218
219 // Parse ahead for as long as the supplied predicate is satisfied,
220 // returning a range of what was skipped.
221 template <class Predicate>
222 StringPiece skipWhile(const Predicate& p) {
223 std::size_t skipped = 0;
224 for (; skipped < range_.size(); ++skipped) {
225 if (!p(range_[skipped])) {
226 break;
227 }
228 if (range_[skipped] == '\n') {
229 ++lineNum_;
230 }
231 }
232 auto ret = range_.subpiece(0, skipped);
233 range_.advance(skipped);
234 storeCurrent();
235 return ret;
236 }
237
238 StringPiece skipDigits() {
239 return skipWhile([](char c) { return c >= '0' && c <= '9'; });
240 }
241
242 StringPiece skipMinusAndDigits() {
243 bool firstChar = true;
244 return skipWhile([&firstChar](char c) {
245 bool result = (c >= '0' && c <= '9') || (firstChar && c == '-');
246 firstChar = false;
247 return result;
248 });
249 }
250
251 void skipWhitespace() {
252 unsigned index = 0;
253 while (true) {
254 while (index < range_.size() && range_[index] == ' ') {
255 index++;
256 }
257 if (index < range_.size()) {
258 if (range_[index] == '\n') {
259 index++;
260 ++lineNum_;
261 continue;
262 }
263 if (range_[index] == '\t' || range_[index] == '\r') {
264 index++;
265 continue;
266 }
267 }
268 break;
269 }
270 range_.advance(index);
271 storeCurrent();
272 }
273
274 void expect(char c) {
275 if (**this != c) {
276 throw ParseError(
277 lineNum_, context(), to<std::string>("expected '", c, '\''));
278 }
279 ++*this;
280 }
281
282 std::size_t size() const {
283 return range_.size();
284 }
285
286 int operator*() const {
287 return current_;
288 }
289
290 void operator++() {
291 range_.pop_front();
292 storeCurrent();
293 }
294
295 template <class T>
296 T extract() {
297 try {
298 return to<T>(&range_);
299 } catch (std::exception const& e) {
300 error(e.what());
301 }
302 }
303
304 bool consume(StringPiece str) {
305 if (boost::starts_with(range_, str)) {
306 range_.advance(str.size());
307 storeCurrent();
308 return true;
309 }
310 return false;
311 }
312
313 std::string context() const {
314 return range_.subpiece(0, 16 /* arbitrary */).toString();
315 }
316
317 dynamic error(char const* what) const {
318 throw ParseError(lineNum_, context(), what);
319 }
320
321 json::serialization_opts const& getOpts() {
322 return opts_;
323 }
324
325 void incrementRecursionLevel() {
326 if (currentRecursionLevel_ > opts_.recursion_limit) {
327 error("recursion limit exceeded");
328 }
329 currentRecursionLevel_++;
330 }
331
332 void decrementRecursionLevel() {
333 currentRecursionLevel_--;
334 }
335
336 private:
337 void storeCurrent() {
338 current_ = range_.empty() ? EOF : range_.front();
339 }
340
341 private:
342 StringPiece range_;
343 json::serialization_opts const& opts_;
344 unsigned lineNum_;
345 int current_;
346 unsigned int currentRecursionLevel_{0};
347};
348
349class RecursionGuard {
350 public:
351 explicit RecursionGuard(Input& in) : in_(in) {
352 in_.incrementRecursionLevel();
353 }
354
355 ~RecursionGuard() {
356 in_.decrementRecursionLevel();
357 }
358
359 private:
360 Input& in_;
361};
362
363dynamic parseValue(Input& in);
364std::string parseString(Input& in);
365dynamic parseNumber(Input& in);
366
367dynamic parseObject(Input& in) {
368 DCHECK_EQ(*in, '{');
369 ++in;
370
371 dynamic ret = dynamic::object;
372
373 in.skipWhitespace();
374 if (*in == '}') {
375 ++in;
376 return ret;
377 }
378
379 for (;;) {
380 if (in.getOpts().allow_trailing_comma && *in == '}') {
381 break;
382 }
383 if (*in == '\"') { // string
384 auto key = parseString(in);
385 in.skipWhitespace();
386 in.expect(':');
387 in.skipWhitespace();
388 ret.insert(std::move(key), parseValue(in));
389 } else if (!in.getOpts().allow_non_string_keys) {
390 in.error("expected string for object key name");
391 } else {
392 auto key = parseValue(in);
393 in.skipWhitespace();
394 in.expect(':');
395 in.skipWhitespace();
396 ret.insert(std::move(key), parseValue(in));
397 }
398
399 in.skipWhitespace();
400 if (*in != ',') {
401 break;
402 }
403 ++in;
404 in.skipWhitespace();
405 }
406 in.expect('}');
407
408 return ret;
409}
410
411dynamic parseArray(Input& in) {
412 DCHECK_EQ(*in, '[');
413 ++in;
414
415 dynamic ret = dynamic::array;
416
417 in.skipWhitespace();
418 if (*in == ']') {
419 ++in;
420 return ret;
421 }
422
423 for (;;) {
424 if (in.getOpts().allow_trailing_comma && *in == ']') {
425 break;
426 }
427 ret.push_back(parseValue(in));
428 in.skipWhitespace();
429 if (*in != ',') {
430 break;
431 }
432 ++in;
433 in.skipWhitespace();
434 }
435 in.expect(']');
436
437 return ret;
438}
439
440dynamic parseNumber(Input& in) {
441 bool const negative = (*in == '-');
442 if (negative && in.consume("-Infinity")) {
443 if (in.getOpts().parse_numbers_as_strings) {
444 return "-Infinity";
445 } else {
446 return -std::numeric_limits<double>::infinity();
447 }
448 }
449
450 auto integral = in.skipMinusAndDigits();
451 if (negative && integral.size() < 2) {
452 in.error("expected digits after `-'");
453 }
454
455 auto const wasE = *in == 'e' || *in == 'E';
456
457 constexpr const char* maxInt = "9223372036854775807";
458 constexpr const char* minInt = "-9223372036854775808";
459 constexpr auto maxIntLen = constexpr_strlen(maxInt);
460 constexpr auto minIntLen = constexpr_strlen(minInt);
461
462 if (*in != '.' && !wasE && in.getOpts().parse_numbers_as_strings) {
463 return integral;
464 }
465
466 if (*in != '.' && !wasE) {
467 if (LIKELY(!in.getOpts().double_fallback || integral.size() < maxIntLen) ||
468 (!negative && integral.size() == maxIntLen && integral <= maxInt) ||
469 (negative && integral.size() == minIntLen && integral <= minInt)) {
470 auto val = to<int64_t>(integral);
471 in.skipWhitespace();
472 return val;
473 } else {
474 auto val = to<double>(integral);
475 in.skipWhitespace();
476 return val;
477 }
478 }
479
480 auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
481 if (*in == 'e' || *in == 'E') {
482 ++in;
483 if (*in == '+' || *in == '-') {
484 ++in;
485 }
486 auto expPart = in.skipDigits();
487 end = expPart.end();
488 }
489 auto fullNum = range(integral.begin(), end);
490 if (in.getOpts().parse_numbers_as_strings) {
491 return fullNum;
492 }
493 auto val = to<double>(fullNum);
494 return val;
495}
496
497std::string decodeUnicodeEscape(Input& in) {
498 auto hexVal = [&](int c) -> uint16_t {
499 // clang-format off
500 return uint16_t(
501 c >= '0' && c <= '9' ? c - '0' :
502 c >= 'a' && c <= 'f' ? c - 'a' + 10 :
503 c >= 'A' && c <= 'F' ? c - 'A' + 10 :
504 (in.error("invalid hex digit"), 0));
505 // clang-format on
506 };
507
508 auto readHex = [&]() -> uint16_t {
509 if (in.size() < 4) {
510 in.error("expected 4 hex digits");
511 }
512
513 uint16_t ret = uint16_t(hexVal(*in) * 4096);
514 ++in;
515 ret += hexVal(*in) * 256;
516 ++in;
517 ret += hexVal(*in) * 16;
518 ++in;
519 ret += hexVal(*in);
520 ++in;
521 return ret;
522 };
523
524 /*
525 * If the value encoded is in the surrogate pair range, we need to
526 * make sure there is another escape that we can use also.
527 */
528 uint32_t codePoint = readHex();
529 if (codePoint >= 0xd800 && codePoint <= 0xdbff) {
530 if (!in.consume("\\u")) {
531 in.error(
532 "expected another unicode escape for second half of "
533 "surrogate pair");
534 }
535 uint16_t second = readHex();
536 if (second >= 0xdc00 && second <= 0xdfff) {
537 codePoint = 0x10000 + ((codePoint & 0x3ff) << 10) + (second & 0x3ff);
538 } else {
539 in.error("second character in surrogate pair is invalid");
540 }
541 } else if (codePoint >= 0xdc00 && codePoint <= 0xdfff) {
542 in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
543 }
544
545 return codePointToUtf8(codePoint);
546}
547
548std::string parseString(Input& in) {
549 DCHECK_EQ(*in, '\"');
550 ++in;
551
552 std::string ret;
553 for (;;) {
554 auto range = in.skipWhile([](char c) { return c != '\"' && c != '\\'; });
555 ret.append(range.begin(), range.end());
556
557 if (*in == '\"') {
558 ++in;
559 break;
560 }
561 if (*in == '\\') {
562 ++in;
563 switch (*in) {
564 // clang-format off
565 case '\"': ret.push_back('\"'); ++in; break;
566 case '\\': ret.push_back('\\'); ++in; break;
567 case '/': ret.push_back('/'); ++in; break;
568 case 'b': ret.push_back('\b'); ++in; break;
569 case 'f': ret.push_back('\f'); ++in; break;
570 case 'n': ret.push_back('\n'); ++in; break;
571 case 'r': ret.push_back('\r'); ++in; break;
572 case 't': ret.push_back('\t'); ++in; break;
573 case 'u': ++in; ret += decodeUnicodeEscape(in); break;
574 // clang-format on
575 default:
576 in.error(
577 to<std::string>("unknown escape ", *in, " in string").c_str());
578 }
579 continue;
580 }
581 if (*in == EOF) {
582 in.error("unterminated string");
583 }
584 if (!*in) {
585 /*
586 * Apparently we're actually supposed to ban all control
587 * characters from strings. This seems unnecessarily
588 * restrictive, so we're only banning zero bytes. (Since the
589 * string is presumed to be UTF-8 encoded it's fine to just
590 * check this way.)
591 */
592 in.error("null byte in string");
593 }
594
595 ret.push_back(char(*in));
596 ++in;
597 }
598
599 return ret;
600}
601
602dynamic parseValue(Input& in) {
603 RecursionGuard guard(in);
604
605 in.skipWhitespace();
606 // clang-format off
607 return
608 *in == '[' ? parseArray(in) :
609 *in == '{' ? parseObject(in) :
610 *in == '\"' ? parseString(in) :
611 (*in == '-' || (*in >= '0' && *in <= '9')) ? parseNumber(in) :
612 in.consume("true") ? true :
613 in.consume("false") ? false :
614 in.consume("null") ? nullptr :
615 in.consume("Infinity") ?
616 (in.getOpts().parse_numbers_as_strings ? (dynamic)"Infinity" :
617 (dynamic)std::numeric_limits<double>::infinity()) :
618 in.consume("NaN") ?
619 (in.getOpts().parse_numbers_as_strings ? (dynamic)"NaN" :
620 (dynamic)std::numeric_limits<double>::quiet_NaN()) :
621 in.error("expected json value");
622 // clang-format on
623}
624
625} // namespace
626
627//////////////////////////////////////////////////////////////////////
628
629std::array<uint64_t, 2> buildExtraAsciiToEscapeBitmap(StringPiece chars) {
630 std::array<uint64_t, 2> escapes{{0, 0}};
631 for (auto b : ByteRange(chars)) {
632 if (b >= 0x20 && b < 0x80) {
633 escapes[b / 64] |= uint64_t(1) << (b % 64);
634 }
635 }
636 return escapes;
637}
638
639std::string serialize(dynamic const& dyn, serialization_opts const& opts) {
640 std::string ret;
641 unsigned indentLevel = 0;
642 Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
643 p(dyn);
644 return ret;
645}
646
647// Fast path to determine the longest prefix that can be left
648// unescaped in a string of sizeof(T) bytes packed in an integer of
649// type T.
650template <bool EnableExtraAsciiEscapes, class T>
651size_t firstEscapableInWord(T s, const serialization_opts& opts) {
652 static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
653 static constexpr T kOnes = ~T() / 255; // 0x...0101
654 static constexpr T kMsbs = kOnes * 0x80; // 0x...8080
655
656 // Sets the MSB of bytes < b. Precondition: b < 128.
657 auto isLess = [](T w, uint8_t b) {
658 // A byte is < b iff subtracting b underflows, so we check that
659 // the MSB wasn't set before and it's set after the subtraction.
660 return (w - kOnes * b) & ~w & kMsbs;
661 };
662
663 auto isChar = [&](uint8_t c) {
664 // A byte is == c iff it is 0 if xored with c.
665 return isLess(s ^ (kOnes * c), 1);
666 };
667
668 // The following masks have the MSB set for each byte of the word
669 // that satisfies the corresponding condition.
670 auto isHigh = s & kMsbs; // >= 128
671 auto isLow = isLess(s, 0x20); // <= 0x1f
672 auto needsEscape = isHigh | isLow | isChar('\\') | isChar('"');
673
674 if /* constexpr */ (EnableExtraAsciiEscapes) {
675 // Deal with optional bitmap for unicode escapes. Escapes can optionally be
676 // set for ascii characters 32 - 127, so the inner loop may run up to 96
677 // times. However, for the case where 0 or a handful of bits are set,
678 // looping will be minimal through use of findFirstSet.
679 for (size_t i = 0; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
680 const auto offset = i * 64;
681 // Clear first 32 characters if this is the first index, since those are
682 // always escaped.
683 auto bitmap = opts.extra_ascii_to_escape_bitmap[i] &
684 (i == 0 ? uint64_t(-1) << 32 : ~0UL);
685 while (bitmap) {
686 auto bit = folly::findFirstSet(bitmap);
687 needsEscape |= isChar(offset + bit - 1);
688 bitmap &= bitmap - 1;
689 }
690 }
691 }
692
693 if (!needsEscape) {
694 return sizeof(T);
695 }
696
697 if (folly::kIsLittleEndian) {
698 return folly::findFirstSet(needsEscape) / 8 - 1;
699 } else {
700 return sizeof(T) - folly::findLastSet(needsEscape) / 8;
701 }
702}
703
704// Escape a string so that it is legal to print it in JSON text.
// Appends `input` to `out` as a double-quoted JSON string literal. Double
// quotes, backslashes and bytes below 0x20 are always escaped. Depending on
// `opts`, UTF-8 is validated (validate_utf8 / skip_invalid_utf8) and
// non-ASCII code points are written as \uXXXX escapes (encode_non_ascii).
//
// EnableExtraAsciiEscapes: when true, bytes in [0x20, 0x80) whose bit is set
// in opts.extra_ascii_to_escape_bitmap are also written as \uXXXX escapes.
705template <bool EnableExtraAsciiEscapes>
706void escapeStringImpl(
707 StringPiece input,
708 std::string& out,
709 const serialization_opts& opts) {
// Maps a value in 0..15 to its lowercase hexadecimal digit.
710 auto hexDigit = [](uint8_t c) -> char {
711 return c < 10 ? c + '0' : c - 10 + 'a';
712 };
713
714 out.push_back('\"');
715
// p: next input byte to emit. q: position passed to the UTF-8 decoder for
// validation (only moved when validation is active). e: end of the input.
716 auto* p = reinterpret_cast<const unsigned char*>(input.begin());
717 auto* q = reinterpret_cast<const unsigned char*>(input.begin());
718 auto* e = reinterpret_cast<const unsigned char*>(input.end());
719
720 while (p < e) {
721 // Find the longest prefix that does not need escaping, and copy
722 // it literally into the output string.
723 auto firstEsc = p;
724 while (firstEsc < e) {
725 auto avail = e - firstEsc;
726 uint64_t word = 0;
// Scan up to eight bytes per step.
// NOTE(review): presumably the bytes past `avail` in a partial load read as
// zero, which counts as escapable and keeps prefix <= avail - confirm against
// partialLoadUnaligned.
727 if (avail >= 8) {
728 word = folly::loadUnaligned<uint64_t>(firstEsc);
729 } else {
730 word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
731 }
732 auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
733 DCHECK_LE(prefix, avail);
734 firstEsc += prefix;
// Fewer than eight clean bytes: the scan stopped inside this word.
735 if (prefix < 8) {
736 break;
737 }
738 }
739 if (firstEsc > p) {
740 out.append(reinterpret_cast<const char*>(p), firstEsc - p);
741 p = firstEsc;
742 // We can't be in the middle of a multibyte sequence, so we can reset q.
743 q = p;
744 if (p == e) {
745 break;
746 }
747 }
748
749 // Handle the next byte that may need escaping.
750
751 // Since non-ascii encoding inherently does utf8 validation
752 // we explicitly validate utf8 only if non-ascii encoding is disabled.
753 if ((opts.validate_utf8 || opts.skip_invalid_utf8) &&
754 !opts.encode_non_ascii) {
755 // To achieve better spatial and temporal coherence
756 // we do utf8 validation progressively along with the
757 // string-escaping instead of two separate passes.
758
759 // As the encoding progresses, q will stay at or ahead of p.
760 CHECK_GE(q, p);
761
762 // As p catches up with q, move q forward.
763 if (q == p) {
764 // calling utf8_decode has the side effect of
765 // checking that utf8 encodings are valid
766 char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
// When skipping is enabled and the decoder yields U+FFFD, emit U+FFFD as
// UTF-8 and resume after the bytes the decoder consumed.
767 if (opts.skip_invalid_utf8 && v == U'\ufffd') {
768 out.append(u8"\ufffd");
769 p = q;
770 continue;
771 }
772 }
773 }
774
// Decide whether the byte at p starts something that must be written as a
// \uXXXX escape: a non-ASCII byte with encode_non_ascii set, or an ASCII byte
// selected by the extra-escape bitmap.
775 auto encodeUnicode = opts.encode_non_ascii && (*p & 0x80);
776 if /* constexpr */ (EnableExtraAsciiEscapes) {
777 encodeUnicode = encodeUnicode ||
778 (*p >= 0x20 && *p < 0x80 &&
779 (opts.extra_ascii_to_escape_bitmap[*p / 64] &
780 (uint64_t(1) << (*p % 64))));
781 }
782
783 if (encodeUnicode) {
784 // note that this if condition captures utf8 chars
785 // with value > 127, so size > 1 byte (or they are whitelisted for
786 // Unicode encoding).
787 // NOTE: char32_t / char16_t are both unsigned.
788 char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
// Appends one \uXXXX escape for a 16-bit value.
789 auto writeHex = [&](char16_t v) {
790 char buf[] = "\\u\0\0\0\0";
791 buf[2] = hexDigit((v >> 12) & 0x0f);
792 buf[3] = hexDigit((v >> 8) & 0x0f);
793 buf[4] = hexDigit((v >> 4) & 0x0f);
794 buf[5] = hexDigit(v & 0x0f);
795 out.append(buf, 6);
796 };
797 // From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
798 if (cp < 0x10000u) {
799 // If the code point is in the Basic Multilingual Plane (U+0000 through
800 // U+FFFF), then it may be represented as a six-character sequence:
801 // a reverse solidus, followed by the lowercase letter u, followed by
802 // four hexadecimal digits that encode the code point.
803 writeHex(static_cast<char16_t>(cp));
804 } else {
805 // To escape a code point that is not in the Basic Multilingual Plane,
806 // the character may be represented as a twelve-character sequence,
807 // encoding the UTF-16 surrogate pair corresponding to the code point.
808 writeHex(static_cast<char16_t>(
809 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu)));
810 writeHex(static_cast<char16_t>(0xdc00u + ((cp - 0x10000u) & 0x3ffu)));
811 }
// A quote or backslash is written as a backslash followed by the character.
812 } else if (*p == '\\' || *p == '\"') {
813 char buf[] = "\\\0";
814 buf[1] = char(*p++);
815 out.append(buf, 2);
// Control characters: the two-character escape where one is listed below,
// \u00XX otherwise.
816 } else if (*p <= 0x1f) {
817 switch (*p) {
818 // clang-format off
819 case '\b': out.append("\\b"); p++; break;
820 case '\f': out.append("\\f"); p++; break;
821 case '\n': out.append("\\n"); p++; break;
822 case '\r': out.append("\\r"); p++; break;
823 case '\t': out.append("\\t"); p++; break;
824 // clang-format on
825 default:
826 // Note that this if condition captures non readable chars
827 // with value < 32, so size = 1 byte (e.g control chars).
828 char buf[] = "\\u00\0\0";
829 buf[4] = hexDigit(uint8_t((*p & 0xf0) >> 4));
830 buf[5] = hexDigit(uint8_t(*p & 0xf));
831 out.append(buf, 6);
832 p++;
833 }
834 } else {
// Anything else is copied through unchanged.
835 out.push_back(char(*p++));
836 }
837 }
838
839 out.push_back('\"');
840}
841
842void escapeString(
843 StringPiece input,
844 std::string& out,
845 const serialization_opts& opts) {
846 if (FOLLY_UNLIKELY(
847 opts.extra_ascii_to_escape_bitmap[0] ||
848 opts.extra_ascii_to_escape_bitmap[1])) {
849 escapeStringImpl<true>(input, out, opts);
850 } else {
851 escapeStringImpl<false>(input, out, opts);
852 }
853}
854
855std::string stripComments(StringPiece jsonC) {
856 std::string result;
857 enum class State {
858 None,
859 InString,
860 InlineComment,
861 LineComment
862 } state = State::None;
863
864 for (size_t i = 0; i < jsonC.size(); ++i) {
865 auto s = jsonC.subpiece(i);
866 switch (state) {
867 case State::None:
868 if (s.startsWith("/*")) {
869 state = State::InlineComment;
870 ++i;
871 continue;
872 } else if (s.startsWith("//")) {
873 state = State::LineComment;
874 ++i;
875 continue;
876 } else if (s[0] == '\"') {
877 state = State::InString;
878 }
879 result.push_back(s[0]);
880 break;
881 case State::InString:
882 if (s[0] == '\\') {
883 if (UNLIKELY(s.size() == 1)) {
884 throw std::logic_error("Invalid JSONC: string is not terminated");
885 }
886 result.push_back(s[0]);
887 result.push_back(s[1]);
888 ++i;
889 continue;
890 } else if (s[0] == '\"') {
891 state = State::None;
892 }
893 result.push_back(s[0]);
894 break;
895 case State::InlineComment:
896 if (s.startsWith("*/")) {
897 state = State::None;
898 ++i;
899 }
900 break;
901 case State::LineComment:
902 if (s[0] == '\n') {
903 // skip the line break. It doesn't matter.
904 state = State::None;
905 }
906 break;
907 default:
908 throw std::logic_error("Unknown comment state");
909 }
910 }
911 return result;
912}
913
914} // namespace json
915
916//////////////////////////////////////////////////////////////////////
917
918dynamic parseJson(StringPiece range) {
919 return parseJson(range, json::serialization_opts());
920}
921
922dynamic parseJson(StringPiece range, json::serialization_opts const& opts) {
923 json::Input in(range, &opts);
924
925 auto ret = parseValue(in);
926 in.skipWhitespace();
927 if (in.size() && *in != '\0') {
928 in.error("parsing didn't consume all input");
929 }
930 return ret;
931}
932
933std::string toJson(dynamic const& dyn) {
934 return json::serialize(dyn, json::serialization_opts());
935}
936
937std::string toPrettyJson(dynamic const& dyn) {
938 json::serialization_opts opts;
939 opts.pretty_formatting = true;
940 return json::serialize(dyn, opts);
941}
942
943//////////////////////////////////////////////////////////////////////
944// dynamic::print_as_pseudo_json() is implemented here for header
945// ordering reasons (most of the dynamic implementation is in
946// dynamic-inl.h, which we don't want to include json.h).
947
948void dynamic::print_as_pseudo_json(std::ostream& out) const {
949 json::serialization_opts opts;
950 opts.allow_non_string_keys = true;
951 opts.allow_nan_inf = true;
952 out << json::serialize(*this, opts);
953}
954
955void PrintTo(const dynamic& dyn, std::ostream* os) {
956 json::serialization_opts opts;
957 opts.allow_nan_inf = true;
958 opts.allow_non_string_keys = true;
959 opts.pretty_formatting = true;
960 opts.sort_keys = true;
961 *os << json::serialize(dyn, opts);
962}
963
964//////////////////////////////////////////////////////////////////////
965
966} // namespace folly
967