1/* Copyright (c) 2013 Dropbox, Inc.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 */
21
22#include "json11.hpp"
23#include <cassert>
24#include <cmath>
25#include <cstdlib>
26#include <cstdio>
27#include <limits>
28
29namespace json11 {
30
// Deepest array/object nesting the parser accepts; parse_json() reports
// "exceeded maximum nesting depth" once its depth argument goes beyond this.
static constexpr int max_depth = 200;
32
33using std::string;
34using std::vector;
35using std::map;
36using std::make_shared;
37using std::initializer_list;
38using std::move;
39
/* NullStruct
 *
 * Empty placeholder used as the payload type of a JSON null. Every instance
 * compares equal to every other and none orders before another, which is all
 * the comparison helpers in Value need. std::nullptr_t is not used because it
 * may not be orderable.
 */
struct NullStruct {
    bool operator==(NullStruct) const {
        return true;
    }

    bool operator<(NullStruct) const {
        return false;
    }
};
48
49/* * * * * * * * * * * * * * * * * * * *
50 * Serialization
51 */
52
53static void dump(NullStruct, string &out) {
54 out += "null";
55}
56
// Serialize a double. Finite values are formatted with "%.17g"; NaN and
// infinities are written as "null".
static void dump(double value, std::string &out) {
    if (!std::isfinite(value)) {
        out += "null";
        return;
    }

    char text[32];
    snprintf(text, sizeof text, "%.17g", value);
    out += text;
}
66
// Serialize an int as its decimal text.
static void dump(int value, std::string &out) {
    // std::to_string(int) yields the same text as formatting with "%d".
    out += std::to_string(value);
}
72
// Serialize a boolean as the literal "true" or "false".
static void dump(bool value, std::string &out) {
    if (value) {
        out += "true";
    } else {
        out += "false";
    }
}
76
// Serialize a string as a quoted JSON string literal.
//
// Backslash, double quote and the \b \f \n \r \t control characters get their
// two-character escapes; any other byte <= 0x1f is written as \u00XX. The
// UTF-8 sequences for U+2028 and U+2029 (E2 80 A8 / E2 80 A9) are written as
// \u2028 / \u2029. Every other byte is copied through unchanged.
static void dump(const std::string &value, std::string &out) {
    const size_t len = value.length();

    out += '"';
    for (size_t pos = 0; pos < len; pos++) {
        const char ch = value[pos];

        // Characters with a dedicated short escape.
        switch (ch) {
        case '\\': out += "\\\\"; continue;
        case '"':  out += "\\\""; continue;
        case '\b': out += "\\b";  continue;
        case '\f': out += "\\f";  continue;
        case '\n': out += "\\n";  continue;
        case '\r': out += "\\r";  continue;
        case '\t': out += "\\t";  continue;
        default:   break;
        }

        const uint8_t byte = static_cast<uint8_t>(ch);

        // Remaining control characters: generic \u escape.
        if (byte <= 0x1f) {
            char buf[8];
            snprintf(buf, sizeof buf, "\\u%04x", ch);
            out += buf;
            continue;
        }

        // Three-byte look-ahead for U+2028 / U+2029. Both trailing bytes must
        // lie inside the string for a match to be possible.
        if (byte == 0xe2 && pos + 2 < len
                && static_cast<uint8_t>(value[pos + 1]) == 0x80) {
            const uint8_t third = static_cast<uint8_t>(value[pos + 2]);
            if (third == 0xa8) {
                out += "\\u2028";
                pos += 2;
                continue;
            }
            if (third == 0xa9) {
                out += "\\u2029";
                pos += 2;
                continue;
            }
        }

        out += ch;
    }
    out += '"';
}
113
114static void dump(const Json::array &values, string &out) {
115 bool first = true;
116 out += "[";
117 for (const auto &value : values) {
118 if (!first)
119 out += ", ";
120 value.dump(out);
121 first = false;
122 }
123 out += "]";
124}
125
126static void dump(const Json::object &values, string &out) {
127 bool first = true;
128 out += "{";
129 for (const auto &kv : values) {
130 if (!first)
131 out += ", ";
132 dump(kv.first, out);
133 out += ": ";
134 kv.second.dump(out);
135 first = false;
136 }
137 out += "}";
138}
139
140void Json::dump(string &out) const {
141 m_ptr->dump(out);
142}
143
144/* * * * * * * * * * * * * * * * * * * *
145 * Value wrappers
146 */
147
148template <Json::Type tag, typename T>
149class Value : public JsonValue {
150protected:
151
152 // Constructors
153 explicit Value(const T &value) : m_value(value) {}
154 explicit Value(T &&value) : m_value(move(value)) {}
155
156 // Get type tag
157 Json::Type type() const override {
158 return tag;
159 }
160
161 // Comparisons
162 bool equals(const JsonValue * other) const override {
163 return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
164 }
165 bool less(const JsonValue * other) const override {
166 return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
167 }
168
169 const T m_value;
170 void dump(string &out) const override { json11::dump(m_value, out); }
171};
172
173class JsonDouble final : public Value<Json::NUMBER, double> {
174 double number_value() const override { return m_value; }
175 int int_value() const override { return static_cast<int>(m_value); }
176 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
177 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
178public:
179 explicit JsonDouble(double value) : Value(value) {}
180};
181
182class JsonInt final : public Value<Json::NUMBER, int> {
183 double number_value() const override { return m_value; }
184 int int_value() const override { return m_value; }
185 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
186 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
187public:
188 explicit JsonInt(int value) : Value(value) {}
189};
190
191class JsonBoolean final : public Value<Json::BOOL, bool> {
192 bool bool_value() const override { return m_value; }
193public:
194 explicit JsonBoolean(bool value) : Value(value) {}
195};
196
197class JsonString final : public Value<Json::STRING, string> {
198 const string &string_value() const override { return m_value; }
199public:
200 explicit JsonString(const string &value) : Value(value) {}
201 explicit JsonString(string &&value) : Value(move(value)) {}
202};
203
204class JsonArray final : public Value<Json::ARRAY, Json::array> {
205 const Json::array &array_items() const override { return m_value; }
206 const Json & operator[](size_t i) const override;
207public:
208 explicit JsonArray(const Json::array &value) : Value(value) {}
209 explicit JsonArray(Json::array &&value) : Value(move(value)) {}
210};
211
212class JsonObject final : public Value<Json::OBJECT, Json::object> {
213 const Json::object &object_items() const override { return m_value; }
214 const Json & operator[](const string &key) const override;
215public:
216 explicit JsonObject(const Json::object &value) : Value(value) {}
217 explicit JsonObject(Json::object &&value) : Value(move(value)) {}
218};
219
220class JsonNull final : public Value<Json::NUL, NullStruct> {
221public:
222 JsonNull() : Value({}) {}
223};
224
225/* * * * * * * * * * * * * * * * * * * *
226 * Static globals - static-init-safe
227 */
228struct Statics {
229 const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
230 const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
231 const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
232 const string empty_string;
233 const vector<Json> empty_vector;
234 const map<string, Json> empty_map;
235 Statics() {}
236};
237
238static const Statics & statics() {
239 static const Statics s {};
240 return s;
241}
242
243static const Json & static_null() {
244 // This has to be separate, not in Statics, because Json() accesses statics().null.
245 static const Json json_null;
246 return json_null;
247}
248
249/* * * * * * * * * * * * * * * * * * * *
250 * Constructors
251 */
252
253Json::Json() noexcept : m_ptr(statics().null) {}
254Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
255Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
256Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
257Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
258Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
259Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
260Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
261Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
262Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
263Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
264Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
265
266/* * * * * * * * * * * * * * * * * * * *
267 * Accessors
268 */
269
270Json::Type Json::type() const { return m_ptr->type(); }
271double Json::number_value() const { return m_ptr->number_value(); }
272int Json::int_value() const { return m_ptr->int_value(); }
273bool Json::bool_value() const { return m_ptr->bool_value(); }
274const string & Json::string_value() const { return m_ptr->string_value(); }
275const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
276const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
277const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
278const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
279
280double JsonValue::number_value() const { return 0; }
281int JsonValue::int_value() const { return 0; }
282bool JsonValue::bool_value() const { return false; }
283const string & JsonValue::string_value() const { return statics().empty_string; }
284const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
285const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
286const Json & JsonValue::operator[] (size_t) const { return static_null(); }
287const Json & JsonValue::operator[] (const string &) const { return static_null(); }
288
289const Json & JsonObject::operator[] (const string &key) const {
290 auto iter = m_value.find(key);
291 return (iter == m_value.end()) ? static_null() : iter->second;
292}
293const Json & JsonArray::operator[] (size_t i) const {
294 if (i >= m_value.size()) return static_null();
295 else return m_value[i];
296}
297
298/* * * * * * * * * * * * * * * * * * * *
299 * Comparison
300 */
301
302bool Json::operator== (const Json &other) const {
303 if (m_ptr == other.m_ptr)
304 return true;
305 if (m_ptr->type() != other.m_ptr->type())
306 return false;
307
308 return m_ptr->equals(other.m_ptr.get());
309}
310
311bool Json::operator< (const Json &other) const {
312 if (m_ptr == other.m_ptr)
313 return false;
314 if (m_ptr->type() != other.m_ptr->type())
315 return m_ptr->type() < other.m_ptr->type();
316
317 return m_ptr->less(other.m_ptr.get());
318}
319
320/* * * * * * * * * * * * * * * * * * * *
321 * Parsing
322 */
323
/* esc(c)
 *
 * Format char c suitable for printing in an error message.
 *
 * Printable ASCII (0x20 through 0x7e) is rendered as "'c' (N)", where N is the
 * decimal value of c. Everything else - control characters, DEL (0x7f) and
 * bytes >= 0x80 - is rendered as "(N)" only, so that no raw non-printable
 * byte ends up in the message.
 */
static inline std::string esc(char c) {
    char buf[12];
    const uint8_t byte = static_cast<uint8_t>(c);
    // Upper bound is 0x7e, not 0x7f: DEL is a control character.
    if (byte >= 0x20 && byte <= 0x7e) {
        snprintf(buf, sizeof buf, "'%c' (%d)", c, c);
    } else {
        snprintf(buf, sizeof buf, "(%d)", c);
    }
    return std::string(buf);
}
337
// True when x lies in the closed interval [lower, upper].
static inline bool in_range(long x, long lower, long upper) {
    return !(x < lower || x > upper);
}
341
342namespace {
343/* JsonParser
344 *
345 * Object that tracks all state of an in-progress parse.
346 */
347struct JsonParser final {
348
349 /* State
350 */
351 const string &str;
352 size_t i;
353 string &err;
354 bool failed;
355 const JsonParse strategy;
356
357 /* fail(msg, err_ret = Json())
358 *
359 * Mark this parse as failed.
360 */
361 Json fail(string &&msg) {
362 return fail(move(msg), Json());
363 }
364
365 template <typename T>
366 T fail(string &&msg, const T err_ret) {
367 if (!failed)
368 err = std::move(msg);
369 failed = true;
370 return err_ret;
371 }
372
373 /* consume_whitespace()
374 *
375 * Advance until the current character is non-whitespace.
376 */
377 void consume_whitespace() {
378 while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
379 i++;
380 }
381
382 /* consume_comment()
383 *
384 * Advance comments (c-style inline and multiline).
385 */
386 bool consume_comment() {
387 bool comment_found = false;
388 if (str[i] == '/') {
389 i++;
390 if (i == str.size())
391 return fail("unexpected end of input after start of comment", false);
392 if (str[i] == '/') { // inline comment
393 i++;
394 // advance until next line, or end of input
395 while (i < str.size() && str[i] != '\n') {
396 i++;
397 }
398 comment_found = true;
399 }
400 else if (str[i] == '*') { // multiline comment
401 i++;
402 if (i > str.size()-2)
403 return fail("unexpected end of input inside multi-line comment", false);
404 // advance until closing tokens
405 while (!(str[i] == '*' && str[i+1] == '/')) {
406 i++;
407 if (i > str.size()-2)
408 return fail(
409 "unexpected end of input inside multi-line comment", false);
410 }
411 i += 2;
412 comment_found = true;
413 }
414 else
415 return fail("malformed comment", false);
416 }
417 return comment_found;
418 }
419
420 /* consume_garbage()
421 *
422 * Advance until the current character is non-whitespace and non-comment.
423 */
424 void consume_garbage() {
425 consume_whitespace();
426 if(strategy == JsonParse::COMMENTS) {
427 bool comment_found = false;
428 do {
429 comment_found = consume_comment();
430 if (failed) return;
431 consume_whitespace();
432 }
433 while(comment_found);
434 }
435 }
436
437 /* get_next_token()
438 *
439 * Return the next non-whitespace character. If the end of the input is reached,
440 * flag an error and return 0.
441 */
442 char get_next_token() {
443 consume_garbage();
444 if (failed) return static_cast<char>(0);
445 if (i == str.size())
446 return fail("unexpected end of input", static_cast<char>(0));
447
448 return str[i++];
449 }
450
451 /* encode_utf8(pt, out)
452 *
453 * Encode pt as UTF-8 and add it to out.
454 */
455 void encode_utf8(long pt, string & out) {
456 if (pt < 0)
457 return;
458
459 if (pt < 0x80) {
460 out += static_cast<char>(pt);
461 } else if (pt < 0x800) {
462 out += static_cast<char>((pt >> 6) | 0xC0);
463 out += static_cast<char>((pt & 0x3F) | 0x80);
464 } else if (pt < 0x10000) {
465 out += static_cast<char>((pt >> 12) | 0xE0);
466 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
467 out += static_cast<char>((pt & 0x3F) | 0x80);
468 } else {
469 out += static_cast<char>((pt >> 18) | 0xF0);
470 out += static_cast<char>(((pt >> 12) & 0x3F) | 0x80);
471 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
472 out += static_cast<char>((pt & 0x3F) | 0x80);
473 }
474 }
475
476 /* parse_string()
477 *
478 * Parse a string, starting at the current position.
479 */
480 string parse_string() {
481 string out;
482 long last_escaped_codepoint = -1;
483 while (true) {
484 if (i == str.size())
485 return fail("unexpected end of input in string", "");
486
487 char ch = str[i++];
488
489 if (ch == '"') {
490 encode_utf8(last_escaped_codepoint, out);
491 return out;
492 }
493
494 if (in_range(ch, 0, 0x1f))
495 return fail("unescaped " + esc(ch) + " in string", "");
496
497 // The usual case: non-escaped characters
498 if (ch != '\\') {
499 encode_utf8(last_escaped_codepoint, out);
500 last_escaped_codepoint = -1;
501 out += ch;
502 continue;
503 }
504
505 // Handle escapes
506 if (i == str.size())
507 return fail("unexpected end of input in string", "");
508
509 ch = str[i++];
510
511 if (ch == 'u') {
512 // Extract 4-byte escape sequence
513 string esc = str.substr(i, 4);
514 // Explicitly check length of the substring. The following loop
515 // relies on std::string returning the terminating NUL when
516 // accessing str[length]. Checking here reduces brittleness.
517 if (esc.length() < 4) {
518 return fail("bad \\u escape: " + esc, "");
519 }
520 for (size_t j = 0; j < 4; j++) {
521 if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
522 && !in_range(esc[j], '0', '9'))
523 return fail("bad \\u escape: " + esc, "");
524 }
525
526 long codepoint = strtol(esc.data(), nullptr, 16);
527
528 // JSON specifies that characters outside the BMP shall be encoded as a pair
529 // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
530 // whether we're in the middle of such a beast: the previous codepoint was an
531 // escaped lead (high) surrogate, and this is a trail (low) surrogate.
532 if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
533 && in_range(codepoint, 0xDC00, 0xDFFF)) {
534 // Reassemble the two surrogate pairs into one astral-plane character, per
535 // the UTF-16 algorithm.
536 encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
537 | (codepoint - 0xDC00)) + 0x10000, out);
538 last_escaped_codepoint = -1;
539 } else {
540 encode_utf8(last_escaped_codepoint, out);
541 last_escaped_codepoint = codepoint;
542 }
543
544 i += 4;
545 continue;
546 }
547
548 encode_utf8(last_escaped_codepoint, out);
549 last_escaped_codepoint = -1;
550
551 if (ch == 'b') {
552 out += '\b';
553 } else if (ch == 'f') {
554 out += '\f';
555 } else if (ch == 'n') {
556 out += '\n';
557 } else if (ch == 'r') {
558 out += '\r';
559 } else if (ch == 't') {
560 out += '\t';
561 } else if (ch == '"' || ch == '\\' || ch == '/') {
562 out += ch;
563 } else {
564 return fail("invalid escape character " + esc(ch), "");
565 }
566 }
567 }
568
569 /* parse_number()
570 *
571 * Parse a double.
572 */
573 Json parse_number() {
574 size_t start_pos = i;
575
576 if (str[i] == '-')
577 i++;
578
579 // Integer part
580 if (str[i] == '0') {
581 i++;
582 if (in_range(str[i], '0', '9'))
583 return fail("leading 0s not permitted in numbers");
584 } else if (in_range(str[i], '1', '9')) {
585 i++;
586 while (in_range(str[i], '0', '9'))
587 i++;
588 } else {
589 return fail("invalid " + esc(str[i]) + " in number");
590 }
591
592 if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
593 && (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
594 return std::atoi(str.c_str() + start_pos);
595 }
596
597 // Decimal part
598 if (str[i] == '.') {
599 i++;
600 if (!in_range(str[i], '0', '9'))
601 return fail("at least one digit required in fractional part");
602
603 while (in_range(str[i], '0', '9'))
604 i++;
605 }
606
607 // Exponent part
608 if (str[i] == 'e' || str[i] == 'E') {
609 i++;
610
611 if (str[i] == '+' || str[i] == '-')
612 i++;
613
614 if (!in_range(str[i], '0', '9'))
615 return fail("at least one digit required in exponent");
616
617 while (in_range(str[i], '0', '9'))
618 i++;
619 }
620
621 return std::strtod(str.c_str() + start_pos, nullptr);
622 }
623
624 /* expect(str, res)
625 *
626 * Expect that 'str' starts at the character that was just read. If it does, advance
627 * the input and return res. If not, flag an error.
628 */
629 Json expect(const string &expected, Json res) {
630 assert(i != 0);
631 i--;
632 if (str.compare(i, expected.length(), expected) == 0) {
633 i += expected.length();
634 return res;
635 } else {
636 return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
637 }
638 }
639
640 /* parse_json()
641 *
642 * Parse a JSON object.
643 */
644 Json parse_json(int depth) {
645 if (depth > max_depth) {
646 return fail("exceeded maximum nesting depth");
647 }
648
649 char ch = get_next_token();
650 if (failed)
651 return Json();
652
653 if (ch == '-' || (ch >= '0' && ch <= '9')) {
654 i--;
655 return parse_number();
656 }
657
658 if (ch == 't')
659 return expect("true", true);
660
661 if (ch == 'f')
662 return expect("false", false);
663
664 if (ch == 'n')
665 return expect("null", Json());
666
667 if (ch == '"')
668 return parse_string();
669
670 if (ch == '{') {
671 map<string, Json> data;
672 ch = get_next_token();
673 if (ch == '}')
674 return data;
675
676 while (1) {
677 if (ch != '"')
678 return fail("expected '\"' in object, got " + esc(ch));
679
680 string key = parse_string();
681 if (failed)
682 return Json();
683
684 ch = get_next_token();
685 if (ch != ':')
686 return fail("expected ':' in object, got " + esc(ch));
687
688 data[std::move(key)] = parse_json(depth + 1);
689 if (failed)
690 return Json();
691
692 ch = get_next_token();
693 if (ch == '}')
694 break;
695 if (ch != ',')
696 return fail("expected ',' in object, got " + esc(ch));
697
698 ch = get_next_token();
699 }
700 return data;
701 }
702
703 if (ch == '[') {
704 vector<Json> data;
705 ch = get_next_token();
706 if (ch == ']')
707 return data;
708
709 while (1) {
710 i--;
711 data.push_back(parse_json(depth + 1));
712 if (failed)
713 return Json();
714
715 ch = get_next_token();
716 if (ch == ']')
717 break;
718 if (ch != ',')
719 return fail("expected ',' in list, got " + esc(ch));
720
721 ch = get_next_token();
722 (void)ch;
723 }
724 return data;
725 }
726
727 return fail("expected value, got " + esc(ch));
728 }
729};
730}//namespace {
731
732Json Json::parse(const string &in, string &err, JsonParse strategy) {
733 JsonParser parser { in, 0, err, false, strategy };
734 Json result = parser.parse_json(0);
735
736 // Check for any trailing garbage
737 parser.consume_garbage();
738 if (parser.failed)
739 return Json();
740 if (parser.i != in.size())
741 return parser.fail("unexpected trailing " + esc(in[parser.i]));
742
743 return result;
744}
745
746// Documented in json11.hpp
747vector<Json> Json::parse_multi(const string &in,
748 std::string::size_type &parser_stop_pos,
749 string &err,
750 JsonParse strategy) {
751 JsonParser parser { in, 0, err, false, strategy };
752 parser_stop_pos = 0;
753 vector<Json> json_vec;
754 while (parser.i != in.size() && !parser.failed) {
755 json_vec.push_back(parser.parse_json(0));
756 if (parser.failed)
757 break;
758
759 // Check for another object
760 parser.consume_garbage();
761 if (parser.failed)
762 break;
763 parser_stop_pos = parser.i;
764 }
765 return json_vec;
766}
767
768/* * * * * * * * * * * * * * * * * * * *
769 * Shape-checking
770 */
771
772bool Json::has_shape(const shape & types, string & err) const {
773 if (!is_object()) {
774 err = "expected JSON object, got " + dump();
775 return false;
776 }
777
778 const auto& obj_items = object_items();
779 for (auto & item : types) {
780 const auto it = obj_items.find(item.first);
781 if (it == obj_items.cend() || it->second.type() != item.second) {
782 err = "bad type for " + item.first + " in " + dump();
783 return false;
784 }
785 }
786
787 return true;
788}
789
790} // namespace json11
791