1 | /* |
2 | * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
/*
 * This is not strict JSON in its current state.
 * Some differences:
 * - Double quotes around the key in an object are not enforced,
 *     i.e. you can write: { foo : "bar" } instead of { "foo" : "bar" }.
 * - Comments are allowed.
 * - The last element in an object or array can have an ending comma.
 */
33 | |
34 | #include "precompiled.hpp" |
35 | #include "utilities/json.hpp" |
36 | #include "utilities/ostream.hpp" |
37 | #include <math.h> |
38 | |
// Locate the first occurrence of c in s.
// Unlike strchr, a miss does not yield NULL: the result then points at the
// terminating NUL of s.
const char* strchrnul_(const char *s, int c) {
  const char* hit = strchr(s, c);
  if (hit != NULL) {
    return hit;
  }
  return s + strlen(s);
}
43 | |
44 | JSON::JSON(const char* text, bool silent, outputStream* st) |
45 | : _st(st), start(text), pos(text), mark(text), |
46 | level(0), line(1), column(0), silent(silent), _valid(true) |
47 | { |
48 | } |
49 | |
50 | void JSON::parse() { |
51 | assert(start != NULL, "Need something to parse" ); |
52 | if (start == NULL) { |
53 | _valid = false; |
54 | error(INTERNAL_ERROR, "JSON parser was called with a string that was NULL." ); |
55 | } else { |
56 | _valid = parse_json_value(); |
57 | } |
58 | } |
59 | |
60 | bool JSON::valid() { |
61 | return _valid; |
62 | } |
63 | |
64 | bool JSON::parse_json_value() { |
65 | int c; |
66 | |
67 | c = skip_to_token(); |
68 | if (c == -1) { |
69 | return false; |
70 | } |
71 | |
72 | // Must start with object or array |
73 | if (level == 0) { |
74 | |
75 | switch (c) { |
76 | case '{': |
77 | if (parse_json_object() == false) { |
78 | return false; |
79 | } |
80 | c = skip_to_token(); |
81 | if (c > 0) { |
82 | mark_pos(); |
83 | error(SYNTAX_ERROR, "Only one top level object/array is allowed." ); |
84 | return false; |
85 | } else if (c < 0) { |
86 | return false; |
87 | } |
88 | return true; |
89 | |
90 | case '[': |
91 | if (parse_json_array() == false) { |
92 | return false; |
93 | } |
94 | c = skip_to_token(); |
95 | if (c > 0) { |
96 | mark_pos(); |
97 | error(SYNTAX_ERROR, "Only one top level object/array is allowed." ); |
98 | return false; |
99 | } else if (c < 0) { |
100 | return false; |
101 | } |
102 | return true; |
103 | |
104 | case 0: |
105 | error(SYNTAX_ERROR, "EOS was encountered before any json declarations" ); |
106 | return false; |
107 | |
108 | default: |
109 | error(SYNTAX_ERROR, "Json must start with an object or an array." ); |
110 | return false; |
111 | } |
112 | } else { // level > 0 |
113 | switch (c) { |
114 | case '{': |
115 | return parse_json_object(); |
116 | |
117 | case '[': |
118 | return parse_json_array(); |
119 | |
120 | case '"': |
121 | return parse_json_string(); |
122 | |
123 | case '-': case '0': |
124 | case '1': case '2': case '3': |
125 | case '4': case '5': case '6': |
126 | case '7': case '8': case '9': |
127 | return parse_json_number(); |
128 | |
129 | case 't': |
130 | return parse_json_symbol("true" , JSON_TRUE); |
131 | |
132 | case 'f': |
133 | return parse_json_symbol("false" , JSON_FALSE); |
134 | |
135 | case 'n': |
136 | return parse_json_symbol("null" , JSON_NULL); |
137 | |
138 | case 0: |
139 | error(SYNTAX_ERROR, "EOS was encountered when expecting a json value." ); |
140 | return false; |
141 | |
142 | default: |
143 | error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?)." ); |
144 | return false; |
145 | } |
146 | } |
147 | } |
148 | |
149 | // Should only be called when we actually have the start of an object |
150 | // Otherwise it is an internal error |
151 | bool JSON::parse_json_object() { |
152 | NOT_PRODUCT(const char* prev_pos); |
153 | int c; |
154 | |
155 | mark_pos(); |
156 | // Check that we are not called in error |
157 | if (expect_any("{" , "object start" , INTERNAL_ERROR) <= 0) { |
158 | return false; |
159 | } |
160 | |
161 | if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) { |
162 | return false; |
163 | } |
164 | |
165 | for (;;) { |
166 | mark_pos(); |
167 | c = skip_to_token(); |
168 | if (c == 0) { |
169 | error(SYNTAX_ERROR, "EOS when expecting an object key or object end" ); |
170 | return false; |
171 | } else if (c < 0) { |
172 | return false; |
173 | } else if (c == '}') { |
174 | // We got here from either empty object "{}" or ending comma "{a:1,}" |
175 | next(); |
176 | break; |
177 | } |
178 | |
179 | NOT_PRODUCT(prev_pos = pos); |
180 | if (parse_json_key() == false) { |
181 | return false; |
182 | } |
183 | assert(pos > prev_pos, "parsing stalled" ); |
184 | |
185 | skip_to_token(); |
186 | mark_pos(); |
187 | if (expect_any(":" , "object key-value separator" ) <= 0) { |
188 | return false; |
189 | } |
190 | |
191 | skip_to_token(); |
192 | mark_pos(); |
193 | NOT_PRODUCT(prev_pos = pos); |
194 | if (parse_json_value() == false) { |
195 | return false; |
196 | } |
197 | assert(pos > prev_pos, "parsing stalled" ); |
198 | |
199 | c = skip_to_token(); |
200 | mark_pos(); |
201 | if (expect_any(",}" , "value separator or object end" ) <= 0) { |
202 | return false; |
203 | } |
204 | if (c == '}') { |
205 | break; |
206 | } |
207 | } |
208 | |
209 | assert(c == '}', "array parsing ended without object end token ('}')" ); |
210 | return callback(JSON_OBJECT_END, NULL, --level); |
211 | } |
212 | |
213 | // Should only be called when we actually have the start of an array |
214 | // Otherwise it is an internal error |
215 | bool JSON::parse_json_array() { |
216 | NOT_PRODUCT(const char* prev_pos); |
217 | int c; |
218 | |
219 | mark_pos(); |
220 | // Check that we are not called in error |
221 | if (expect_any("[" , "array start character" , INTERNAL_ERROR) <= 0) { |
222 | return false; |
223 | } |
224 | |
225 | if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) { |
226 | return false; |
227 | } |
228 | |
229 | for (;;) { |
230 | mark_pos(); |
231 | c = skip_to_token(); |
232 | if (c == 0) { |
233 | error(SYNTAX_ERROR, "EOS when expecting a json value or array end" ); |
234 | return false; |
235 | } else if (c < 0) { |
236 | return false; |
237 | } else if (c == ']') { |
238 | // We got here from either empty array "[]" or ending comma "[1,]" |
239 | next(); |
240 | break; |
241 | } |
242 | |
243 | mark_pos(); |
244 | NOT_PRODUCT(prev_pos = pos); |
245 | if (parse_json_value() == false) { |
246 | return false; |
247 | } |
248 | assert(pos > prev_pos, "parsing stalled" ); |
249 | |
250 | c = skip_to_token(); |
251 | mark_pos(); |
252 | if (expect_any(",]" , "value separator or array end" ) <= 0) { |
253 | return false; |
254 | } |
255 | if (c == ']') { |
256 | break; |
257 | } |
258 | } |
259 | |
260 | assert(c == ']', "array parsing ended without array end token (']')" ); |
261 | return callback(JSON_ARRAY_END, NULL, --level); |
262 | } |
263 | |
264 | bool JSON::parse_json_string(bool key) { |
265 | const char* end; |
266 | JSON_VAL v; |
267 | |
268 | mark_pos(); |
269 | if (expect_any("\"" , "string start character" , INTERNAL_ERROR) <= 0) { |
270 | return false; |
271 | } |
272 | |
273 | end = strchr(pos, '"'); // TODO: escapes |
274 | if (end == NULL) { |
275 | error(SYNTAX_ERROR, "String started here never ended. Expected \'\"\' before EOS." ); |
276 | return false; |
277 | } |
278 | |
279 | v.str.start = pos; |
280 | v.str.length = end - pos; |
281 | skip(end - pos); |
282 | |
283 | if (expect_any("\"" , "string end character" , INTERNAL_ERROR) <= 0) { |
284 | return false; |
285 | } |
286 | |
287 | if (key == true) { |
288 | return callback(JSON_KEY, &v, level); |
289 | } else { |
290 | return callback(JSON_STRING, &v, level); |
291 | } |
292 | } |
293 | |
// TODO: hotspot equivalents?
// ASCII-only character classification helpers.

// True for 'A'-'Z' and 'a'-'z'.
static bool is_alpha(u_char c) {
  const bool upper = (c >= 'A' && c <= 'Z');
  const bool lower = (c >= 'a' && c <= 'z');
  return upper || lower;
}

// True for '0'-'9'.
static bool is_numeric(u_char c) {
  return !(c < '0' || c > '9');
}

// True for letters and digits.
static bool is_alnum(u_char c) {
  if (is_alpha(c)) {
    return true;
  }
  return is_numeric(c);
}

// True for letters, digits and '_'.
static bool is_word(u_char c) {
  if (c == '_') {
    return true;
  }
  return is_alnum(c);
}
307 | |
308 | // Allow object keys to be without quotation, |
309 | // but then restrict to ([a-zA-Z0-9_])+ |
310 | bool JSON::parse_json_key() { |
311 | const char* begin; |
312 | JSON_VAL v; |
313 | u_char c; |
314 | |
315 | mark_pos(); |
316 | c = peek(); |
317 | if (c == '"') { |
318 | return parse_json_string(true); |
319 | } |
320 | |
321 | begin = pos; |
322 | c = peek(); |
323 | if (c == 0) { |
324 | error(SYNTAX_ERROR, "Got EOS when expecting an object key." ); |
325 | return false; |
326 | } else if (is_word(c) == false) { |
327 | error(SYNTAX_ERROR, "Expected an object key, which can be a double-quoted (\") string or a simple string (only alphanumeric characters and underscore, separated by whitespace) that doesn't need to be quoted." ); |
328 | return false; |
329 | } |
330 | |
331 | for (;;) { |
332 | c = peek(); |
333 | // Allow the key to be delimited by control characters and the object key-value separator ':' |
334 | if (c <= ' ' || c == ':') { |
335 | break; |
336 | } else if (is_word(c) == false) { |
337 | error(SYNTAX_ERROR, "Object key need to be quoted, or consist entirely of alphanumeric characters and underscores." ); |
338 | return false; |
339 | } |
340 | next(); |
341 | } |
342 | |
343 | v.str.start = begin; |
344 | v.str.length = pos - begin; |
345 | return callback(JSON_KEY, &v, level); |
346 | } |
347 | |
348 | bool JSON::parse_json_number() { |
349 | double double_value; |
350 | int tokens, read; |
351 | JSON_VAL v; |
352 | |
353 | mark_pos(); |
354 | |
355 | // Parsing number - for simplicity ints are limited to 2**53 |
356 | // sscanf as a double and check if part is 0. |
357 | tokens = sscanf(pos, "%lf%n" , &double_value, &read); |
358 | assert(tokens <= 1, "scanf implementation that counts as a token, parsing json numbers will always fail" ); |
359 | if (tokens == 1) { |
360 | assert(read > 0, "sanity" ); |
361 | |
362 | if (floor(double_value) == double_value) { |
363 | // No exponent - treat as an int |
364 | v.int_value = (int)double_value; |
365 | if (!callback(JSON_NUMBER_INT, &v, level)) { |
366 | return false; |
367 | } |
368 | } else { |
369 | v.double_value = double_value; |
370 | if (!callback(JSON_NUMBER_FLOAT, &v, level)) { |
371 | return false; |
372 | } |
373 | } |
374 | skip(read); |
375 | return true; |
376 | } |
377 | |
378 | error(SYNTAX_ERROR, "Couldn't parse json number (note that exponents are not supported)." ); |
379 | return false; |
380 | } |
381 | |
382 | bool JSON::parse_json_symbol(const char* name, JSON_TYPE symbol) { |
383 | if (expect_string(name, "maybe you forgot to quote your strings?" ) == false) { |
384 | mark_pos(); |
385 | return false; |
386 | } |
387 | return callback(symbol, NULL, level); |
388 | } |
389 | |
390 | void JSON::mark_pos() { |
391 | assert((mark == start || *(mark - 1)) != 0, "buffer overrun" ); |
392 | assert(mark <= pos, "mark runahead" ); |
393 | |
394 | u_char c; |
395 | |
396 | while (mark < pos) { |
397 | c = *mark; |
398 | assert(c != 0, "pos buffer overrun?" ); |
399 | if (c != 0) { |
400 | mark++; |
401 | column++; |
402 | } |
403 | if (c == '\n') { |
404 | line++; |
405 | column = 0; |
406 | } |
407 | } |
408 | |
409 | assert(mark <= pos, "mark runahead" ); |
410 | } |
411 | |
412 | u_char JSON::next() { |
413 | assert((pos == start || *(pos - 1)) != 0, "buffer overrun" ); |
414 | |
415 | u_char c = *pos; |
416 | if (c != 0) { |
417 | pos++; |
418 | } |
419 | return c; |
420 | } |
421 | |
422 | u_char JSON::peek() { |
423 | return *pos; |
424 | } |
425 | |
426 | // Peek ahead i chars (0 is same as peek()) |
427 | u_char JSON::peek(size_t i) { |
428 | u_char c; |
429 | const char* p; |
430 | |
431 | p = pos; |
432 | c = *p; |
433 | while (i > 0 && c != 0) { |
434 | i--; |
435 | p++; |
436 | c = *p; |
437 | } |
438 | return c; |
439 | } |
440 | |
441 | /* |
442 | * Check that one of the expected characters is next in the stream. |
443 | * If not, it is an error. |
444 | * Returns 0 if EOS is encountered. |
445 | * Returns -1 if the next character was not one of the expected. |
446 | * Otherwise consumes and returns the expected character that was encountered. |
447 | */ |
448 | int JSON::expect_any(const char* valid_chars, const char* error_msg, JSON_ERROR e) { |
449 | size_t len; |
450 | u_char c; |
451 | |
452 | len = strlen(valid_chars); |
453 | assert(len > 0, "need non-empty string" ); |
454 | |
455 | c = peek(); |
456 | if (c == 0) { |
457 | error(e, "Got EOS when expecting %s (%s\'%s\')." , error_msg, len > 1 ? "one of " : "" , valid_chars); |
458 | return 0; |
459 | } |
460 | for (size_t i = 0; i < len; i++) { |
461 | if (c == valid_chars[i]) { |
462 | return next(); |
463 | } |
464 | } |
465 | error(e, "Expected %s (%s\'%s\')." , error_msg, len > 1 ? "one of " : "" , valid_chars); |
466 | return -1; |
467 | } |
468 | |
469 | /* |
470 | * Check that the expected string is next in the stream. |
471 | * If not, it is an error. |
472 | * Consumes the expected characters if they are present. |
473 | * Returns true if the expected characters were present, otherwise false. |
474 | */ |
475 | bool JSON::expect_string(const char* expected_string, const char* error_msg, JSON_ERROR e) { |
476 | u_char c, expected_char; |
477 | size_t len; |
478 | |
479 | assert(expected_string != NULL, "need non-null string" ); |
480 | len = strlen(expected_string); |
481 | assert(len > 0, "need non-empty string" ); |
482 | |
483 | for (size_t i = 0; i < len; i++) { |
484 | expected_char = expected_string[i]; |
485 | assert(expected_char > ' ', "not sane for control characters" ); |
486 | if (expected_char <= ' ') { |
487 | error(INTERNAL_ERROR, "expect got a control char" ); |
488 | } |
489 | c = pos[i]; |
490 | if (c == 0) { |
491 | error(e, "EOS encountered when expecting %s (\"%s\")" , error_msg, expected_string); |
492 | return false; |
493 | } else if (c != expected_char) { |
494 | error(e, "Expected \"%s\" (%s)" , expected_string, error_msg); |
495 | return false; |
496 | } |
497 | } |
498 | skip(len); |
499 | return true; |
500 | } |
501 | |
502 | /* |
503 | * Skip i characters. |
504 | * Returns number of characters skipped. |
505 | */ |
506 | size_t JSON::skip(size_t i) { |
507 | u_char c; |
508 | size_t j; |
509 | |
510 | c = peek(); |
511 | for (j = i; c != 0 && j > 0; j--) { |
512 | c = next(); |
513 | } |
514 | return i - j; |
515 | } |
516 | |
517 | /* |
518 | * Skip whitespace and comments. |
519 | * Returns the first token after whitespace/comments without consuming it |
520 | * Returns 0 if EOS is encountered. |
521 | * Returns -1 if there is an error |
522 | */ |
523 | int JSON::skip_to_token() { |
524 | for (;;) { |
525 | int c = peek(0); |
526 | if (c == '/') { |
527 | u_char c2 = peek(1); |
528 | if (c2 == '/') { |
529 | c = skip_line_comment(); |
530 | } else if (c2 == '*') { |
531 | c = skip_block_comment(); |
532 | if (c < 0) { |
533 | return -1; |
534 | } |
535 | } |
536 | // Fall through to keep checking if there |
537 | // are more whitespace / comments to skip |
538 | } |
539 | if (c == 0 || c > ' ') { |
540 | return c; |
541 | } |
542 | next(); |
543 | } |
544 | return 0; |
545 | } |
546 | |
547 | /* |
548 | * Skip to, and return the wanted char without consuming it |
549 | * Returns 0 if EOS is encountered. |
550 | */ |
551 | u_char JSON::skip_to(u_char want) { |
552 | // We want the bookkeeping done in next(). |
553 | // Otherwise strchr could have been used. |
554 | u_char c; |
555 | for(;;) { |
556 | c = peek(); |
557 | if (c == 0 || c == want) { |
558 | return c; |
559 | } |
560 | next(); |
561 | } |
562 | } |
563 | |
564 | /* |
565 | * Should only be called when we actually have a line comment to skip. |
566 | * Otherwise it is an internal error. |
567 | * |
568 | * Will return the first token after the line comment without consuming it. |
569 | * Returns 0 if EOS is encoutered. |
570 | */ |
571 | u_char JSON::() { |
572 | u_char c; |
573 | |
574 | // Check that we are not called in error |
575 | expect_any("/" , "line comment start" , INTERNAL_ERROR); |
576 | expect_any("/" , "line comment start" , INTERNAL_ERROR); |
577 | |
578 | c = skip_to('\n'); |
579 | if (c == 0) { |
580 | return 0; |
581 | } |
582 | next(); |
583 | return next(); |
584 | } |
585 | |
586 | /* |
587 | * Should only be called when we actually have a block comment to skip. |
588 | * Otherwise it is an internal error. |
589 | * |
590 | * Returns the first token after the block comment without consuming it. |
591 | * Returns -1 if EOS is encountered in the middle of a comment. |
592 | */ |
593 | int JSON::() { |
594 | const char* current; |
595 | |
596 | // Check that we are not called in error. |
597 | if (peek() != '/' || peek(1) != '*') { |
598 | // Let expect handle EOS. |
599 | expect_string("/*" , "block comment start" , INTERNAL_ERROR); |
600 | return 0; |
601 | } |
602 | |
603 | current = pos; |
604 | for (;;) { |
605 | current = strchrnul_(current, '*'); |
606 | |
607 | if (current[0] == 0 || current[1] == 0) { |
608 | // Advance error marker to start of block comment |
609 | mark_pos(); |
610 | error(SYNTAX_ERROR, "Block comment started here never ended. Expected \"*/\" before EOS." ); |
611 | return -1; |
612 | } |
613 | |
614 | if (current[1] == '/') { |
615 | pos = current; |
616 | if (expect_string("*/" , "block comment end" , INTERNAL_ERROR) == false) { |
617 | return -1; |
618 | } |
619 | // Found block comment end |
620 | return peek(); |
621 | } |
622 | current++; |
623 | } |
624 | } |
625 | |
626 | const char* JSON::strerror(JSON_ERROR e) { |
627 | switch (e) { |
628 | case SYNTAX_ERROR: |
629 | return "Syntax error" ; |
630 | case INTERNAL_ERROR: |
631 | return "Internal error" ; |
632 | case KEY_ERROR: |
633 | return "Key error" ; |
634 | case VALUE_ERROR: |
635 | return "Value error" ; |
636 | default: |
637 | ShouldNotReachHere(); |
638 | return "Unknown error" ; |
639 | } |
640 | } |
641 | |
642 | void JSON::error(JSON_ERROR e, const char* format, ...) { |
643 | _valid = false; |
644 | |
645 | if (!silent) { |
646 | const char* line_start; |
647 | const char* tmp; |
648 | size_t line_length; |
649 | va_list args; |
650 | u_char c; |
651 | |
652 | _st->print("%s on line %u byte %u: " , JSON::strerror(e), line, column + 1); |
653 | va_start(args, format); |
654 | _st->vprint(format, args); |
655 | _st->cr(); |
656 | va_end(args); |
657 | |
658 | line_start = mark - column; |
659 | assert(line_start >= start, "out of bounds" ); |
660 | assert(line_start <= mark, "out of bounds" ); |
661 | assert(line_start == start || line_start[-1] == '\n', "line counting error" ); |
662 | |
663 | c = *pos; |
664 | if (c == 0) { |
665 | _st->print(" Got " ); |
666 | _st->print_cr("EOS." ); |
667 | } |
668 | tmp = mark; |
669 | c = *tmp; |
670 | if (c > ' ') { |
671 | _st->print(" At " ); |
672 | _st->print("'" ); |
673 | while (c > ' ') { |
674 | _st->print("%c" , c); |
675 | tmp++; |
676 | c = *tmp; |
677 | } |
678 | _st->print_cr("'." ); |
679 | } |
680 | |
681 | // Skip to newline or EOS |
682 | tmp = strchrnul_(mark, '\n'); |
683 | line_length = tmp - line_start; |
684 | |
685 | _st->print_cr("%s" , line_start); |
686 | } |
687 | } |
688 | |
689 | |