1/*
2 * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25/*
26 * This is not really json in the state it is now.
27 * Some differences:
 * - Double quotes around the key in an object are not enforced,
 *   i.e. you can write: { foo : "bar" } instead of { "foo" : "bar" }.
30 * - Comments are allowed.
31 * - The last element in an object or array can have an ending comma.
32 */
33
#include "precompiled.hpp"
#include "utilities/json.hpp"
#include "utilities/ostream.hpp"
#include <limits.h>
#include <math.h>
38
// Returns a pointer to the first occurrence of c in s, or to the
// terminating NUL of s when c does not occur in it.
const char* strchrnul_(const char *s, int c) {
  const char* hit = strchr(s, c);
  if (hit != NULL) {
    return hit;
  }
  return s + strlen(s);
}
43
44JSON::JSON(const char* text, bool silent, outputStream* st)
45: _st(st), start(text), pos(text), mark(text),
46 level(0), line(1), column(0), silent(silent), _valid(true)
47{
48}
49
50void JSON::parse() {
51 assert(start != NULL, "Need something to parse");
52 if (start == NULL) {
53 _valid = false;
54 error(INTERNAL_ERROR, "JSON parser was called with a string that was NULL.");
55 } else {
56 _valid = parse_json_value();
57 }
58}
59
60bool JSON::valid() {
61 return _valid;
62}
63
64bool JSON::parse_json_value() {
65 int c;
66
67 c = skip_to_token();
68 if (c == -1) {
69 return false;
70 }
71
72 // Must start with object or array
73 if (level == 0) {
74
75 switch (c) {
76 case '{':
77 if (parse_json_object() == false) {
78 return false;
79 }
80 c = skip_to_token();
81 if (c > 0) {
82 mark_pos();
83 error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
84 return false;
85 } else if (c < 0) {
86 return false;
87 }
88 return true;
89
90 case '[':
91 if (parse_json_array() == false) {
92 return false;
93 }
94 c = skip_to_token();
95 if (c > 0) {
96 mark_pos();
97 error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
98 return false;
99 } else if (c < 0) {
100 return false;
101 }
102 return true;
103
104 case 0:
105 error(SYNTAX_ERROR, "EOS was encountered before any json declarations");
106 return false;
107
108 default:
109 error(SYNTAX_ERROR, "Json must start with an object or an array.");
110 return false;
111 }
112 } else { // level > 0
113 switch (c) {
114 case '{':
115 return parse_json_object();
116
117 case '[':
118 return parse_json_array();
119
120 case '"':
121 return parse_json_string();
122
123 case '-': case '0':
124 case '1': case '2': case '3':
125 case '4': case '5': case '6':
126 case '7': case '8': case '9':
127 return parse_json_number();
128
129 case 't':
130 return parse_json_symbol("true", JSON_TRUE);
131
132 case 'f':
133 return parse_json_symbol("false", JSON_FALSE);
134
135 case 'n':
136 return parse_json_symbol("null", JSON_NULL);
137
138 case 0:
139 error(SYNTAX_ERROR, "EOS was encountered when expecting a json value.");
140 return false;
141
142 default:
143 error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?).");
144 return false;
145 }
146 }
147}
148
149// Should only be called when we actually have the start of an object
150// Otherwise it is an internal error
151bool JSON::parse_json_object() {
152 NOT_PRODUCT(const char* prev_pos);
153 int c;
154
155 mark_pos();
156 // Check that we are not called in error
157 if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) {
158 return false;
159 }
160
161 if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) {
162 return false;
163 }
164
165 for (;;) {
166 mark_pos();
167 c = skip_to_token();
168 if (c == 0) {
169 error(SYNTAX_ERROR, "EOS when expecting an object key or object end");
170 return false;
171 } else if (c < 0) {
172 return false;
173 } else if (c == '}') {
174 // We got here from either empty object "{}" or ending comma "{a:1,}"
175 next();
176 break;
177 }
178
179 NOT_PRODUCT(prev_pos = pos);
180 if (parse_json_key() == false) {
181 return false;
182 }
183 assert(pos > prev_pos, "parsing stalled");
184
185 skip_to_token();
186 mark_pos();
187 if (expect_any(":", "object key-value separator") <= 0) {
188 return false;
189 }
190
191 skip_to_token();
192 mark_pos();
193 NOT_PRODUCT(prev_pos = pos);
194 if (parse_json_value() == false) {
195 return false;
196 }
197 assert(pos > prev_pos, "parsing stalled");
198
199 c = skip_to_token();
200 mark_pos();
201 if (expect_any(",}", "value separator or object end") <= 0) {
202 return false;
203 }
204 if (c == '}') {
205 break;
206 }
207 }
208
209 assert(c == '}', "array parsing ended without object end token ('}')");
210 return callback(JSON_OBJECT_END, NULL, --level);
211}
212
213// Should only be called when we actually have the start of an array
214// Otherwise it is an internal error
215bool JSON::parse_json_array() {
216 NOT_PRODUCT(const char* prev_pos);
217 int c;
218
219 mark_pos();
220 // Check that we are not called in error
221 if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) {
222 return false;
223 }
224
225 if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) {
226 return false;
227 }
228
229 for (;;) {
230 mark_pos();
231 c = skip_to_token();
232 if (c == 0) {
233 error(SYNTAX_ERROR, "EOS when expecting a json value or array end");
234 return false;
235 } else if (c < 0) {
236 return false;
237 } else if (c == ']') {
238 // We got here from either empty array "[]" or ending comma "[1,]"
239 next();
240 break;
241 }
242
243 mark_pos();
244 NOT_PRODUCT(prev_pos = pos);
245 if (parse_json_value() == false) {
246 return false;
247 }
248 assert(pos > prev_pos, "parsing stalled");
249
250 c = skip_to_token();
251 mark_pos();
252 if (expect_any(",]", "value separator or array end") <= 0) {
253 return false;
254 }
255 if (c == ']') {
256 break;
257 }
258 }
259
260 assert(c == ']', "array parsing ended without array end token (']')");
261 return callback(JSON_ARRAY_END, NULL, --level);
262}
263
264bool JSON::parse_json_string(bool key) {
265 const char* end;
266 JSON_VAL v;
267
268 mark_pos();
269 if (expect_any("\"", "string start character", INTERNAL_ERROR) <= 0) {
270 return false;
271 }
272
273 end = strchr(pos, '"'); // TODO: escapes
274 if (end == NULL) {
275 error(SYNTAX_ERROR, "String started here never ended. Expected \'\"\' before EOS.");
276 return false;
277 }
278
279 v.str.start = pos;
280 v.str.length = end - pos;
281 skip(end - pos);
282
283 if (expect_any("\"", "string end character", INTERNAL_ERROR) <= 0) {
284 return false;
285 }
286
287 if (key == true) {
288 return callback(JSON_KEY, &v, level);
289 } else {
290 return callback(JSON_STRING, &v, level);
291 }
292}
293
// TODO: hotspot equivalents?
// ASCII-only character classification used for unquoted object keys.

// True for 'A'-'Z' and 'a'-'z'.
static bool is_alpha(u_char c) {
  const bool upper = ('A' <= c && c <= 'Z');
  const bool lower = ('a' <= c && c <= 'z');
  return upper || lower;
}

// True for '0'-'9'.
static bool is_numeric(u_char c) {
  return '0' <= c && c <= '9';
}

// True for a letter or a digit.
static bool is_alnum(u_char c) {
  if (is_alpha(c)) {
    return true;
  }
  return is_numeric(c);
}

// True for a letter, a digit or '_'.
static bool is_word(u_char c) {
  if (c == '_') {
    return true;
  }
  return is_alnum(c);
}
307
308// Allow object keys to be without quotation,
309// but then restrict to ([a-zA-Z0-9_])+
310bool JSON::parse_json_key() {
311 const char* begin;
312 JSON_VAL v;
313 u_char c;
314
315 mark_pos();
316 c = peek();
317 if (c == '"') {
318 return parse_json_string(true);
319 }
320
321 begin = pos;
322 c = peek();
323 if (c == 0) {
324 error(SYNTAX_ERROR, "Got EOS when expecting an object key.");
325 return false;
326 } else if (is_word(c) == false) {
327 error(SYNTAX_ERROR, "Expected an object key, which can be a double-quoted (\") string or a simple string (only alphanumeric characters and underscore, separated by whitespace) that doesn't need to be quoted.");
328 return false;
329 }
330
331 for (;;) {
332 c = peek();
333 // Allow the key to be delimited by control characters and the object key-value separator ':'
334 if (c <= ' ' || c == ':') {
335 break;
336 } else if (is_word(c) == false) {
337 error(SYNTAX_ERROR, "Object key need to be quoted, or consist entirely of alphanumeric characters and underscores.");
338 return false;
339 }
340 next();
341 }
342
343 v.str.start = begin;
344 v.str.length = pos - begin;
345 return callback(JSON_KEY, &v, level);
346}
347
348bool JSON::parse_json_number() {
349 double double_value;
350 int tokens, read;
351 JSON_VAL v;
352
353 mark_pos();
354
355 // Parsing number - for simplicity ints are limited to 2**53
356 // sscanf as a double and check if part is 0.
357 tokens = sscanf(pos, "%lf%n", &double_value, &read);
358 assert(tokens <= 1, "scanf implementation that counts as a token, parsing json numbers will always fail");
359 if (tokens == 1) {
360 assert(read > 0, "sanity");
361
362 if (floor(double_value) == double_value) {
363 // No exponent - treat as an int
364 v.int_value = (int)double_value;
365 if (!callback(JSON_NUMBER_INT, &v, level)) {
366 return false;
367 }
368 } else {
369 v.double_value = double_value;
370 if (!callback(JSON_NUMBER_FLOAT, &v, level)) {
371 return false;
372 }
373 }
374 skip(read);
375 return true;
376 }
377
378 error(SYNTAX_ERROR, "Couldn't parse json number (note that exponents are not supported).");
379 return false;
380}
381
382bool JSON::parse_json_symbol(const char* name, JSON_TYPE symbol) {
383 if (expect_string(name, "maybe you forgot to quote your strings?") == false) {
384 mark_pos();
385 return false;
386 }
387 return callback(symbol, NULL, level);
388}
389
390void JSON::mark_pos() {
391 assert((mark == start || *(mark - 1)) != 0, "buffer overrun");
392 assert(mark <= pos, "mark runahead");
393
394 u_char c;
395
396 while (mark < pos) {
397 c = *mark;
398 assert(c != 0, "pos buffer overrun?");
399 if (c != 0) {
400 mark++;
401 column++;
402 }
403 if (c == '\n') {
404 line++;
405 column = 0;
406 }
407 }
408
409 assert(mark <= pos, "mark runahead");
410}
411
412u_char JSON::next() {
413 assert((pos == start || *(pos - 1)) != 0, "buffer overrun");
414
415 u_char c = *pos;
416 if (c != 0) {
417 pos++;
418 }
419 return c;
420}
421
422u_char JSON::peek() {
423 return *pos;
424}
425
426// Peek ahead i chars (0 is same as peek())
427u_char JSON::peek(size_t i) {
428 u_char c;
429 const char* p;
430
431 p = pos;
432 c = *p;
433 while (i > 0 && c != 0) {
434 i--;
435 p++;
436 c = *p;
437 }
438 return c;
439}
440
441/*
442 * Check that one of the expected characters is next in the stream.
443 * If not, it is an error.
444 * Returns 0 if EOS is encountered.
445 * Returns -1 if the next character was not one of the expected.
446 * Otherwise consumes and returns the expected character that was encountered.
447 */
448int JSON::expect_any(const char* valid_chars, const char* error_msg, JSON_ERROR e) {
449 size_t len;
450 u_char c;
451
452 len = strlen(valid_chars);
453 assert(len > 0, "need non-empty string");
454
455 c = peek();
456 if (c == 0) {
457 error(e, "Got EOS when expecting %s (%s\'%s\').", error_msg, len > 1 ? "one of " : "", valid_chars);
458 return 0;
459 }
460 for (size_t i = 0; i < len; i++) {
461 if (c == valid_chars[i]) {
462 return next();
463 }
464 }
465 error(e, "Expected %s (%s\'%s\').", error_msg, len > 1 ? "one of " : "", valid_chars);
466 return -1;
467}
468
469/*
470 * Check that the expected string is next in the stream.
471 * If not, it is an error.
472 * Consumes the expected characters if they are present.
473 * Returns true if the expected characters were present, otherwise false.
474 */
475bool JSON::expect_string(const char* expected_string, const char* error_msg, JSON_ERROR e) {
476 u_char c, expected_char;
477 size_t len;
478
479 assert(expected_string != NULL, "need non-null string");
480 len = strlen(expected_string);
481 assert(len > 0, "need non-empty string");
482
483 for (size_t i = 0; i < len; i++) {
484 expected_char = expected_string[i];
485 assert(expected_char > ' ', "not sane for control characters");
486 if (expected_char <= ' ') {
487 error(INTERNAL_ERROR, "expect got a control char");
488 }
489 c = pos[i];
490 if (c == 0) {
491 error(e, "EOS encountered when expecting %s (\"%s\")", error_msg, expected_string);
492 return false;
493 } else if (c != expected_char) {
494 error(e, "Expected \"%s\" (%s)", expected_string, error_msg);
495 return false;
496 }
497 }
498 skip(len);
499 return true;
500}
501
502/*
503 * Skip i characters.
504 * Returns number of characters skipped.
505 */
506size_t JSON::skip(size_t i) {
507 u_char c;
508 size_t j;
509
510 c = peek();
511 for (j = i; c != 0 && j > 0; j--) {
512 c = next();
513 }
514 return i - j;
515}
516
517/*
518 * Skip whitespace and comments.
519 * Returns the first token after whitespace/comments without consuming it
520 * Returns 0 if EOS is encountered.
521 * Returns -1 if there is an error
522 */
523int JSON::skip_to_token() {
524 for (;;) {
525 int c = peek(0);
526 if (c == '/') {
527 u_char c2 = peek(1);
528 if (c2 == '/') {
529 c = skip_line_comment();
530 } else if (c2 == '*') {
531 c = skip_block_comment();
532 if (c < 0) {
533 return -1;
534 }
535 }
536 // Fall through to keep checking if there
537 // are more whitespace / comments to skip
538 }
539 if (c == 0 || c > ' ') {
540 return c;
541 }
542 next();
543 }
544 return 0;
545}
546
547/*
548 * Skip to, and return the wanted char without consuming it
549 * Returns 0 if EOS is encountered.
550 */
551u_char JSON::skip_to(u_char want) {
552 // We want the bookkeeping done in next().
553 // Otherwise strchr could have been used.
554 u_char c;
555 for(;;) {
556 c = peek();
557 if (c == 0 || c == want) {
558 return c;
559 }
560 next();
561 }
562}
563
564/*
565 * Should only be called when we actually have a line comment to skip.
566 * Otherwise it is an internal error.
567 *
568 * Will return the first token after the line comment without consuming it.
569 * Returns 0 if EOS is encoutered.
570 */
571u_char JSON::skip_line_comment() {
572 u_char c;
573
574 // Check that we are not called in error
575 expect_any("/", "line comment start", INTERNAL_ERROR);
576 expect_any("/", "line comment start", INTERNAL_ERROR);
577
578 c = skip_to('\n');
579 if (c == 0) {
580 return 0;
581 }
582 next();
583 return next();
584}
585
586/*
587 * Should only be called when we actually have a block comment to skip.
588 * Otherwise it is an internal error.
589 *
590 * Returns the first token after the block comment without consuming it.
591 * Returns -1 if EOS is encountered in the middle of a comment.
592 */
593int JSON::skip_block_comment() {
594 const char* current;
595
596 // Check that we are not called in error.
597 if (peek() != '/' || peek(1) != '*') {
598 // Let expect handle EOS.
599 expect_string("/*", "block comment start", INTERNAL_ERROR);
600 return 0;
601 }
602
603 current = pos;
604 for (;;) {
605 current = strchrnul_(current, '*');
606
607 if (current[0] == 0 || current[1] == 0) {
608 // Advance error marker to start of block comment
609 mark_pos();
610 error(SYNTAX_ERROR, "Block comment started here never ended. Expected \"*/\" before EOS.");
611 return -1;
612 }
613
614 if (current[1] == '/') {
615 pos = current;
616 if (expect_string("*/", "block comment end", INTERNAL_ERROR) == false) {
617 return -1;
618 }
619 // Found block comment end
620 return peek();
621 }
622 current++;
623 }
624}
625
626const char* JSON::strerror(JSON_ERROR e) {
627 switch (e) {
628 case SYNTAX_ERROR:
629 return "Syntax error";
630 case INTERNAL_ERROR:
631 return "Internal error";
632 case KEY_ERROR:
633 return "Key error";
634 case VALUE_ERROR:
635 return "Value error";
636 default:
637 ShouldNotReachHere();
638 return "Unknown error";
639 }
640}
641
642void JSON::error(JSON_ERROR e, const char* format, ...) {
643 _valid = false;
644
645 if (!silent) {
646 const char* line_start;
647 const char* tmp;
648 size_t line_length;
649 va_list args;
650 u_char c;
651
652 _st->print("%s on line %u byte %u: ", JSON::strerror(e), line, column + 1);
653 va_start(args, format);
654 _st->vprint(format, args);
655 _st->cr();
656 va_end(args);
657
658 line_start = mark - column;
659 assert(line_start >= start, "out of bounds");
660 assert(line_start <= mark, "out of bounds");
661 assert(line_start == start || line_start[-1] == '\n', "line counting error");
662
663 c = *pos;
664 if (c == 0) {
665 _st->print(" Got ");
666 _st->print_cr("EOS.");
667 }
668 tmp = mark;
669 c = *tmp;
670 if (c > ' ') {
671 _st->print(" At ");
672 _st->print("'");
673 while (c > ' ') {
674 _st->print("%c", c);
675 tmp++;
676 c = *tmp;
677 }
678 _st->print_cr("'.");
679 }
680
681 // Skip to newline or EOS
682 tmp = strchrnul_(mark, '\n');
683 line_length = tmp - line_start;
684
685 _st->print_cr("%s", line_start);
686 }
687}
688
689