1 | /* |
2 | * Copyright (c) 2009-2012 Petri Lehtinen <petri@digip.org> |
3 | * |
4 | * Jansson is free software; you can redistribute it and/or modify |
5 | * it under the terms of the MIT license. See LICENSE for details. |
6 | */ |
7 | |
8 | #ifndef _GNU_SOURCE |
9 | #define _GNU_SOURCE |
10 | #endif |
11 | |
12 | #include <errno.h> |
13 | #include <limits.h> |
14 | #include <stdio.h> |
15 | #include <stdlib.h> |
16 | #include <string.h> |
17 | #include <assert.h> |
18 | |
19 | #include "jansson.h" |
20 | #include "jansson_private.h" |
21 | #include "strbuffer.h" |
22 | #include "utf.h" |
23 | |
/* Stream status codes. The negative ones are also what stream_get()
   returns in place of a character once the stream has ended or failed. */
#define STREAM_STATE_OK       0
#define STREAM_STATE_EOF      (-1)
#define STREAM_STATE_ERROR    (-2)

/* Token types. The structural characters ('{', '}', '[', ']', ':', ',')
   are stored in lex_t.token as their own character code; the named
   tokens below use values from 256 upwards. */
#define TOKEN_INVALID         (-1)
#define TOKEN_EOF             0
#define TOKEN_STRING          256
#define TOKEN_INTEGER         257
#define TOKEN_REAL            258
#define TOKEN_TRUE            259
#define TOKEN_FALSE           260
#define TOKEN_NULL            261
36 | |
/* ASCII-only replacements for the isxxx() functions, so the result does
   not depend on the current locale. These are macros: the argument may
   be evaluated more than once. */
#define l_isupper(c)  ((c) >= 'A' && (c) <= 'Z')
#define l_islower(c)  ((c) >= 'a' && (c) <= 'z')
#define l_isalpha(c)  (l_isupper(c) || l_islower(c))
#define l_isdigit(c)  ((c) >= '0' && (c) <= '9')
#define l_isxdigit(c)                   \
    (l_isdigit(c) ||                    \
     ((c) >= 'A' && (c) <= 'F') ||      \
     ((c) >= 'a' && (c) <= 'f'))
44 | |
/* Byte source used by the stream reader. An implementation reads one
   byte, converts it to unsigned char and then to int, and returns it;
   at end of input it returns EOF. This is the same contract as fgetc(). */
typedef int (*get_func)(void *source);
49 | |
50 | typedef struct { |
51 | get_func get; |
52 | void *data; |
53 | char buffer[5]; |
54 | size_t buffer_pos; |
55 | int state; |
56 | int line; |
57 | int column, last_column; |
58 | size_t position; |
59 | } stream_t; |
60 | |
61 | typedef struct { |
62 | stream_t stream; |
63 | strbuffer_t saved_text; |
64 | int token; |
65 | union { |
66 | char *string; |
67 | json_int_t integer; |
68 | double real; |
69 | } value; |
70 | } lex_t; |
71 | |
/* Recover the lex_t that embeds the given stream_t. The macro parameter
   is deliberately not called "stream": that name would also be
   substituted into the member-name argument of container_of(), making
   the macro work only for callers whose variable is named "stream". */
#define stream_to_lex(s) container_of(s, lex_t, stream)
73 | |
74 | |
75 | /*** error reporting ***/ |
76 | |
77 | static void error_set(json_error_t *error, const lex_t *lex, |
78 | const char *msg, ...) |
79 | { |
80 | va_list ap; |
81 | char msg_text[JSON_ERROR_TEXT_LENGTH]; |
82 | char msg_with_context[JSON_ERROR_TEXT_LENGTH]; |
83 | |
84 | int line = -1, col = -1; |
85 | size_t pos = 0; |
86 | const char *result = msg_text; |
87 | |
88 | if(!error) |
89 | return; |
90 | |
91 | va_start(ap, msg); |
92 | vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap); |
93 | msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; |
94 | va_end(ap); |
95 | |
96 | if(lex) |
97 | { |
98 | const char *saved_text = strbuffer_value(&lex->saved_text); |
99 | |
100 | line = lex->stream.line; |
101 | col = lex->stream.column; |
102 | pos = lex->stream.position; |
103 | |
104 | if(saved_text && saved_text[0]) |
105 | { |
106 | if(lex->saved_text.length <= 20) { |
107 | snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, |
108 | "%s near '%s'" , msg_text, saved_text); |
109 | msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; |
110 | result = msg_with_context; |
111 | } |
112 | } |
113 | else |
114 | { |
115 | if(lex->stream.state == STREAM_STATE_ERROR) { |
116 | /* No context for UTF-8 decoding errors */ |
117 | result = msg_text; |
118 | } |
119 | else { |
120 | snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, |
121 | "%s near end of file" , msg_text); |
122 | msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; |
123 | result = msg_with_context; |
124 | } |
125 | } |
126 | } |
127 | |
128 | jsonp_error_set(error, line, col, pos, "%s" , result); |
129 | } |
130 | |
131 | |
132 | /*** lexical analyzer ***/ |
133 | |
134 | static void |
135 | stream_init(stream_t *stream, get_func get, void *data) |
136 | { |
137 | stream->get = get; |
138 | stream->data = data; |
139 | stream->buffer[0] = '\0'; |
140 | stream->buffer_pos = 0; |
141 | |
142 | stream->state = STREAM_STATE_OK; |
143 | stream->line = 1; |
144 | stream->column = 0; |
145 | stream->position = 0; |
146 | } |
147 | |
148 | static int stream_get(stream_t *stream, json_error_t *error) |
149 | { |
150 | int c; |
151 | |
152 | if(stream->state != STREAM_STATE_OK) |
153 | return stream->state; |
154 | |
155 | if(!stream->buffer[stream->buffer_pos]) |
156 | { |
157 | c = stream->get(stream->data); |
158 | if(c == EOF) { |
159 | stream->state = STREAM_STATE_EOF; |
160 | return STREAM_STATE_EOF; |
161 | } |
162 | |
163 | stream->buffer[0] = c; |
164 | stream->buffer_pos = 0; |
165 | |
166 | if(0x80 <= c && c <= 0xFF) |
167 | { |
168 | /* multi-byte UTF-8 sequence */ |
169 | int i, count; |
170 | |
171 | count = utf8_check_first(c); |
172 | if(!count) |
173 | goto out; |
174 | |
175 | assert(count >= 2); |
176 | |
177 | for(i = 1; i < count; i++) |
178 | stream->buffer[i] = stream->get(stream->data); |
179 | |
180 | if(!utf8_check_full(stream->buffer, count, NULL)) |
181 | goto out; |
182 | |
183 | stream->buffer[count] = '\0'; |
184 | } |
185 | else |
186 | stream->buffer[1] = '\0'; |
187 | } |
188 | |
189 | c = stream->buffer[stream->buffer_pos++]; |
190 | |
191 | stream->position++; |
192 | if(c == '\n') { |
193 | stream->line++; |
194 | stream->last_column = stream->column; |
195 | stream->column = 0; |
196 | } |
197 | else if(utf8_check_first(c)) { |
198 | /* track the Unicode character column, so increment only if |
199 | this is the first character of a UTF-8 sequence */ |
200 | stream->column++; |
201 | } |
202 | |
203 | return c; |
204 | |
205 | out: |
206 | stream->state = STREAM_STATE_ERROR; |
207 | error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x" , c); |
208 | return STREAM_STATE_ERROR; |
209 | } |
210 | |
211 | static void stream_unget(stream_t *stream, int c) |
212 | { |
213 | if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR) |
214 | return; |
215 | |
216 | stream->position--; |
217 | if(c == '\n') { |
218 | stream->line--; |
219 | stream->column = stream->last_column; |
220 | } |
221 | else if(utf8_check_first(c)) |
222 | stream->column--; |
223 | |
224 | assert(stream->buffer_pos > 0); |
225 | stream->buffer_pos--; |
226 | assert(stream->buffer[stream->buffer_pos] == c); |
227 | } |
228 | |
229 | |
230 | static int lex_get(lex_t *lex, json_error_t *error) |
231 | { |
232 | return stream_get(&lex->stream, error); |
233 | } |
234 | |
235 | static void lex_save(lex_t *lex, int c) |
236 | { |
237 | strbuffer_append_byte(&lex->saved_text, c); |
238 | } |
239 | |
240 | static int lex_get_save(lex_t *lex, json_error_t *error) |
241 | { |
242 | int c = stream_get(&lex->stream, error); |
243 | if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) |
244 | lex_save(lex, c); |
245 | return c; |
246 | } |
247 | |
248 | static void lex_unget(lex_t *lex, int c) |
249 | { |
250 | stream_unget(&lex->stream, c); |
251 | } |
252 | |
253 | static void lex_unget_unsave(lex_t *lex, int c) |
254 | { |
255 | if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) { |
256 | char d; |
257 | stream_unget(&lex->stream, c); |
258 | d = strbuffer_pop(&lex->saved_text); |
259 | assert(c == d); |
260 | } |
261 | } |
262 | |
263 | static void lex_save_cached(lex_t *lex) |
264 | { |
265 | while(lex->stream.buffer[lex->stream.buffer_pos] != '\0') |
266 | { |
267 | lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]); |
268 | lex->stream.buffer_pos++; |
269 | lex->stream.position++; |
270 | } |
271 | } |
272 | |
/* Decode the four hex digits of a "uXXXX" escape into their numeric
   value. str must point at the 'u' and be followed by at least four
   valid hex digits; this is only checked with assertions. */
static int32_t decode_unicode_escape(const char *str)
{
    int32_t code = 0;
    int idx = 1;

    assert(str[0] == 'u');

    while(idx <= 4) {
        char ch = str[idx++];
        int32_t nibble = 0;

        if(l_isdigit(ch))
            nibble = ch - '0';
        else if(l_islower(ch))
            nibble = ch - 'a' + 10;
        else if(l_isupper(ch))
            nibble = ch - 'A' + 10;
        else
            assert(0);

        code = (code << 4) + nibble;
    }

    return code;
}
296 | |
297 | static void lex_scan_string(lex_t *lex, json_error_t *error) |
298 | { |
299 | int c; |
300 | const char *p; |
301 | char *t; |
302 | int i; |
303 | |
304 | lex->value.string = NULL; |
305 | lex->token = TOKEN_INVALID; |
306 | |
307 | c = lex_get_save(lex, error); |
308 | |
309 | while(c != '"') { |
310 | if(c == STREAM_STATE_ERROR) |
311 | goto out; |
312 | |
313 | else if(c == STREAM_STATE_EOF) { |
314 | error_set(error, lex, "premature end of input" ); |
315 | goto out; |
316 | } |
317 | |
318 | else if(0 <= c && c <= 0x1F) { |
319 | /* control character */ |
320 | lex_unget_unsave(lex, c); |
321 | if(c == '\n') |
322 | error_set(error, lex, "unexpected newline" , c); |
323 | else |
324 | error_set(error, lex, "control character 0x%x" , c); |
325 | goto out; |
326 | } |
327 | |
328 | else if(c == '\\') { |
329 | c = lex_get_save(lex, error); |
330 | if(c == 'u') { |
331 | c = lex_get_save(lex, error); |
332 | for(i = 0; i < 4; i++) { |
333 | if(!l_isxdigit(c)) { |
334 | error_set(error, lex, "invalid escape" ); |
335 | goto out; |
336 | } |
337 | c = lex_get_save(lex, error); |
338 | } |
339 | } |
340 | else if(c == '"' || c == '\\' || c == '/' || c == 'b' || |
341 | c == 'f' || c == 'n' || c == 'r' || c == 't') |
342 | c = lex_get_save(lex, error); |
343 | else { |
344 | error_set(error, lex, "invalid escape" ); |
345 | goto out; |
346 | } |
347 | } |
348 | else |
349 | c = lex_get_save(lex, error); |
350 | } |
351 | |
352 | /* the actual value is at most of the same length as the source |
353 | string, because: |
354 | - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte |
355 | - a single \uXXXX escape (length 6) is converted to at most 3 bytes |
356 | - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair |
357 | are converted to 4 bytes |
358 | */ |
359 | lex->value.string = jsonp_malloc(lex->saved_text.length + 1); |
360 | if(!lex->value.string) { |
361 | /* this is not very nice, since TOKEN_INVALID is returned */ |
362 | goto out; |
363 | } |
364 | |
365 | /* the target */ |
366 | t = lex->value.string; |
367 | |
368 | /* + 1 to skip the " */ |
369 | p = strbuffer_value(&lex->saved_text) + 1; |
370 | |
371 | while(*p != '"') { |
372 | if(*p == '\\') { |
373 | p++; |
374 | if(*p == 'u') { |
375 | char buffer[4]; |
376 | int length; |
377 | int32_t value; |
378 | |
379 | value = decode_unicode_escape(p); |
380 | p += 5; |
381 | |
382 | if(0xD800 <= value && value <= 0xDBFF) { |
383 | /* surrogate pair */ |
384 | if(*p == '\\' && *(p + 1) == 'u') { |
385 | int32_t value2 = decode_unicode_escape(++p); |
386 | p += 5; |
387 | |
388 | if(0xDC00 <= value2 && value2 <= 0xDFFF) { |
389 | /* valid second surrogate */ |
390 | value = |
391 | ((value - 0xD800) << 10) + |
392 | (value2 - 0xDC00) + |
393 | 0x10000; |
394 | } |
395 | else { |
396 | /* invalid second surrogate */ |
397 | error_set(error, lex, |
398 | "invalid Unicode '\\u%04X\\u%04X'" , |
399 | value, value2); |
400 | goto out; |
401 | } |
402 | } |
403 | else { |
404 | /* no second surrogate */ |
405 | error_set(error, lex, "invalid Unicode '\\u%04X'" , |
406 | value); |
407 | goto out; |
408 | } |
409 | } |
410 | else if(0xDC00 <= value && value <= 0xDFFF) { |
411 | error_set(error, lex, "invalid Unicode '\\u%04X'" , value); |
412 | goto out; |
413 | } |
414 | else if(value == 0) |
415 | { |
416 | error_set(error, lex, "\\u0000 is not allowed" ); |
417 | goto out; |
418 | } |
419 | |
420 | if(utf8_encode(value, buffer, &length)) |
421 | assert(0); |
422 | |
423 | memcpy(t, buffer, length); |
424 | t += length; |
425 | } |
426 | else { |
427 | switch(*p) { |
428 | case '"': case '\\': case '/': |
429 | *t = *p; break; |
430 | case 'b': *t = '\b'; break; |
431 | case 'f': *t = '\f'; break; |
432 | case 'n': *t = '\n'; break; |
433 | case 'r': *t = '\r'; break; |
434 | case 't': *t = '\t'; break; |
435 | default: assert(0); |
436 | } |
437 | t++; |
438 | p++; |
439 | } |
440 | } |
441 | else |
442 | *(t++) = *(p++); |
443 | } |
444 | *t = '\0'; |
445 | lex->token = TOKEN_STRING; |
446 | return; |
447 | |
448 | out: |
449 | jsonp_free(lex->value.string); |
450 | } |
451 | |
/* Select the string-to-integer conversion function matching json_int_t.
   Nothing is defined here when building with CMake. */
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
#  if !JSON_INTEGER_IS_LONG_LONG
#    define json_strtoint strtol
#  elif defined(_MSC_VER) /* Microsoft Visual Studio */
#    define json_strtoint _strtoi64
#  else
#    define json_strtoint strtoll
#  endif
#endif
463 | |
464 | static int lex_scan_number(lex_t *lex, int c, json_error_t *error) |
465 | { |
466 | const char *saved_text; |
467 | char *end; |
468 | double value; |
469 | |
470 | lex->token = TOKEN_INVALID; |
471 | |
472 | if(c == '-') |
473 | c = lex_get_save(lex, error); |
474 | |
475 | if(c == '0') { |
476 | c = lex_get_save(lex, error); |
477 | if(l_isdigit(c)) { |
478 | lex_unget_unsave(lex, c); |
479 | goto out; |
480 | } |
481 | } |
482 | else if(l_isdigit(c)) { |
483 | c = lex_get_save(lex, error); |
484 | while(l_isdigit(c)) |
485 | c = lex_get_save(lex, error); |
486 | } |
487 | else { |
488 | lex_unget_unsave(lex, c); |
489 | goto out; |
490 | } |
491 | |
492 | if(c != '.' && c != 'E' && c != 'e') { |
493 | json_int_t value; |
494 | |
495 | lex_unget_unsave(lex, c); |
496 | |
497 | saved_text = strbuffer_value(&lex->saved_text); |
498 | |
499 | errno = 0; |
500 | value = json_strtoint(saved_text, &end, 10); |
501 | if(errno == ERANGE) { |
502 | if(value < 0) |
503 | error_set(error, lex, "too big negative integer" ); |
504 | else |
505 | error_set(error, lex, "too big integer" ); |
506 | goto out; |
507 | } |
508 | |
509 | assert(end == saved_text + lex->saved_text.length); |
510 | |
511 | lex->token = TOKEN_INTEGER; |
512 | lex->value.integer = value; |
513 | return 0; |
514 | } |
515 | |
516 | if(c == '.') { |
517 | c = lex_get(lex, error); |
518 | if(!l_isdigit(c)) { |
519 | lex_unget(lex, c); |
520 | goto out; |
521 | } |
522 | lex_save(lex, c); |
523 | |
524 | c = lex_get_save(lex, error); |
525 | while(l_isdigit(c)) |
526 | c = lex_get_save(lex, error); |
527 | } |
528 | |
529 | if(c == 'E' || c == 'e') { |
530 | c = lex_get_save(lex, error); |
531 | if(c == '+' || c == '-') |
532 | c = lex_get_save(lex, error); |
533 | |
534 | if(!l_isdigit(c)) { |
535 | lex_unget_unsave(lex, c); |
536 | goto out; |
537 | } |
538 | |
539 | c = lex_get_save(lex, error); |
540 | while(l_isdigit(c)) |
541 | c = lex_get_save(lex, error); |
542 | } |
543 | |
544 | lex_unget_unsave(lex, c); |
545 | |
546 | if(jsonp_strtod(&lex->saved_text, &value)) { |
547 | error_set(error, lex, "real number overflow" ); |
548 | goto out; |
549 | } |
550 | |
551 | lex->token = TOKEN_REAL; |
552 | lex->value.real = value; |
553 | return 0; |
554 | |
555 | out: |
556 | return -1; |
557 | } |
558 | |
559 | static int lex_scan(lex_t *lex, json_error_t *error) |
560 | { |
561 | int c; |
562 | |
563 | strbuffer_clear(&lex->saved_text); |
564 | |
565 | if(lex->token == TOKEN_STRING) { |
566 | jsonp_free(lex->value.string); |
567 | lex->value.string = NULL; |
568 | } |
569 | |
570 | c = lex_get(lex, error); |
571 | while(c == ' ' || c == '\t' || c == '\n' || c == '\r') |
572 | c = lex_get(lex, error); |
573 | |
574 | if(c == STREAM_STATE_EOF) { |
575 | lex->token = TOKEN_EOF; |
576 | goto out; |
577 | } |
578 | |
579 | if(c == STREAM_STATE_ERROR) { |
580 | lex->token = TOKEN_INVALID; |
581 | goto out; |
582 | } |
583 | |
584 | lex_save(lex, c); |
585 | |
586 | if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') |
587 | lex->token = c; |
588 | |
589 | else if(c == '"') |
590 | lex_scan_string(lex, error); |
591 | |
592 | else if(l_isdigit(c) || c == '-') { |
593 | if(lex_scan_number(lex, c, error)) |
594 | goto out; |
595 | } |
596 | |
597 | else if(l_isalpha(c)) { |
598 | /* eat up the whole identifier for clearer error messages */ |
599 | const char *saved_text; |
600 | |
601 | c = lex_get_save(lex, error); |
602 | while(l_isalpha(c)) |
603 | c = lex_get_save(lex, error); |
604 | lex_unget_unsave(lex, c); |
605 | |
606 | saved_text = strbuffer_value(&lex->saved_text); |
607 | |
608 | if(strcmp(saved_text, "true" ) == 0) |
609 | lex->token = TOKEN_TRUE; |
610 | else if(strcmp(saved_text, "false" ) == 0) |
611 | lex->token = TOKEN_FALSE; |
612 | else if(strcmp(saved_text, "null" ) == 0) |
613 | lex->token = TOKEN_NULL; |
614 | else |
615 | lex->token = TOKEN_INVALID; |
616 | } |
617 | |
618 | else { |
619 | /* save the rest of the input UTF-8 sequence to get an error |
620 | message of valid UTF-8 */ |
621 | lex_save_cached(lex); |
622 | lex->token = TOKEN_INVALID; |
623 | } |
624 | |
625 | out: |
626 | return lex->token; |
627 | } |
628 | |
629 | static char *lex_steal_string(lex_t *lex) |
630 | { |
631 | char *result = NULL; |
632 | if(lex->token == TOKEN_STRING) |
633 | { |
634 | result = lex->value.string; |
635 | lex->value.string = NULL; |
636 | } |
637 | return result; |
638 | } |
639 | |
640 | static int lex_init(lex_t *lex, get_func get, void *data) |
641 | { |
642 | stream_init(&lex->stream, get, data); |
643 | if(strbuffer_init(&lex->saved_text)) |
644 | return -1; |
645 | |
646 | lex->token = TOKEN_INVALID; |
647 | return 0; |
648 | } |
649 | |
650 | static void lex_close(lex_t *lex) |
651 | { |
652 | if(lex->token == TOKEN_STRING) |
653 | jsonp_free(lex->value.string); |
654 | strbuffer_close(&lex->saved_text); |
655 | } |
656 | |
657 | |
658 | /*** parser ***/ |
659 | |
660 | static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error); |
661 | |
662 | static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) |
663 | { |
664 | json_t *object = json_object(); |
665 | if(!object) |
666 | return NULL; |
667 | |
668 | lex_scan(lex, error); |
669 | if(lex->token == '}') |
670 | return object; |
671 | |
672 | while(1) { |
673 | char *key; |
674 | json_t *value; |
675 | |
676 | if(lex->token != TOKEN_STRING) { |
677 | error_set(error, lex, "string or '}' expected" ); |
678 | goto error; |
679 | } |
680 | |
681 | key = lex_steal_string(lex); |
682 | if(!key) |
683 | return NULL; |
684 | |
685 | if(flags & JSON_REJECT_DUPLICATES) { |
686 | if(json_object_get(object, key)) { |
687 | jsonp_free(key); |
688 | error_set(error, lex, "duplicate object key" ); |
689 | goto error; |
690 | } |
691 | } |
692 | |
693 | lex_scan(lex, error); |
694 | if(lex->token != ':') { |
695 | jsonp_free(key); |
696 | error_set(error, lex, "':' expected" ); |
697 | goto error; |
698 | } |
699 | |
700 | lex_scan(lex, error); |
701 | value = parse_value(lex, flags, error); |
702 | if(!value) { |
703 | jsonp_free(key); |
704 | goto error; |
705 | } |
706 | |
707 | if(json_object_set_nocheck(object, key, value)) { |
708 | jsonp_free(key); |
709 | json_decref(value); |
710 | goto error; |
711 | } |
712 | |
713 | json_decref(value); |
714 | jsonp_free(key); |
715 | |
716 | lex_scan(lex, error); |
717 | if(lex->token != ',') |
718 | break; |
719 | |
720 | lex_scan(lex, error); |
721 | } |
722 | |
723 | if(lex->token != '}') { |
724 | error_set(error, lex, "'}' expected" ); |
725 | goto error; |
726 | } |
727 | |
728 | return object; |
729 | |
730 | error: |
731 | json_decref(object); |
732 | return NULL; |
733 | } |
734 | |
735 | static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) |
736 | { |
737 | json_t *array = json_array(); |
738 | if(!array) |
739 | return NULL; |
740 | |
741 | lex_scan(lex, error); |
742 | if(lex->token == ']') |
743 | return array; |
744 | |
745 | while(lex->token) { |
746 | json_t *elem = parse_value(lex, flags, error); |
747 | if(!elem) |
748 | goto error; |
749 | |
750 | if(json_array_append(array, elem)) { |
751 | json_decref(elem); |
752 | goto error; |
753 | } |
754 | json_decref(elem); |
755 | |
756 | lex_scan(lex, error); |
757 | if(lex->token != ',') |
758 | break; |
759 | |
760 | lex_scan(lex, error); |
761 | } |
762 | |
763 | if(lex->token != ']') { |
764 | error_set(error, lex, "']' expected" ); |
765 | goto error; |
766 | } |
767 | |
768 | return array; |
769 | |
770 | error: |
771 | json_decref(array); |
772 | return NULL; |
773 | } |
774 | |
775 | static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) |
776 | { |
777 | json_t *json; |
778 | |
779 | switch(lex->token) { |
780 | case TOKEN_STRING: { |
781 | json = json_string_nocheck(lex->value.string); |
782 | break; |
783 | } |
784 | |
785 | case TOKEN_INTEGER: { |
786 | json = json_integer(lex->value.integer); |
787 | break; |
788 | } |
789 | |
790 | case TOKEN_REAL: { |
791 | json = json_real(lex->value.real); |
792 | break; |
793 | } |
794 | |
795 | case TOKEN_TRUE: |
796 | json = json_true(); |
797 | break; |
798 | |
799 | case TOKEN_FALSE: |
800 | json = json_false(); |
801 | break; |
802 | |
803 | case TOKEN_NULL: |
804 | json = json_null(); |
805 | break; |
806 | |
807 | case '{': |
808 | json = parse_object(lex, flags, error); |
809 | break; |
810 | |
811 | case '[': |
812 | json = parse_array(lex, flags, error); |
813 | break; |
814 | |
815 | case TOKEN_INVALID: |
816 | error_set(error, lex, "invalid token" ); |
817 | return NULL; |
818 | |
819 | default: |
820 | error_set(error, lex, "unexpected token" ); |
821 | return NULL; |
822 | } |
823 | |
824 | if(!json) |
825 | return NULL; |
826 | |
827 | return json; |
828 | } |
829 | |
830 | static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) |
831 | { |
832 | json_t *result; |
833 | |
834 | lex_scan(lex, error); |
835 | if(!(flags & JSON_DECODE_ANY)) { |
836 | if(lex->token != '[' && lex->token != '{') { |
837 | error_set(error, lex, "'[' or '{' expected" ); |
838 | return NULL; |
839 | } |
840 | } |
841 | |
842 | result = parse_value(lex, flags, error); |
843 | if(!result) |
844 | return NULL; |
845 | |
846 | if(!(flags & JSON_DISABLE_EOF_CHECK)) { |
847 | lex_scan(lex, error); |
848 | if(lex->token != TOKEN_EOF) { |
849 | error_set(error, lex, "end of file expected" ); |
850 | json_decref(result); |
851 | return NULL; |
852 | } |
853 | } |
854 | |
855 | if(error) { |
856 | /* Save the position even though there was no error */ |
857 | error->position = lex->stream.position; |
858 | } |
859 | |
860 | return result; |
861 | } |
862 | |
/* Read cursor over a NUL-terminated string. */
typedef struct
{
    const char *data;   /* the string being read */
    size_t pos;         /* index of the next byte; size_t so that strings
                           longer than INT_MAX cannot overflow it */
} string_data_t;

/* get_func implementation for NUL-terminated strings. Returns the next
   byte as an unsigned char value and advances, or returns EOF at the
   terminating NUL without advancing (so repeated calls keep returning
   EOF). */
static int string_get(void *data)
{
    char c;
    string_data_t *stream = (string_data_t *)data;
    c = stream->data[stream->pos];
    if(c == '\0')
        return EOF;
    else
    {
        stream->pos++;
        return (unsigned char)c;
    }
}
882 | |
883 | json_t *json_loads(const char *string, size_t flags, json_error_t *error) |
884 | { |
885 | lex_t lex; |
886 | json_t *result; |
887 | string_data_t stream_data; |
888 | |
889 | jsonp_error_init(error, "<string>" ); |
890 | |
891 | if (string == NULL) { |
892 | error_set(error, NULL, "wrong arguments" ); |
893 | return NULL; |
894 | } |
895 | |
896 | stream_data.data = string; |
897 | stream_data.pos = 0; |
898 | |
899 | if(lex_init(&lex, string_get, (void *)&stream_data)) |
900 | return NULL; |
901 | |
902 | result = parse_json(&lex, flags, error); |
903 | |
904 | lex_close(&lex); |
905 | return result; |
906 | } |
907 | |
/* Read cursor over a memory buffer of known length. */
typedef struct
{
    const char *data;   /* start of the buffer */
    size_t len;         /* number of bytes in the buffer */
    size_t pos;         /* index of the next byte to return */
} buffer_data_t;

/* get_func implementation for length-delimited buffers. Returns the next
   byte as an unsigned char value, or EOF once pos has reached len. */
static int buffer_get(void *data)
{
    buffer_data_t *cursor = data;
    unsigned char byte;

    if(cursor->pos >= cursor->len)
        return EOF;

    byte = (unsigned char)cursor->data[cursor->pos++];
    return byte;
}
926 | |
927 | json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) |
928 | { |
929 | lex_t lex; |
930 | json_t *result; |
931 | buffer_data_t stream_data; |
932 | |
933 | jsonp_error_init(error, "<buffer>" ); |
934 | |
935 | if (buffer == NULL) { |
936 | error_set(error, NULL, "wrong arguments" ); |
937 | return NULL; |
938 | } |
939 | |
940 | stream_data.data = buffer; |
941 | stream_data.pos = 0; |
942 | stream_data.len = buflen; |
943 | |
944 | if(lex_init(&lex, buffer_get, (void *)&stream_data)) |
945 | return NULL; |
946 | |
947 | result = parse_json(&lex, flags, error); |
948 | |
949 | lex_close(&lex); |
950 | return result; |
951 | } |
952 | |
953 | json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) |
954 | { |
955 | lex_t lex; |
956 | const char *source; |
957 | json_t *result; |
958 | |
959 | if(input == stdin) |
960 | source = "<stdin>" ; |
961 | else |
962 | source = "<stream>" ; |
963 | |
964 | jsonp_error_init(error, source); |
965 | |
966 | if (input == NULL) { |
967 | error_set(error, NULL, "wrong arguments" ); |
968 | return NULL; |
969 | } |
970 | |
971 | if(lex_init(&lex, (get_func)fgetc, input)) |
972 | return NULL; |
973 | |
974 | result = parse_json(&lex, flags, error); |
975 | |
976 | lex_close(&lex); |
977 | return result; |
978 | } |
979 | |
980 | json_t *json_load_file(const char *path, size_t flags, json_error_t *error) |
981 | { |
982 | json_t *result; |
983 | FILE *fp; |
984 | |
985 | jsonp_error_init(error, path); |
986 | |
987 | if (path == NULL) { |
988 | error_set(error, NULL, "wrong arguments" ); |
989 | return NULL; |
990 | } |
991 | |
992 | fp = fopen(path, "rb" ); |
993 | if(!fp) |
994 | { |
995 | error_set(error, NULL, "unable to open %s: %s" , |
996 | path, strerror(errno)); |
997 | return NULL; |
998 | } |
999 | |
1000 | result = json_loadf(fp, flags, error); |
1001 | |
1002 | fclose(fp); |
1003 | return result; |
1004 | } |
1005 | |
1006 | #define MAX_BUF_LEN 1024 |
1007 | |
1008 | typedef struct |
1009 | { |
1010 | char data[MAX_BUF_LEN]; |
1011 | size_t len; |
1012 | size_t pos; |
1013 | json_load_callback_t callback; |
1014 | void *arg; |
1015 | } callback_data_t; |
1016 | |
1017 | static int callback_get(void *data) |
1018 | { |
1019 | char c; |
1020 | callback_data_t *stream = data; |
1021 | |
1022 | if(stream->pos >= stream->len) { |
1023 | stream->pos = 0; |
1024 | stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg); |
1025 | if(stream->len == 0 || stream->len == (size_t)-1) |
1026 | return EOF; |
1027 | } |
1028 | |
1029 | c = stream->data[stream->pos]; |
1030 | stream->pos++; |
1031 | return (unsigned char)c; |
1032 | } |
1033 | |
1034 | json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error) |
1035 | { |
1036 | lex_t lex; |
1037 | json_t *result; |
1038 | |
1039 | callback_data_t stream_data; |
1040 | |
1041 | memset(&stream_data, 0, sizeof(stream_data)); |
1042 | stream_data.callback = callback; |
1043 | stream_data.arg = arg; |
1044 | |
1045 | jsonp_error_init(error, "<callback>" ); |
1046 | |
1047 | if (callback == NULL) { |
1048 | error_set(error, NULL, "wrong arguments" ); |
1049 | return NULL; |
1050 | } |
1051 | |
1052 | if(lex_init(&lex, (get_func)callback_get, &stream_data)) |
1053 | return NULL; |
1054 | |
1055 | result = parse_json(&lex, flags, error); |
1056 | |
1057 | lex_close(&lex); |
1058 | return result; |
1059 | } |
1060 | |