1#include <stdio.h>
2#include <stdbool.h>
3#include <stdlib.h>
4#include <string.h>
5#include <ctype.h>
6#include <errno.h>
7#include "pd_json.h"
8
/*
 * Record the first error seen on a parser.
 *
 * If `json` has no error recorded yet, sets its error flag and formats
 * "error: <lineno>: <message>" into its errmsg buffer. Once an error is
 * recorded, later invocations do nothing, so the first cause is kept.
 * `format` must be a string literal (it is pasted onto the prefix) and at
 * least one variadic argument is required.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define json_error(json, format, ...) \
    do { \
        if (!(json)->error) { \
            (json)->error = 1; \
            snprintf((json)->errmsg, sizeof((json)->errmsg), \
                     "error: %lu: " format, \
                     (unsigned long) (json)->lineno, \
                     __VA_ARGS__); \
        } \
    } while (0)
17
18#define STACK_INC 4
19
20static enum json_type
21push(json_stream *json, enum json_type type)
22{
23 json->stack_top++;
24
25 if (json->stack_top >= json->stack_size) {
26 struct json_stack *stack;
27 stack = json->alloc.realloc(json->stack,
28 (json->stack_size + STACK_INC) * sizeof(*json->stack));
29 if (stack == NULL) {
30 json_error(json, "%s", strerror(errno));
31 return JSON_ERROR;
32 }
33
34 json->stack_size += STACK_INC;
35 json->stack = stack;
36 }
37
38 json->stack[json->stack_top].type = type;
39 json->stack[json->stack_top].count = 0;
40
41 return type;
42}
43
44static enum json_type
45pop(json_stream *json, int c, enum json_type expected)
46{
47 if (json->stack == NULL || json->stack[json->stack_top].type != expected) {
48 json_error(json, "unexpected byte, '%c'", c);
49 json->alloc.free(json->stack);
50 return JSON_ERROR;
51 }
52 json->stack_top--;
53 return expected == JSON_ARRAY ? JSON_ARRAY_END : JSON_OBJECT_END;
54}
55
56static void pop_all(json_stream *json)
57{
58 json->alloc.free(json->stack);
59}
60
61static int buffer_peek(struct json_source *source)
62{
63 if (source->position < source->source.buffer.length)
64 return source->source.buffer.buffer[source->position];
65 else
66 return EOF;
67}
68
69static int buffer_get(struct json_source *source)
70{
71 int c = source->peek(source);
72 source->position++;
73 return c;
74}
75
76static int stream_get(struct json_source *source)
77{
78 source->position++;
79 return fgetc(source->source.stream.stream);
80}
81
82static int stream_peek(struct json_source *source)
83{
84 int c = fgetc(source->source.stream.stream);
85 ungetc(c, source->source.stream.stream);
86 return c;
87}
88
89static void init(json_stream *json)
90{
91 json->lineno = 1;
92 json->error = 0;
93 json->errmsg[0] = '\0';
94 json->ntokens = 0;
95 json->next = 0;
96 json->streaming = true;
97
98 json->stack = NULL;
99 json->stack_top = -1;
100 json->stack_size = 0;
101
102 json->data.string = NULL;
103 json->data.string_size = 0;
104 json->data.string_fill = 0;
105 json->source.position = 0;
106
107 json->alloc.malloc = malloc;
108 json->alloc.realloc = realloc;
109 json->alloc.free = free;
110}
111
112static enum json_type
113is_match(json_stream *json, const char *pattern, enum json_type type)
114{
115 for (const char *p = pattern; *p; p++)
116 if (*p != json->source.get(&json->source))
117 return JSON_ERROR;
118 return type;
119}
120
121static int pushchar(json_stream *json, int c)
122{
123 if (json->data.string_fill == json->data.string_size) {
124 size_t size = json->data.string_size * 2;
125 char *buffer = json->alloc.realloc(json->data.string, size);
126 if (buffer == NULL) {
127 json_error(json, "%s", strerror(errno));
128 return -1;
129 } else {
130 json->data.string_size = size;
131 json->data.string = buffer;
132 }
133 }
134 json->data.string[json->data.string_fill++] = c;
135 return 0;
136}
137
138static int init_string(json_stream *json)
139{
140 json->data.string_fill = 0;
141 if (json->data.string == NULL) {
142 json->data.string_size = 1024;
143 json->data.string = json->alloc.malloc(json->data.string_size);
144 if (json->data.string == NULL) {
145 json_error(json, "%s", strerror(errno));
146 return -1;
147 }
148 }
149 json->data.string[0] = '\0';
150 return 0;
151}
152
153static int encode_utf8(json_stream *json, unsigned long c)
154{
155 if (c < 0x80UL) {
156 return pushchar(json, c);
157 } else if (c < 0x0800UL) {
158 return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) &&
159 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
160 } else if (c < 0x010000UL) {
161 if (c >= 0xd800 && c <= 0xdfff) {
162 json_error(json, "invalid codepoint %06lx", c);
163 return -1;
164 }
165 return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) &&
166 (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) &&
167 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
168 } else if (c < 0x110000UL) {
169 return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) &&
170 (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) &&
171 (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) &&
172 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
173 } else {
174 json_error(json, "can't encode UTF-8 for %06lx", c);
175 return -1;
176 }
177}
178
/*
 * Convert one hexadecimal digit character to its value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns 0..15 for those and -1
 * for any other input.
 */
static int hexchar(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}
208
209static long
210read_unicode_cp(json_stream *json)
211{
212 long cp = 0;
213 int shift = 12;
214
215 for (size_t i = 0; i < 4; i++) {
216 int c = json->source.get(&json->source);
217 int hc;
218
219 if (c == EOF) {
220 json_error(json, "%s", "unterminated string literal in unicode");
221 return -1;
222 } else if ((hc = hexchar(c)) == -1) {
223 json_error(json, "bad escape unicode byte, '%c'", c);
224 return -1;
225 }
226
227 cp += hc * (1 << shift);
228 shift -= 4;
229 }
230
231
232 return cp;
233}
234
235static int read_unicode(json_stream *json)
236{
237 long cp, h, l;
238
239 if ((cp = read_unicode_cp(json)) == -1) {
240 return -1;
241 }
242
243 if (cp >= 0xd800 && cp <= 0xdbff) {
244 /* This is the high portion of a surrogate pair; we need to read the
245 * lower portion to get the codepoint
246 */
247 h = cp;
248
249 int c = json->source.get(&json->source);
250 if (c == EOF) {
251 json_error(json, "%s", "unterminated string literal in unicode");
252 return -1;
253 } else if (c != '\\') {
254 json_error(json, "invalid continuation for surrogate pair: '%c', "
255 "expected '\\'", c);
256 return -1;
257 }
258
259 c = json->source.get(&json->source);
260 if (c == EOF) {
261 json_error(json, "%s", "unterminated string literal in unicode");
262 return -1;
263 } else if (c != 'u') {
264 json_error(json, "invalid continuation for surrogate pair: '%c', "
265 "expected 'u'", c);
266 return -1;
267 }
268
269 if ((l = read_unicode_cp(json)) == -1) {
270 return -1;
271 }
272
273 if (l < 0xdc00 || l > 0xdfff) {
274 json_error(json, "invalid surrogate pair continuation \\u%04lx out "
275 "of range (dc00-dfff)", l);
276 return -1;
277 }
278
279 cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
280 } else if (cp >= 0xdc00 && cp <= 0xdfff) {
281 json_error(json, "dangling surrogate \\u%04lx", cp);
282 return -1;
283 }
284
285 return encode_utf8(json, cp);
286}
287
288int read_escaped(json_stream *json)
289{
290 int c = json->source.get(&json->source);
291 if (c == EOF) {
292 json_error(json, "%s", "unterminated string literal in escape");
293 return -1;
294 } else if (c == 'u') {
295 if (read_unicode(json) != 0)
296 return -1;
297 } else {
298 switch (c) {
299 case '\\':
300 case 'b':
301 case 'f':
302 case 'n':
303 case 'r':
304 case 't':
305 case '/':
306 case '"':
307 {
308 const char *codes = "\\bfnrt/\"";
309 char *p = strchr(codes, c);
310 if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0)
311 return -1;
312 }
313 break;
314 default:
315 json_error(json, "bad escaped byte, '%c'", c);
316 return -1;
317 }
318 }
319 return 0;
320}
321
/*
 * Tell whether byte `c` may not appear unescaped inside a JSON string.
 *
 * Returns 1 for values 0x00-0x1f, for 0x22 (double quote) and for 0x5c
 * (backslash); returns 0 for everything else, including negative values.
 */
static int
char_needs_escaping(int c)
{
    if (c < 0)
        return 0;

    switch (c) {
    case 0x22:
    case 0x5c:
        return 1;
    default:
        return c < 0x20 ? 1 : 0;
    }
}
331
/*
 * Return the length of the UTF-8 sequence introduced by lead byte `byte`.
 *
 * Returns 1 for 0x00-0x7f, 2 for 0xc2-0xdf, 3 for 0xe0-0xef and 4 for
 * 0xf0-0xf4. Returns 0 for bytes that cannot start a sequence:
 * continuation bytes (0x80-0xbf), the overlong leads 0xc0/0xc1, and
 * anything from 0xf5 upwards.
 */
static int
utf8_seq_length(char byte)
{
    const unsigned char lead = (unsigned char) byte;

    /* The ranges are contiguous, so test the upper bounds in order. */
    if (lead <= 0x7F)
        return 1;
    if (lead <= 0xC1)
        return 0;   /* continuation byte, or overlong 0xC0/0xC1 */
    if (lead <= 0xDF)
        return 2;
    if (lead <= 0xEF)
        return 3;
    if (lead <= 0xF4)
        return 4;
    return 0;       /* 0xF5 and above are not valid lead bytes */
}
371
/*
 * Validate one UTF-8 sequence of `length` bytes starting at `bytes`.
 *
 * Returns 1 when the sequence is well formed and 0 otherwise. A NULL
 * pointer or a length outside 1..4 yields 0. Trailing bytes must be
 * continuation bytes (0x80-0xbf); the second byte is checked against a
 * tighter range for the leads 0xe0, 0xed, 0xf0 and 0xf4. The lead byte
 * itself must not be in 0x80-0xc1 and must not exceed 0xf4. The function
 * does not check that `length` matches what the lead byte announces.
 */
static int
is_legal_utf8(const unsigned char *bytes, int length)
{
    if (bytes == NULL || length < 1 || length > 4)
        return 0;

    const unsigned char lead = bytes[0];

    /* Third and fourth bytes: plain continuation bytes, last one first. */
    for (int i = length - 1; i >= 2; --i) {
        if (bytes[i] < 0x80 || bytes[i] > 0xBF)
            return 0;
    }

    /* Second byte: the allowed range depends on the lead byte. */
    if (length >= 2) {
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        switch (lead) {
        case 0xE0: lo = 0xA0; break;
        case 0xED: hi = 0x9F; break;
        case 0xF0: lo = 0x90; break;
        case 0xF4: hi = 0x8F; break;
        default: break;
        }
        if (bytes[1] < lo || bytes[1] > hi)
            return 0;
    }

    if (lead >= 0x80 && lead < 0xC2)
        return 0;

    return lead <= 0xF4;
}
412
413static int
414read_utf8(json_stream* json, int next_char)
415{
416 int count = utf8_seq_length(next_char);
417 if (!count)
418 {
419 json_error(json, "%s", "Bad character.");
420 return -1;
421 }
422
423 char buffer[4];
424 buffer[0] = next_char;
425 for (int i = 1; i < count; ++i)
426 {
427 buffer[i] = json->source.get(&json->source);;
428 }
429
430 if (!is_legal_utf8((unsigned char*) buffer, count))
431 {
432 json_error(json, "%s", "No legal UTF8 found");
433 return -1;
434 }
435
436 for (int i = 0; i < count; ++i)
437 {
438 if (pushchar(json, buffer[i]) != 0)
439 return -1;
440 }
441 return 0;
442}
443
444static enum json_type
445read_string(json_stream *json)
446{
447 if (init_string(json) != 0)
448 return JSON_ERROR;
449 while (1) {
450 int c = json->source.get(&json->source);
451 if (c == EOF) {
452 json_error(json, "%s", "unterminated string literal");
453 return JSON_ERROR;
454 } else if (c == '"') {
455 if (pushchar(json, '\0') == 0)
456 return JSON_STRING;
457 else
458 return JSON_ERROR;
459 } else if (c == '\\') {
460 if (read_escaped(json) != 0)
461 return JSON_ERROR;
462 } else if ((unsigned) c >= 0x80) {
463 if (read_utf8(json, c) != 0)
464 return JSON_ERROR;
465 } else {
466 if (char_needs_escaping(c)) {
467 json_error(json, "%s:%u", "unescaped control character in string", (unsigned)c);
468 return JSON_ERROR;
469 }
470
471 if (pushchar(json, c) != 0)
472 return JSON_ERROR;
473 }
474 }
475 return JSON_ERROR;
476}
477
/*
 * Return 1 when `c` is a byte value in 48..57 (ASCII '0'-'9'), else 0.
 *
 * The range test is done with one unsigned subtraction: any value below
 * 48 (including negative ones such as EOF) wraps to a large number.
 */
static int
is_digit(int c)
{
    return ((unsigned) c - 48u) <= 9u;
}
483
484static int
485read_digits(json_stream *json)
486{
487 unsigned nread = 0;
488 while (is_digit(json->source.peek(&json->source))) {
489 if (pushchar(json, json->source.get(&json->source)) != 0)
490 return -1;
491
492 nread++;
493 }
494
495 if (nread == 0) {
496 return -1;
497 }
498
499 return 0;
500}
501
502static enum json_type
503read_number(json_stream *json, int c)
504{
505 if (pushchar(json, c) != 0)
506 return JSON_ERROR;
507 if (c == '-') {
508 c = json->source.get(&json->source);
509 if (is_digit(c)) {
510 return read_number(json, c);
511 } else {
512 json_error(json, "unexpected byte, '%c'", c);
513 }
514 } else if (strchr("123456789", c) != NULL) {
515 c = json->source.peek(&json->source);
516 if (is_digit(c)) {
517 if (read_digits(json) != 0)
518 return JSON_ERROR;
519 }
520 }
521 /* Up to decimal or exponent has been read. */
522 c = json->source.peek(&json->source);
523 if (strchr(".eE", c) == NULL) {
524 if (pushchar(json, '\0') != 0)
525 return JSON_ERROR;
526 else
527 return JSON_NUMBER;
528 }
529 if (c == '.') {
530 json->source.get(&json->source); // consume .
531 if (pushchar(json, c) != 0)
532 return JSON_ERROR;
533 if (read_digits(json) != 0)
534 return JSON_ERROR;
535 }
536 /* Check for exponent. */
537 c = json->source.peek(&json->source);
538 if (c == 'e' || c == 'E') {
539 json->source.get(&json->source); // consume e/E
540 if (pushchar(json, c) != 0)
541 return JSON_ERROR;
542 c = json->source.peek(&json->source);
543 if (c == '+' || c == '-') {
544 json->source.get(&json->source); // consume
545 if (pushchar(json, c) != 0)
546 return JSON_ERROR;
547 if (read_digits(json) != 0)
548 return JSON_ERROR;
549 } else if (is_digit(c)) {
550 if (read_digits(json) != 0)
551 return JSON_ERROR;
552 } else {
553 json_error(json, "unexpected byte in number, '%c'", c);
554 return JSON_ERROR;
555 }
556 }
557 if (pushchar(json, '\0') != 0)
558 return JSON_ERROR;
559 else
560 return JSON_NUMBER;
561}
562
/*
 * Return 1 when `c` is one of the four whitespace bytes handled by the
 * parser (0x09 tab, 0x0a line feed, 0x0d carriage return, 0x20 space),
 * and 0 otherwise.
 */
static int
json_isspace(int c)
{
    return c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d;
}
576
577/* Returns the next non-whitespace character in the stream. */
578static int next(json_stream *json)
579{
580 int c;
581 while (json_isspace(c = json->source.get(&json->source)))
582 if (c == '\n')
583 json->lineno++;
584 return c;
585}
586
587static enum json_type
588read_value(json_stream *json, int c)
589{
590 json->ntokens++;
591 switch (c) {
592 case EOF:
593 json_error(json, "%s", "unexpected end of data");
594 return JSON_ERROR;
595 case '{':
596 return push(json, JSON_OBJECT);
597 case '[':
598 return push(json, JSON_ARRAY);
599 case '"':
600 return read_string(json);
601 case 'n':
602 return is_match(json, "ull", JSON_NULL);
603 case 'f':
604 return is_match(json, "alse", JSON_FALSE);
605 case 't':
606 return is_match(json, "rue", JSON_TRUE);
607 case '0':
608 case '1':
609 case '2':
610 case '3':
611 case '4':
612 case '5':
613 case '6':
614 case '7':
615 case '8':
616 case '9':
617 case '-':
618 if (init_string(json) != 0)
619 return JSON_ERROR;
620 return read_number(json, c);
621 default:
622 json_error(json, "unexpected byte, '%c'", c);
623 return JSON_ERROR;
624 }
625}
626
627enum json_type json_peek(json_stream *json)
628{
629 enum json_type next = json_next(json);
630 json->next = next;
631 return next;
632}
633
634enum json_type json_next(json_stream *json)
635{
636 if (json->error)
637 return JSON_ERROR;
638 if (json->next != 0) {
639 enum json_type next = json->next;
640 json->next = 0;
641 return next;
642 }
643 if (json->ntokens > 0 && json->stack_top == (size_t)-1) {
644 int c;
645
646 do {
647 c = json->source.peek(&json->source);
648 if (json_isspace(c)) {
649 c = json->source.get(&json->source);
650 }
651 } while (json_isspace(c));
652 if (!json->streaming && c != EOF) {
653 return JSON_ERROR;
654 }
655 return JSON_DONE;
656 }
657 int c = next(json);
658 if (json->stack == NULL)
659 return read_value(json, c);
660 if (json->stack[json->stack_top].type == JSON_ARRAY) {
661 if (json->stack[json->stack_top].count == 0) {
662 if (c == ']') {
663 return pop(json, c, JSON_ARRAY);
664 }
665 json->stack[json->stack_top].count++;
666 return read_value(json, c);
667 } else if (c == ',') {
668 json->stack[json->stack_top].count++;
669 return read_value(json, next(json));
670 } else if (c == ']') {
671 return pop(json, c, JSON_ARRAY);
672 } else {
673 json_error(json, "unexpected byte, '%c'", c);
674 return JSON_ERROR;
675 }
676 } else if (json->stack[json->stack_top].type == JSON_OBJECT) {
677 if (json->stack[json->stack_top].count == 0) {
678 if (c == '}') {
679 return pop(json, c, JSON_OBJECT);
680 }
681
682 /* No property value pairs yet. */
683 enum json_type value = read_value(json, c);
684 if (value != JSON_STRING) {
685 json_error(json, "%s", "expected property name or '}'");
686 return JSON_ERROR;
687 } else {
688 json->stack[json->stack_top].count++;
689 return value;
690 }
691 } else if ((json->stack[json->stack_top].count % 2) == 0) {
692 /* Expecting comma followed by property name. */
693 if (c != ',' && c != '}') {
694 json_error(json, "%s", "expected ',' or '}'");
695 return JSON_ERROR;
696 } else if (c == '}') {
697 return pop(json, c, JSON_OBJECT);
698 } else {
699 enum json_type value = read_value(json, next(json));
700 if (value != JSON_STRING) {
701 json_error(json, "%s", "expected property name");
702 return JSON_ERROR;
703 } else {
704 json->stack[json->stack_top].count++;
705 return value;
706 }
707 }
708 } else if ((json->stack[json->stack_top].count % 2) == 1) {
709 /* Expecting colon followed by value. */
710 if (c != ':') {
711 json_error(json, "%s", "expected ':' after property name");
712 return JSON_ERROR;
713 } else {
714 json->stack[json->stack_top].count++;
715 return read_value(json, next(json));
716 }
717 }
718 }
719 json_error(json, "%s", "invalid parser state");
720 return JSON_ERROR;
721}
722
723void json_reset(json_stream *json)
724{
725 pop_all(json);
726 json->ntokens = 0;
727 json->error = 0;
728 json->errmsg[0] = '\0';
729}
730
731const char *json_get_string(json_stream *json, size_t *length)
732{
733 if (length != NULL)
734 *length = json->data.string_fill;
735 if (json->data.string == NULL)
736 return "";
737 else
738 return json->data.string;
739}
740
741double json_get_number(json_stream *json)
742{
743 char *p = json->data.string;
744 return p == NULL ? 0 : strtod(p, NULL);
745}
746
747const char *json_get_error(json_stream *json)
748{
749 return json->error ? json->errmsg : NULL;
750}
751
752size_t json_get_lineno(json_stream *json)
753{
754 return json->lineno;
755}
756
757size_t json_get_position(json_stream *json)
758{
759 return json->source.position;
760}
761
762size_t json_get_depth(json_stream *json)
763{
764 return json->stack_top + 1;
765}
766
767void json_open_buffer(json_stream *json, const void *buffer, size_t size)
768{
769 init(json);
770 json->source.get = buffer_get;
771 json->source.peek = buffer_peek;
772 json->source.source.buffer.buffer = buffer;
773 json->source.source.buffer.length = size;
774}
775
776void json_open_string(json_stream *json, const char *string)
777{
778 json_open_buffer(json, string, strlen(string));
779}
780
781void json_open_stream(json_stream *json, FILE * stream)
782{
783 init(json);
784 json->source.get = stream_get;
785 json->source.peek = stream_peek;
786 json->source.source.stream.stream = stream;
787}
788
789void json_set_allocator(json_stream *json, json_allocator *a)
790{
791 json->alloc = *a;
792}
793
794void json_set_streaming(json_stream *json, bool streaming)
795{
796 json->streaming = streaming;
797}
798
799void json_close(json_stream *json)
800{
801 pop_all(json);
802 json->alloc.free(json->data.string);
803}
804