1#define _POSIX_C_SOURCE 200112L
2#include <stdio.h>
3#include <stdbool.h>
4#include <stdlib.h>
5#include <string.h>
6#include <ctype.h>
7#include <errno.h>
8#include "pdjson.h"
9
/* Bits kept in json_stream.flags. */
#define JSON_FLAG_ERROR     0x1u /* an error has been recorded in errmsg */
#define JSON_FLAG_STREAMING 0x2u /* set by init(), toggled by json_set_streaming() */
12
/*
 * Record an error on a stream, unless one has already been recorded.
 *
 * The first call sets JSON_FLAG_ERROR and formats "error: <lineno>: <text>"
 * into json->errmsg; later calls are ignored so the original error survives.
 *
 * `format` must be a string literal (it is pasted onto the prefix) and at
 * least one variadic argument is required.  `json` is evaluated more than
 * once, so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define json_error(json, format, ...)                                \
    do {                                                             \
        if (!((json)->flags & JSON_FLAG_ERROR)) {                    \
            (json)->flags |= JSON_FLAG_ERROR;                        \
            snprintf((json)->errmsg, sizeof((json)->errmsg),         \
                     "error: %lu: " format,                          \
                     (unsigned long) (json)->lineno,                 \
                     __VA_ARGS__);                                   \
        }                                                            \
    } while (0)
/* Number of slots added to the nesting stack each time push() grows it. */
#define STACK_INC 4

/*
 * On MSVC and MinGW builds, route strerror_r() calls to strerror_s(),
 * which takes the same three arguments in a different order.
 */
#if defined(__MINGW32__) || defined(_MSC_VER)
#define strerror_r(err, buf, len) strerror_s((buf), (len), (err))
#endif
27
28const char *json_typename[] = {
29 [JSON_ERROR] = "ERROR",
30 [JSON_DONE] = "DONE",
31 [JSON_OBJECT] = "OBJECT",
32 [JSON_OBJECT_END] = "OBJECT_END",
33 [JSON_ARRAY] = "ARRAY",
34 [JSON_ARRAY_END] = "ARRAY_END",
35 [JSON_STRING] = "STRING",
36 [JSON_NUMBER] = "NUMBER",
37 [JSON_TRUE] = "TRUE",
38 [JSON_FALSE] = "FALSE",
39 [JSON_NULL] = "NULL",
40};
41
42struct json_stack {
43 enum json_type type;
44 long count;
45};
46
47static void json_error_s(json_stream *json, int err)
48{
49 char errbuf[1024] = {0};
50 strerror_r(err, errbuf, sizeof(errbuf));
51 json_error(json, "%s", errbuf);
52}
53
54static enum json_type
55push(json_stream *json, enum json_type type)
56{
57 json->stack_top++;
58
59 if (json->stack_top >= json->stack_size) {
60 struct json_stack *stack;
61 stack = json->alloc.realloc(json->stack,
62 (json->stack_size + STACK_INC) * sizeof(*json->stack));
63 if (stack == NULL) {
64 json_error_s(json, errno);
65 return JSON_ERROR;
66 }
67
68 json->stack_size += STACK_INC;
69 json->stack = stack;
70 }
71
72 json->stack[json->stack_top].type = type;
73 json->stack[json->stack_top].count = 0;
74
75 return type;
76}
77
78static enum json_type
79pop(json_stream *json, int c, enum json_type expected)
80{
81 if (json->stack == NULL || json->stack[json->stack_top].type != expected) {
82 json_error(json, "unexpected byte, '%c'", c);
83 return JSON_ERROR;
84 }
85 json->stack_top--;
86 return expected == JSON_ARRAY ? JSON_ARRAY_END : JSON_OBJECT_END;
87}
88
89static int buffer_peek(struct json_source *source)
90{
91 if (source->position < source->source.buffer.length)
92 return source->source.buffer.buffer[source->position];
93 else
94 return EOF;
95}
96
97static int buffer_get(struct json_source *source)
98{
99 int c = source->peek(source);
100 source->position++;
101 return c;
102}
103
104static int stream_get(struct json_source *source)
105{
106 source->position++;
107 return fgetc(source->source.stream.stream);
108}
109
110static int stream_peek(struct json_source *source)
111{
112 int c = fgetc(source->source.stream.stream);
113 ungetc(c, source->source.stream.stream);
114 return c;
115}
116
117static void init(json_stream *json)
118{
119 json->lineno = 1;
120 json->flags = JSON_FLAG_STREAMING;
121 json->errmsg[0] = '\0';
122 json->ntokens = 0;
123 json->next = 0;
124
125 json->stack = NULL;
126 json->stack_top = (size_t)(-1);
127 json->stack_size = 0;
128
129 json->data.string = NULL;
130 json->data.string_size = 0;
131 json->data.string_fill = 0;
132 json->source.position = 0;
133
134 json->alloc.malloc = malloc;
135 json->alloc.realloc = realloc;
136 json->alloc.free = free;
137}
138
139static enum json_type
140is_match(json_stream *json, const char *pattern, enum json_type type)
141{
142 for (const char *p = pattern; *p; p++)
143 if (*p != json->source.get(&json->source))
144 return JSON_ERROR;
145 return type;
146}
147
148static int pushchar(json_stream *json, int c)
149{
150 if (json->data.string_fill == json->data.string_size) {
151 size_t size = json->data.string_size * 2;
152 char *buffer = json->alloc.realloc(json->data.string, size);
153 if (buffer == NULL) {
154 json_error_s(json, errno);
155 return -1;
156 } else {
157 json->data.string_size = size;
158 json->data.string = buffer;
159 }
160 }
161 json->data.string[json->data.string_fill++] = (char)(c);
162 return 0;
163}
164
165static int init_string(json_stream *json)
166{
167 json->data.string_fill = 0;
168 if (json->data.string == NULL) {
169 json->data.string_size = 1024;
170 json->data.string = json->alloc.malloc(json->data.string_size);
171 if (json->data.string == NULL) {
172 json_error_s(json, errno);
173 return -1;
174 }
175 }
176 json->data.string[0] = '\0';
177 return 0;
178}
179
180static int encode_utf8(json_stream *json, unsigned long c)
181{
182 if (c < 0x80UL) {
183 return pushchar(json, c);
184 } else if (c < 0x0800UL) {
185 return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) &&
186 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
187 } else if (c < 0x010000UL) {
188 if (c >= 0xd800 && c <= 0xdfff) {
189 json_error(json, "invalid codepoint %06lx", c);
190 return -1;
191 }
192 return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) &&
193 (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) &&
194 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
195 } else if (c < 0x110000UL) {
196 return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) &&
197 (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) &&
198 (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) &&
199 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
200 } else {
201 json_error(json, "can't encode UTF-8 for %06lx", c);
202 return -1;
203 }
204}
205
/*
 * Value of a single hexadecimal digit.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * input (including EOF).
 */
static int hexchar(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}
235
236static long
237read_unicode_cp(json_stream *json)
238{
239 long cp = 0;
240 int shift = 12;
241
242 for (size_t i = 0; i < 4; i++) {
243 int c = json->source.get(&json->source);
244 int hc;
245
246 if (c == EOF) {
247 json_error(json, "%s", "unterminated string literal in unicode");
248 return -1;
249 } else if ((hc = hexchar(c)) == -1) {
250 json_error(json, "bad escape unicode byte, '%c'", c);
251 return -1;
252 }
253
254 cp += hc * (1 << shift);
255 shift -= 4;
256 }
257
258
259 return cp;
260}
261
262static int read_unicode(json_stream *json)
263{
264 long cp, h, l;
265
266 if ((cp = read_unicode_cp(json)) == -1) {
267 return -1;
268 }
269
270 if (cp >= 0xd800 && cp <= 0xdbff) {
271 /* This is the high portion of a surrogate pair; we need to read the
272 * lower portion to get the codepoint
273 */
274 h = cp;
275
276 int c = json->source.get(&json->source);
277 if (c == EOF) {
278 json_error(json, "%s", "unterminated string literal in unicode");
279 return -1;
280 } else if (c != '\\') {
281 json_error(json, "invalid continuation for surrogate pair: '%c', "
282 "expected '\\'", c);
283 return -1;
284 }
285
286 c = json->source.get(&json->source);
287 if (c == EOF) {
288 json_error(json, "%s", "unterminated string literal in unicode");
289 return -1;
290 } else if (c != 'u') {
291 json_error(json, "invalid continuation for surrogate pair: '%c', "
292 "expected 'u'", c);
293 return -1;
294 }
295
296 if ((l = read_unicode_cp(json)) == -1) {
297 return -1;
298 }
299
300 if (l < 0xdc00 || l > 0xdfff) {
301 json_error(json, "invalid surrogate pair continuation \\u%04lx out "
302 "of range (dc00-dfff)", l);
303 return -1;
304 }
305
306 cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
307 } else if (cp >= 0xdc00 && cp <= 0xdfff) {
308 json_error(json, "dangling surrogate \\u%04lx", cp);
309 return -1;
310 }
311
312 return encode_utf8(json, cp);
313}
314
315int read_escaped(json_stream *json)
316{
317 int c = json->source.get(&json->source);
318 if (c == EOF) {
319 json_error(json, "%s", "unterminated string literal in escape");
320 return -1;
321 } else if (c == 'u') {
322 if (read_unicode(json) != 0)
323 return -1;
324 } else {
325 switch (c) {
326 case '\\':
327 case 'b':
328 case 'f':
329 case 'n':
330 case 'r':
331 case 't':
332 case '/':
333 case '"':
334 {
335 const char *codes = "\\bfnrt/\"";
336 char *p = strchr(codes, c);
337 if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0)
338 return -1;
339 }
340 break;
341 default:
342 json_error(json, "bad escaped byte, '%c'", c);
343 return -1;
344 }
345 }
346 return 0;
347}
348
/*
 * Tell whether byte `c` may not appear literally inside a JSON string:
 * control characters below 0x20, the double quote (0x22) and the
 * backslash (0x5c).  Negative values never need escaping.
 * Returns 1 if escaping is required, 0 otherwise.
 */
static int
char_needs_escaping(int c)
{
    if (c < 0)
        return 0;

    return (c < 0x20 || c == 0x22 || c == 0x5c) ? 1 : 0;
}
358
/*
 * Length in bytes of the UTF-8 sequence introduced by lead byte `byte`.
 *
 * Returns 1-4 for a valid lead byte and 0 for a byte that cannot start a
 * sequence: continuation bytes (0x80-0xBF), the overlong leads 0xC0/0xC1,
 * and anything from 0xF5 upwards.
 */
static int
utf8_seq_length(char byte)
{
    const unsigned char lead = (unsigned char) byte;

    if (lead <= 0x7F)
        return 1;   /* ASCII */
    if (lead <= 0xC1)
        return 0;   /* continuation byte or overlong lead */
    if (lead <= 0xDF)
        return 2;
    if (lead <= 0xEF)
        return 3;
    if (lead <= 0xF4)
        return 4;

    return 0;       /* 0xF5 and above are never valid */
}
398
/*
 * Validate one UTF-8 sequence of `length` bytes starting at `bytes`.
 *
 * Returns 1 if the sequence is well formed and 0 otherwise.  A null
 * pointer and any length outside 1-4 are rejected.  Beyond the generic
 * 0x80-0xBF range for trailing bytes, the second byte is restricted
 * further for the lead bytes E0, ED, F0 and F4.
 */
static int
is_legal_utf8(const unsigned char *bytes, int length)
{
    if (!bytes || length < 1 || length > 4)
        return 0;

    const unsigned char lead = bytes[0];

    /* Third and fourth bytes, checked from the end of the sequence. */
    for (int i = length - 1; i >= 2; i--) {
        if (bytes[i] < 0x80 || bytes[i] > 0xBF)
            return 0;
    }

    /* Second byte: the permitted range depends on the lead byte. */
    if (length >= 2) {
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        switch (lead) {
        case 0xE0: lo = 0xA0; break;
        case 0xED: hi = 0x9F; break;
        case 0xF0: lo = 0x90; break;
        case 0xF4: hi = 0x8F; break;
        default:   break;
        }

        if (bytes[1] < lo || bytes[1] > hi)
            return 0;
    }

    /* Lead byte: not a continuation byte, not 0xC0/0xC1, at most 0xF4. */
    if (lead >= 0x80 && lead < 0xC2)
        return 0;

    return lead <= 0xF4;
}
439
440static int
441read_utf8(json_stream* json, int next_char)
442{
443 int count = utf8_seq_length((char)(next_char));
444 if (!count)
445 {
446 json_error(json, "%s", "Bad character.");
447 return -1;
448 }
449
450 char buffer[4];
451 buffer[0] = (char)(next_char);
452 for (int i = 1; i < count; ++i)
453 {
454 buffer[i] = (char)(json->source.get(&json->source));
455 }
456
457 if (!is_legal_utf8((unsigned char*) buffer, count))
458 {
459 json_error(json, "%s", "No legal UTF8 found");
460 return -1;
461 }
462
463 for (int i = 0; i < count; ++i)
464 {
465 if (pushchar(json, buffer[i]) != 0)
466 return -1;
467 }
468 return 0;
469}
470
471static enum json_type
472read_string(json_stream *json)
473{
474 if (init_string(json) != 0)
475 return JSON_ERROR;
476 while (1) {
477 int c = json->source.get(&json->source);
478 if (c == EOF) {
479 json_error(json, "%s", "unterminated string literal");
480 return JSON_ERROR;
481 } else if (c == '"') {
482 if (pushchar(json, '\0') == 0)
483 return JSON_STRING;
484 else
485 return JSON_ERROR;
486 } else if (c == '\\') {
487 if (read_escaped(json) != 0)
488 return JSON_ERROR;
489 } else if ((unsigned) c >= 0x80) {
490 if (read_utf8(json, c) != 0)
491 return JSON_ERROR;
492 } else {
493 if (char_needs_escaping(c)) {
494 json_error(json, "%s", "unescaped control character in string");
495 return JSON_ERROR;
496 }
497
498 if (pushchar(json, c) != 0)
499 return JSON_ERROR;
500 }
501 }
502 return JSON_ERROR;
503}
504
/* Return 1 if `c` is one of the decimal digits '0'-'9', 0 otherwise. */
static int
is_digit(int c)
{
    return '0' <= c && c <= '9';
}
510
511static int
512read_digits(json_stream *json)
513{
514 unsigned nread = 0;
515 while (is_digit(json->source.peek(&json->source))) {
516 if (pushchar(json, json->source.get(&json->source)) != 0)
517 return -1;
518
519 nread++;
520 }
521
522 if (nread == 0) {
523 return -1;
524 }
525
526 return 0;
527}
528
529static enum json_type
530read_number(json_stream *json, int c)
531{
532 if (pushchar(json, c) != 0)
533 return JSON_ERROR;
534 if (c == '-') {
535 c = json->source.get(&json->source);
536 if (is_digit(c)) {
537 return read_number(json, c);
538 } else {
539 json_error(json, "unexpected byte, '%c'", c);
540 }
541 } else if (strchr("123456789", c) != NULL) {
542 c = json->source.peek(&json->source);
543 if (is_digit(c)) {
544 if (read_digits(json) != 0)
545 return JSON_ERROR;
546 }
547 }
548 /* Up to decimal or exponent has been read. */
549 c = json->source.peek(&json->source);
550 if (strchr(".eE", c) == NULL) {
551 if (pushchar(json, '\0') != 0)
552 return JSON_ERROR;
553 else
554 return JSON_NUMBER;
555 }
556 if (c == '.') {
557 json->source.get(&json->source); // consume .
558 if (pushchar(json, c) != 0)
559 return JSON_ERROR;
560 if (read_digits(json) != 0)
561 return JSON_ERROR;
562 }
563 /* Check for exponent. */
564 c = json->source.peek(&json->source);
565 if (c == 'e' || c == 'E') {
566 json->source.get(&json->source); // consume e/E
567 if (pushchar(json, c) != 0)
568 return JSON_ERROR;
569 c = json->source.peek(&json->source);
570 if (c == '+' || c == '-') {
571 json->source.get(&json->source); // consume
572 if (pushchar(json, c) != 0)
573 return JSON_ERROR;
574 if (read_digits(json) != 0)
575 return JSON_ERROR;
576 } else if (is_digit(c)) {
577 if (read_digits(json) != 0)
578 return JSON_ERROR;
579 } else {
580 json_error(json, "unexpected byte in number, '%c'", c);
581 return JSON_ERROR;
582 }
583 }
584 if (pushchar(json, '\0') != 0)
585 return JSON_ERROR;
586 else
587 return JSON_NUMBER;
588}
589
/*
 * Return 1 if `c` is one of the four whitespace bytes recognised between
 * tokens (tab 0x09, line feed 0x0a, carriage return 0x0d, space 0x20),
 * and 0 otherwise.
 */
static int
json_isspace(int c)
{
    return (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d) ? 1 : 0;
}
603
604/* Returns the next non-whitespace character in the stream. */
605static int next(json_stream *json)
606{
607 int c;
608 while (json_isspace(c = json->source.get(&json->source)))
609 if (c == '\n')
610 json->lineno++;
611 return c;
612}
613
614static enum json_type
615read_value(json_stream *json, int c)
616{
617 json->ntokens++;
618 switch (c) {
619 case EOF:
620 json_error(json, "%s", "unexpected end of data");
621 return JSON_ERROR;
622 case '{':
623 return push(json, JSON_OBJECT);
624 case '[':
625 return push(json, JSON_ARRAY);
626 case '"':
627 return read_string(json);
628 case 'n':
629 return is_match(json, "ull", JSON_NULL);
630 case 'f':
631 return is_match(json, "alse", JSON_FALSE);
632 case 't':
633 return is_match(json, "rue", JSON_TRUE);
634 case '0':
635 case '1':
636 case '2':
637 case '3':
638 case '4':
639 case '5':
640 case '6':
641 case '7':
642 case '8':
643 case '9':
644 case '-':
645 if (init_string(json) != 0)
646 return JSON_ERROR;
647 return read_number(json, c);
648 default:
649 json_error(json, "unexpected byte, '%c'", c);
650 return JSON_ERROR;
651 }
652}
653
654enum json_type json_peek(json_stream *json)
655{
656 enum json_type next = json_next(json);
657 json->next = next;
658 return next;
659}
660
661enum json_type json_next(json_stream *json)
662{
663 if (json->flags & JSON_FLAG_ERROR)
664 return JSON_ERROR;
665 if (json->next != 0) {
666 enum json_type next = json->next;
667 json->next = 0;
668 return next;
669 }
670 if (json->ntokens > 0 && json->stack_top == (size_t)-1) {
671 int c;
672
673 do {
674 c = json->source.peek(&json->source);
675 if (json_isspace(c)) {
676 c = json->source.get(&json->source);
677 }
678 } while (json_isspace(c));
679
680 if (!(json->flags & JSON_FLAG_STREAMING) && c != EOF) {
681 return JSON_ERROR;
682 }
683
684 return JSON_DONE;
685 }
686 int c = next(json);
687 if (json->stack_top == (size_t)-1)
688 return read_value(json, c);
689 if (json->stack[json->stack_top].type == JSON_ARRAY) {
690 if (json->stack[json->stack_top].count == 0) {
691 if (c == ']') {
692 return pop(json, c, JSON_ARRAY);
693 }
694 json->stack[json->stack_top].count++;
695 return read_value(json, c);
696 } else if (c == ',') {
697 json->stack[json->stack_top].count++;
698 return read_value(json, next(json));
699 } else if (c == ']') {
700 return pop(json, c, JSON_ARRAY);
701 } else {
702 json_error(json, "unexpected byte, '%c'", c);
703 return JSON_ERROR;
704 }
705 } else if (json->stack[json->stack_top].type == JSON_OBJECT) {
706 if (json->stack[json->stack_top].count == 0) {
707 if (c == '}') {
708 return pop(json, c, JSON_OBJECT);
709 }
710
711 /* No property value pairs yet. */
712 enum json_type value = read_value(json, c);
713 if (value != JSON_STRING) {
714 json_error(json, "%s", "expected property name or '}'");
715 return JSON_ERROR;
716 } else {
717 json->stack[json->stack_top].count++;
718 return value;
719 }
720 } else if ((json->stack[json->stack_top].count % 2) == 0) {
721 /* Expecting comma followed by property name. */
722 if (c != ',' && c != '}') {
723 json_error(json, "%s", "expected ',' or '}'");
724 return JSON_ERROR;
725 } else if (c == '}') {
726 return pop(json, c, JSON_OBJECT);
727 } else {
728 enum json_type value = read_value(json, next(json));
729 if (value != JSON_STRING) {
730 json_error(json, "%s", "expected property name");
731 return JSON_ERROR;
732 } else {
733 json->stack[json->stack_top].count++;
734 return value;
735 }
736 }
737 } else if ((json->stack[json->stack_top].count % 2) == 1) {
738 /* Expecting colon followed by value. */
739 if (c != ':') {
740 json_error(json, "%s", "expected ':' after property name");
741 return JSON_ERROR;
742 } else {
743 json->stack[json->stack_top].count++;
744 return read_value(json, next(json));
745 }
746 }
747 }
748 json_error(json, "%s", "invalid parser state");
749 return JSON_ERROR;
750}
751
752void json_reset(json_stream *json)
753{
754 json->stack_top = (size_t)(-1);
755 json->ntokens = 0;
756 json->flags &= ~JSON_FLAG_ERROR;
757 json->errmsg[0] = '\0';
758}
759
760const char *json_get_string(json_stream *json, size_t *length)
761{
762 if (length != NULL)
763 *length = json->data.string_fill;
764 if (json->data.string == NULL)
765 return "";
766 else
767 return json->data.string;
768}
769
770double json_get_number(json_stream *json)
771{
772 char *p = json->data.string;
773 return p == NULL ? 0 : strtod(p, NULL);
774}
775
776const char *json_get_error(json_stream *json)
777{
778 return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL;
779}
780
781size_t json_get_lineno(json_stream *json)
782{
783 return json->lineno;
784}
785
786size_t json_get_position(json_stream *json)
787{
788 return json->source.position;
789}
790
791size_t json_get_depth(json_stream *json)
792{
793 return json->stack_top + 1;
794}
795
796void json_open_buffer(json_stream *json, const void *buffer, size_t size)
797{
798 init(json);
799 json->source.get = buffer_get;
800 json->source.peek = buffer_peek;
801 json->source.source.buffer.buffer = buffer;
802 json->source.source.buffer.length = size;
803}
804
805void json_open_string(json_stream *json, const char *string)
806{
807 json_open_buffer(json, string, strlen(string));
808}
809
810void json_open_stream(json_stream *json, FILE * stream)
811{
812 init(json);
813 json->source.get = stream_get;
814 json->source.peek = stream_peek;
815 json->source.source.stream.stream = stream;
816}
817
818static int user_get(struct json_source *json)
819{
820 return json->source.user.get(json->source.user.ptr);
821}
822
823static int user_peek(struct json_source *json)
824{
825 return json->source.user.peek(json->source.user.ptr);
826}
827
828void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user)
829{
830 init(json);
831 json->source.get = user_get;
832 json->source.peek = user_peek;
833 json->source.source.user.ptr = user;
834 json->source.source.user.get = get;
835 json->source.source.user.peek = peek;
836}
837
838void json_set_allocator(json_stream *json, json_allocator *a)
839{
840 json->alloc = *a;
841}
842
843void json_set_streaming(json_stream *json, bool streaming)
844{
845 if (streaming)
846 json->flags |= JSON_FLAG_STREAMING;
847 else
848 json->flags &= ~JSON_FLAG_STREAMING;
849}
850
851void json_close(json_stream *json)
852{
853 json->alloc.free(json->stack);
854 json->alloc.free(json->data.string);
855}
856