1 | /* |
2 | * This file is part of the MicroPython project, http://micropython.org/ |
3 | * |
4 | * The MIT License (MIT) |
5 | * |
6 | * Copyright (c) 2014-2019 Damien P. George |
7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | * of this software and associated documentation files (the "Software"), to deal |
10 | * in the Software without restriction, including without limitation the rights |
11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
12 | * copies of the Software, and to permit persons to whom the Software is |
13 | * furnished to do so, subject to the following conditions: |
14 | * |
15 | * The above copyright notice and this permission notice shall be included in |
16 | * all copies or substantial portions of the Software. |
17 | * |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
24 | * THE SOFTWARE. |
25 | */ |
26 | |
27 | #include <stdio.h> |
28 | |
29 | #include "py/objlist.h" |
30 | #include "py/objstringio.h" |
31 | #include "py/parsenum.h" |
32 | #include "py/runtime.h" |
33 | #include "py/stream.h" |
34 | |
35 | #if MICROPY_PY_UJSON |
36 | |
37 | STATIC mp_obj_t mod_ujson_dump(mp_obj_t obj, mp_obj_t stream) { |
38 | mp_get_stream_raise(stream, MP_STREAM_OP_WRITE); |
39 | mp_print_t print = {MP_OBJ_TO_PTR(stream), mp_stream_write_adaptor}; |
40 | mp_obj_print_helper(&print, obj, PRINT_JSON); |
41 | return mp_const_none; |
42 | } |
43 | STATIC MP_DEFINE_CONST_FUN_OBJ_2(mod_ujson_dump_obj, mod_ujson_dump); |
44 | |
45 | STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) { |
46 | vstr_t vstr; |
47 | mp_print_t print; |
48 | vstr_init_print(&vstr, 8, &print); |
49 | mp_obj_print_helper(&print, obj, PRINT_JSON); |
50 | return mp_obj_new_str_from_vstr(&mp_type_str, &vstr); |
51 | } |
52 | STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps); |
53 | |
54 | // The function below implements a simple non-recursive JSON parser. |
55 | // |
56 | // The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt |
57 | // The parser here will parse any valid JSON and return the correct |
58 | // corresponding Python object. It allows through a superset of JSON, since |
59 | // it treats commas and colons as "whitespace", and doesn't care if |
60 | // brackets/braces are correctly paired. It will raise a ValueError if the |
61 | // input is outside its specs. |
62 | // |
63 | // Most of the work is parsing the primitives (null, false, true, numbers, |
64 | // strings). It does 1 pass over the input stream. It tries to be fast and |
65 | // small in code size, while not using more RAM than necessary. |
66 | |
67 | typedef struct _ujson_stream_t { |
68 | mp_obj_t stream_obj; |
69 | mp_uint_t (*read)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode); |
70 | int errcode; |
71 | byte cur; |
72 | } ujson_stream_t; |
73 | |
// Accessors for a ujson_stream_t *value* (not a pointer).
// A NUL byte cannot appear in a JSON stream, so 0 doubles as the EOF marker.
#define S_EOF (0)
// Current look-ahead byte.
#define S_CUR(s) ((s).cur)
// True once the look-ahead byte is the EOF marker.
#define S_END(s) (S_CUR(s) == S_EOF)
// Advance by one byte and yield the new look-ahead byte.
#define S_NEXT(s) (ujson_stream_next(&(s)))
78 | |
79 | STATIC byte ujson_stream_next(ujson_stream_t *s) { |
80 | mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode); |
81 | if (s->errcode != 0) { |
82 | mp_raise_OSError(s->errcode); |
83 | } |
84 | if (ret == 0) { |
85 | s->cur = S_EOF; |
86 | } |
87 | return s->cur; |
88 | } |
89 | |
90 | STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) { |
91 | const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ); |
92 | ujson_stream_t s = {stream_obj, stream_p->read, 0, 0}; |
93 | vstr_t vstr; |
94 | vstr_init(&vstr, 8); |
95 | mp_obj_list_t stack; // we use a list as a simple stack for nested JSON |
96 | stack.len = 0; |
97 | stack.items = NULL; |
98 | mp_obj_t stack_top = MP_OBJ_NULL; |
99 | const mp_obj_type_t *stack_top_type = NULL; |
100 | mp_obj_t stack_key = MP_OBJ_NULL; |
101 | S_NEXT(s); |
102 | for (;;) { |
103 | cont: |
104 | if (S_END(s)) { |
105 | break; |
106 | } |
107 | mp_obj_t next = MP_OBJ_NULL; |
108 | bool enter = false; |
109 | byte cur = S_CUR(s); |
110 | S_NEXT(s); |
111 | switch (cur) { |
112 | case ',': |
113 | case ':': |
114 | case ' ': |
115 | case '\t': |
116 | case '\n': |
117 | case '\r': |
118 | goto cont; |
119 | case 'n': |
120 | if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') { |
121 | S_NEXT(s); |
122 | next = mp_const_none; |
123 | } else { |
124 | goto fail; |
125 | } |
126 | break; |
127 | case 'f': |
128 | if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') { |
129 | S_NEXT(s); |
130 | next = mp_const_false; |
131 | } else { |
132 | goto fail; |
133 | } |
134 | break; |
135 | case 't': |
136 | if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') { |
137 | S_NEXT(s); |
138 | next = mp_const_true; |
139 | } else { |
140 | goto fail; |
141 | } |
142 | break; |
143 | case '"': |
144 | vstr_reset(&vstr); |
145 | for (; !S_END(s) && S_CUR(s) != '"';) { |
146 | byte c = S_CUR(s); |
147 | if (c == '\\') { |
148 | c = S_NEXT(s); |
149 | switch (c) { |
150 | case 'b': |
151 | c = 0x08; |
152 | break; |
153 | case 'f': |
154 | c = 0x0c; |
155 | break; |
156 | case 'n': |
157 | c = 0x0a; |
158 | break; |
159 | case 'r': |
160 | c = 0x0d; |
161 | break; |
162 | case 't': |
163 | c = 0x09; |
164 | break; |
165 | case 'u': { |
166 | mp_uint_t num = 0; |
167 | for (int i = 0; i < 4; i++) { |
168 | c = (S_NEXT(s) | 0x20) - '0'; |
169 | if (c > 9) { |
170 | c -= ('a' - ('9' + 1)); |
171 | } |
172 | num = (num << 4) | c; |
173 | } |
174 | vstr_add_char(&vstr, num); |
175 | goto str_cont; |
176 | } |
177 | } |
178 | } |
179 | vstr_add_byte(&vstr, c); |
180 | str_cont: |
181 | S_NEXT(s); |
182 | } |
183 | if (S_END(s)) { |
184 | goto fail; |
185 | } |
186 | S_NEXT(s); |
187 | next = mp_obj_new_str(vstr.buf, vstr.len); |
188 | break; |
189 | case '-': |
190 | case '0': |
191 | case '1': |
192 | case '2': |
193 | case '3': |
194 | case '4': |
195 | case '5': |
196 | case '6': |
197 | case '7': |
198 | case '8': |
199 | case '9': { |
200 | bool flt = false; |
201 | vstr_reset(&vstr); |
202 | for (;;) { |
203 | vstr_add_byte(&vstr, cur); |
204 | cur = S_CUR(s); |
205 | if (cur == '.' || cur == 'E' || cur == 'e') { |
206 | flt = true; |
207 | } else if (cur == '+' || cur == '-' || unichar_isdigit(cur)) { |
208 | // pass |
209 | } else { |
210 | break; |
211 | } |
212 | S_NEXT(s); |
213 | } |
214 | if (flt) { |
215 | next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL); |
216 | } else { |
217 | next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL); |
218 | } |
219 | break; |
220 | } |
221 | case '[': |
222 | next = mp_obj_new_list(0, NULL); |
223 | enter = true; |
224 | break; |
225 | case '{': |
226 | next = mp_obj_new_dict(0); |
227 | enter = true; |
228 | break; |
229 | case '}': |
230 | case ']': { |
231 | if (stack_top == MP_OBJ_NULL) { |
232 | // no object at all |
233 | goto fail; |
234 | } |
235 | if (stack.len == 0) { |
236 | // finished; compound object |
237 | goto success; |
238 | } |
239 | stack.len -= 1; |
240 | stack_top = stack.items[stack.len]; |
241 | stack_top_type = mp_obj_get_type(stack_top); |
242 | goto cont; |
243 | } |
244 | default: |
245 | goto fail; |
246 | } |
247 | if (stack_top == MP_OBJ_NULL) { |
248 | stack_top = next; |
249 | stack_top_type = mp_obj_get_type(stack_top); |
250 | if (!enter) { |
251 | // finished; single primitive only |
252 | goto success; |
253 | } |
254 | } else { |
255 | // append to list or dict |
256 | if (stack_top_type == &mp_type_list) { |
257 | mp_obj_list_append(stack_top, next); |
258 | } else { |
259 | if (stack_key == MP_OBJ_NULL) { |
260 | stack_key = next; |
261 | if (enter) { |
262 | goto fail; |
263 | } |
264 | } else { |
265 | mp_obj_dict_store(stack_top, stack_key, next); |
266 | stack_key = MP_OBJ_NULL; |
267 | } |
268 | } |
269 | if (enter) { |
270 | if (stack.items == NULL) { |
271 | mp_obj_list_init(&stack, 1); |
272 | stack.items[0] = stack_top; |
273 | } else { |
274 | mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top); |
275 | } |
276 | stack_top = next; |
277 | stack_top_type = mp_obj_get_type(stack_top); |
278 | } |
279 | } |
280 | } |
281 | success: |
282 | // eat trailing whitespace |
283 | while (unichar_isspace(S_CUR(s))) { |
284 | S_NEXT(s); |
285 | } |
286 | if (!S_END(s)) { |
287 | // unexpected chars |
288 | goto fail; |
289 | } |
290 | if (stack_top == MP_OBJ_NULL || stack.len != 0) { |
291 | // not exactly 1 object |
292 | goto fail; |
293 | } |
294 | vstr_clear(&vstr); |
295 | return stack_top; |
296 | |
297 | fail: |
298 | mp_raise_ValueError(MP_ERROR_TEXT("syntax error in JSON" )); |
299 | } |
300 | STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_load_obj, mod_ujson_load); |
301 | |
302 | STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) { |
303 | mp_buffer_info_t bufinfo; |
304 | mp_get_buffer_raise(obj, &bufinfo, MP_BUFFER_READ); |
305 | vstr_t vstr = {bufinfo.len, bufinfo.len, (char *)bufinfo.buf, true}; |
306 | mp_obj_stringio_t sio = {{&mp_type_stringio}, &vstr, 0, MP_OBJ_NULL}; |
307 | return mod_ujson_load(MP_OBJ_FROM_PTR(&sio)); |
308 | } |
309 | STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_loads_obj, mod_ujson_loads); |
310 | |
311 | STATIC const mp_rom_map_elem_t mp_module_ujson_globals_table[] = { |
312 | { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ujson) }, |
313 | { MP_ROM_QSTR(MP_QSTR_dump), MP_ROM_PTR(&mod_ujson_dump_obj) }, |
314 | { MP_ROM_QSTR(MP_QSTR_dumps), MP_ROM_PTR(&mod_ujson_dumps_obj) }, |
315 | { MP_ROM_QSTR(MP_QSTR_load), MP_ROM_PTR(&mod_ujson_load_obj) }, |
316 | { MP_ROM_QSTR(MP_QSTR_loads), MP_ROM_PTR(&mod_ujson_loads_obj) }, |
317 | }; |
318 | |
319 | STATIC MP_DEFINE_CONST_DICT(mp_module_ujson_globals, mp_module_ujson_globals_table); |
320 | |
321 | const mp_obj_module_t mp_module_ujson = { |
322 | .base = { &mp_type_module }, |
323 | .globals = (mp_obj_dict_t *)&mp_module_ujson_globals, |
324 | }; |
325 | |
326 | #endif // MICROPY_PY_UJSON |
327 | |