1 | #ifndef JSON_LIB_INCLUDED |
2 | #define JSON_LIB_INCLUDED |
3 | |
4 | #ifdef __cplusplus |
5 | extern "C" { |
6 | #endif |
7 | |
/*
  Size of the fixed per-level arrays declared in this header:
  the steps of a path, the parser stack and the array counters.
*/
#define JSON_DEPTH_LIMIT 32
9 | |
/*
  Error codes of the JSON parser.

  When an error happens, the c_next of the JSON engine contains the
  character that caused the error, and the c_str is the position
  in the string where the error occurred.
*/
enum json_errors {
  JE_BAD_CHR= -1,       /* Invalid character, charset handler cannot read it. */

  JE_NOT_JSON_CHR= -2,  /* Character met is not used in JSON. */
                        /* ASCII 00-08 for instance. */

  JE_EOS= -3,           /* Unexpected end of string. */

  JE_SYN= -4,           /* The next character breaks the JSON syntax. */

  JE_STRING_CONST= -5,  /* Character disallowed in string constant. */

  JE_ESCAPING= -6,      /* Error in the escaping. */

  /*
    No comma after the last enumerator: C89 and C++98 compilers reject it,
    and the other enums of this header do not use one either.
  */
  JE_DEPTH= -7          /* The limit on the JSON depth was overrun. */
};
31 | |
32 | |
33 | typedef struct st_json_string_t |
34 | { |
35 | const uchar *c_str; /* Current position in JSON string */ |
36 | const uchar *str_end; /* The end on the string. */ |
37 | my_wc_t c_next; /* UNICODE of the last read character */ |
38 | int error; /* error code. */ |
39 | |
40 | CHARSET_INFO *cs; /* Character set of the JSON string. */ |
41 | |
42 | my_charset_conv_mb_wc wc; /* UNICODE conversion function. */ |
43 | /* It's taken out of the cs just to speed calls. */ |
44 | } json_string_t; |
45 | |
46 | |
47 | void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs); |
48 | void json_string_set_str(json_string_t *s, |
49 | const uchar *str, const uchar *end); |
/*
  json_next_char() calls the conversion function of the string on the
  current position and stores the character read in the c_next.
  Its result is whatever the 'wc' function returns.
  Note that the argument is evaluated several times.
*/
#define json_next_char(j) \
  ((j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end))

/* Non-zero when the current position is not before the end of the string. */
#define json_eos(j) (!((j)->c_str < (j)->str_end))
53 | /* |
54 | read_string_const_chr() reads the next character of the string constant |
55 | and saves it to the js->c_next. |
56 | It takes into account possible escapings, so if for instance |
57 | the string is '\b', the read_string_const_chr() sets 8. |
58 | */ |
59 | int json_read_string_const_chr(json_string_t *js); |
60 | |
61 | |
/*
  Various JSON-related operations expect a JSON path as a parameter.
  The path is a string like this: "$.keyA[2].*"
  The path itself is a sequence of steps, each specifying either a key
  or a position in an array. Some of the steps can be wildcards.
  So the representation of the JSON path is the json_path_t structure
  containing an array of json_path_step_t objects.
*/
70 | |
71 | |
/*
  Path step types.
  These are actually bitmasks, so that the '&' and '|' operations
  can be applied to them.
*/
enum json_path_step_types
{
  JSON_PATH_KEY_NULL= 0,
  JSON_PATH_KEY= 1,           /* Must be equal to JSON_VALUE_OBJECT. */
  JSON_PATH_ARRAY= 2,         /* Must be equal to JSON_VALUE_ARRAY. */
  JSON_PATH_KEY_OR_ARRAY= JSON_PATH_KEY | JSON_PATH_ARRAY,
  JSON_PATH_WILD= 4,          /* Step like .* or [*] */
  JSON_PATH_DOUBLE_WILD= 8,   /* Step like **.k or **[1] */
  JSON_PATH_KEY_WILD= JSON_PATH_KEY | JSON_PATH_WILD,
  JSON_PATH_KEY_DOUBLEWILD= JSON_PATH_KEY | JSON_PATH_DOUBLE_WILD,
  JSON_PATH_ARRAY_WILD= JSON_PATH_ARRAY | JSON_PATH_WILD,
  JSON_PATH_ARRAY_DOUBLEWILD= JSON_PATH_ARRAY | JSON_PATH_DOUBLE_WILD
};
86 | |
87 | |
88 | typedef struct st_json_path_step_t |
89 | { |
90 | enum json_path_step_types type; /* The type of the step - */ |
91 | /* see json_path_step_types */ |
92 | const uchar *key; /* Pointer to the beginning of the key. */ |
93 | const uchar *key_end; /* Pointer to the end of the key. */ |
94 | uint n_item; /* Item number in an array. No meaning for the key step. */ |
95 | } json_path_step_t; |
96 | |
97 | |
98 | typedef struct st_json_path_t |
99 | { |
100 | json_string_t s; /* The string to be parsed. */ |
101 | json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */ |
102 | json_path_step_t *last_step; /* Points to the last step. */ |
103 | |
104 | int mode_strict; /* TRUE if the path specified as 'strict' */ |
105 | enum json_path_step_types types_used; /* The '|' of all step's 'type'-s */ |
106 | } json_path_t; |
107 | |
108 | |
109 | int json_path_setup(json_path_t *p, |
110 | CHARSET_INFO *i_cs, const uchar *str, const uchar *end); |
111 | |
112 | |
/*
  The set of functions and structures below provides the interface
  to the JSON text parser.
  Running the parser normally goes like this:

    json_engine_t j_eng;   // structure keeps parser's data
    json_scan_start(&j_eng, cs, str, end);   // begin the parsing

    do
    {
      // The parser has read the next piece of JSON
      // and set the fields of the j_eng structure accordingly.
      // So let's see what we have:
      switch (j_eng.state)
      {
        case JST_KEY:
           // Handle key name. See the json_read_keyname_chr()
           // Probably compare it with the keyname we're looking for
        case JST_VALUE:
           // Handle value. It is either value of the key or an array item.
           // see the json_read_value()
        case JST_OBJ_START:
          // parser found an object (the '{' in JSON)
        case JST_OBJ_END:
          // parser found the end of the object (the '}' in JSON)
        case JST_ARRAY_START:
          // parser found an array (the '[' in JSON)
        case JST_ARRAY_END:
          // parser found the end of the array (the ']' in JSON)

      };
    } while (json_scan_next(&j_eng) == 0);  // parse next structure


    if (j_eng.s.error)  // we need to check why the loop ended.
                        // Did we get to the end of JSON, or came upon error.
    {
       signal_error_in_JSON()
    }


  Parts of JSON can be quickly skipped. If we are not interested
  in a particular key, we can just skip it with the json_skip_key() call.
  Similarly json_skip_level() goes right to the end of an object
  or an array.
*/
159 | |
160 | |
/*
  The JSON parser states that the user can expect and handle.
  NR_JSON_USER_STATES is not a state, it is the number of the states above.
*/
enum json_states {
  JST_VALUE= 0,        /* value found */
  JST_KEY= 1,          /* key found */
  JST_OBJ_START= 2,    /* object */
  JST_OBJ_END= 3,      /* object ended */
  JST_ARRAY_START= 4,  /* array */
  JST_ARRAY_END= 5,    /* array ended */
  NR_JSON_USER_STATES= 6
};
171 | |
172 | |
/*
  Types of the JSON values.
  The two structured types come first, so everything
  greater than JSON_VALUE_ARRAY is a scalar.
*/
enum json_value_types
{
  JSON_VALUE_OBJECT= 1,
  JSON_VALUE_ARRAY= 2,
  JSON_VALUE_STRING= 3,
  JSON_VALUE_NUMBER= 4,
  JSON_VALUE_TRUE= 5,
  JSON_VALUE_FALSE= 6,
  JSON_VALUE_NULL= 7
};
183 | |
184 | |
/*
  Bit flags describing a number value.
  The values are distinct bits, so they can be combined with '|'.
*/
enum json_num_flags
{
  JSON_NUM_NEG= 1,        /* Number is negative. */
  JSON_NUM_FRAC_PART= 2,  /* The fractional part is not empty. */
  /*
    No comma after the last enumerator: C89 and C++98 compilers reject it,
    and the other enums of this header do not use one either.
  */
  JSON_NUM_EXP= 4         /* The number has the 'e' part. */
};
191 | |
192 | |
193 | typedef struct st_json_engine_t |
194 | { |
195 | json_string_t s; /* String to parse. */ |
196 | int sav_c_len; /* Length of the current character. |
197 | Can be more than 1 for multibyte charsets */ |
198 | |
199 | int state; /* The state of the parser. One of 'enum json_states'. |
200 | It tells us what construction of JSON we've just read. */ |
201 | |
202 | /* These values are only set after the json_read_value() call. */ |
203 | enum json_value_types value_type; /* type of the value.*/ |
204 | const uchar *value; /* Points to the value. */ |
205 | const uchar *value_begin;/* Points to where the value starts in the JSON. */ |
206 | int value_escaped; /* Flag telling if the string value has escaping.*/ |
207 | uint num_flags; /* the details of the JSON_VALUE_NUMBER, is it negative, |
208 | or if it has the fractional part. |
209 | See the enum json_num_flags. */ |
210 | |
211 | /* |
212 | In most cases the 'value' and 'value_begin' are equal. |
213 | They only differ if the value is a string constants. Then 'value_begin' |
214 | points to the starting quotation mark, while the 'value' - to |
215 | the first character of the string. |
216 | */ |
217 | |
218 | const uchar *value_end; /* Points to the next character after the value. */ |
219 | int value_len; /* The length of the value. Does not count quotations for */ |
220 | /* string constants. */ |
221 | |
222 | int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */ |
223 | int stack_p; /* The 'stack' pointer. */ |
224 | } json_engine_t; |
225 | |
226 | |
227 | int json_scan_start(json_engine_t *je, |
228 | CHARSET_INFO *i_cs, const uchar *str, const uchar *end); |
229 | int json_scan_next(json_engine_t *j); |
230 | |
231 | |
232 | /* |
233 | json_read_keyname_chr() function assists parsing the name of an JSON key. |
234 | It only can be called when the json_engine is in JST_KEY. |
235 | The json_read_keyname_chr() reads one character of the name of the key, |
236 | and puts it in j_eng.s.next_c. |
237 | Typical usage is like this: |
238 | |
239 | if (j_eng.state == JST_KEY) |
240 | { |
241 | while (json_read_keyname_chr(&j) == 0) |
242 | { |
243 | //handle next character i.e. match it against the pattern |
244 | } |
245 | } |
246 | */ |
247 | |
248 | int json_read_keyname_chr(json_engine_t *j); |
249 | |
250 | |
251 | /* |
252 | Check if the name of the current JSON key matches |
253 | the step of the path. |
254 | */ |
255 | int json_key_matches(json_engine_t *je, json_string_t *k); |
256 | |
257 | |
258 | /* |
259 | json_read_value() function parses the JSON value syntax, |
260 | so that we can handle the value of a key or an array item. |
261 | It only returns meaningful result when the engine is in |
262 | the JST_VALUE state. |
263 | |
264 | Typical usage is like this: |
265 | |
266 | if (j_eng.state == JST_VALUE) |
267 | { |
268 | json_read_value(&j_eng); |
269 | switch(j_eng.value_type) |
270 | { |
271 | case JSON_VALUE_STRING: |
272 | // get the string |
273 | str= j_eng.value; |
274 | str_length= j_eng.value_len; |
275 | case JSON_VALUE_NUMBER: |
276 | // get the number |
277 | ... etc |
278 | } |
279 | */ |
280 | int json_read_value(json_engine_t *j); |
281 | |
282 | |
283 | /* |
284 | json_skip_key() makes parser skip the content of the current |
285 | JSON key quickly. |
286 | It can be called only when the json_engine state is JST_KEY. |
287 | Typical usage is: |
288 | |
289 | if (j_eng.state == JST_KEY) |
290 | { |
291 | if (key_does_not_match(j_eng)) |
292 | json_skip_key(j_eng); |
293 | } |
294 | */ |
295 | |
296 | int json_skip_key(json_engine_t *j); |
297 | |
298 | |
/*
  NOTE(review): json_level_t is a pointer type, while json_get_level()
  yields the int 'stack_p' and json_skip_to_level() takes an int level.
  The typedef is kept unchanged for the sake of the existing users,
  but it looks like it should be 'int' - confirm.
*/
typedef const int *json_level_t;

/*
  json_skip_to_level() makes the parser quickly get out of nested
  objects and arrays. It is used when we're not interested in what is
  there in the rest of these structures.
  The 'level' should be remembered in advance:

    int level= json_get_level(j);
    .... // getting into the nested JSON structures
    json_skip_to_level(j, level);

  The argument of json_get_level() is parenthesized in the expansion,
  so that expressions like (engines + 1) can be passed safely.
*/
#define json_get_level(j) ((j)->stack_p)
311 | |
312 | int json_skip_to_level(json_engine_t *j, int level); |
313 | |
/*
  json_skip_level() is the json_skip_to_level() applied to
  the current level of the engine. So it gets to the end
  of the current JSON array or object.
  Note that the argument is evaluated twice.
*/
#define json_skip_level(je) \
  json_skip_to_level((je), (je)->stack_p)
320 | |
321 | |
/* Skipping an array item is done by the same function as skipping a key. */
#define json_skip_array_item json_skip_key
323 | |
/*
  Tells if the current value is of a scalar type,
  that is neither an OBJECT nor an ARRAY.
*/
#define json_value_scalar(je) (JSON_VALUE_ARRAY < (je)->value_type)
329 | |
330 | |
331 | /* |
332 | Look for the JSON PATH in the json string. |
333 | Function can be called several times with same JSON/PATH to |
334 | find multiple matches. |
335 | On the first call, the json_engine_t parameter should be |
336 | initialized with the JSON string, and the json_path_t with the JSON path |
337 | appropriately. The 'p_cur_step' should point at the first |
338 | step of the path. |
339 | The 'array_counters' is the array of JSON_DEPTH_LIMIT size. |
340 | It stores the array counters of the parsed JSON. |
341 | If function returns 0, it means it found the match. The position of |
342 | the match is je->s.c_str. Then we can call the json_find_path() |
343 | with same engine/path/p_cur_step to get the next match. |
344 | Non-zero return means no matches found. |
345 | Check je->s.error to see if there was an error in JSON. |
346 | */ |
347 | int json_find_path(json_engine_t *je, |
348 | json_path_t *p, json_path_step_t **p_cur_step, |
349 | uint *array_counters); |
350 | |
351 | |
352 | typedef struct st_json_find_paths_t |
353 | { |
354 | uint n_paths; |
355 | json_path_t *paths; |
356 | uint cur_depth; |
357 | uint *path_depths; |
358 | uint array_counters[JSON_DEPTH_LIMIT]; |
359 | } json_find_paths_t; |
360 | |
361 | |
362 | int json_find_paths_first(json_engine_t *je, json_find_paths_t *state, |
363 | uint n_paths, json_path_t *paths, uint *path_depths); |
364 | int json_find_paths_next(json_engine_t *je, json_find_paths_t *state); |
365 | |
366 | |
367 | /* |
368 | Converst JSON string constant into ordinary string constant |
369 | which can involve unpacking json escapes and changing character set. |
370 | Returns negative integer in the case of an error, |
371 | the length of the result otherwise. |
372 | */ |
373 | int json_unescape(CHARSET_INFO *json_cs, |
374 | const uchar *json_str, const uchar *json_end, |
375 | CHARSET_INFO *res_cs, |
376 | uchar *res, uchar *res_end); |
377 | |
378 | /* |
379 | Converst ordinary string constant into JSON string constant. |
380 | which can involve appropriate escaping and changing character set. |
381 | Returns negative integer in the case of an error, |
382 | the length of the result otherwise. |
383 | */ |
384 | int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end, |
385 | CHARSET_INFO *json_cs, uchar *json, uchar *json_end); |
386 | |
387 | |
388 | /* |
389 | Appends the ASCII string to the json with the charset conversion. |
390 | */ |
391 | int json_append_ascii(CHARSET_INFO *json_cs, |
392 | uchar *json, uchar *json_end, |
393 | const uchar *ascii, const uchar *ascii_end); |
394 | |
395 | |
396 | /* |
397 | Scan the JSON and return paths met one-by-one. |
398 | json_get_path_start(&p) |
399 | while (json_get_path_next(&p)) |
400 | { |
401 | handle_the_next_path(); |
402 | } |
403 | */ |
404 | |
405 | int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs, |
406 | const uchar *str, const uchar *end, |
407 | json_path_t *p); |
408 | |
409 | |
410 | int json_get_path_next(json_engine_t *je, json_path_t *p); |
411 | |
412 | |
413 | int json_path_parts_compare( |
414 | const json_path_step_t *a, const json_path_step_t *a_end, |
415 | const json_path_step_t *b, const json_path_step_t *b_end, |
416 | enum json_value_types vt); |
417 | int json_path_compare(const json_path_t *a, const json_path_t *b, |
418 | enum json_value_types vt); |
419 | |
420 | |
421 | #ifdef __cplusplus |
422 | } |
423 | #endif |
424 | |
425 | #endif /* JSON_LIB_INCLUDED */ |
426 | |
427 | |