1#ifndef JSON_LIB_INCLUDED
2#define JSON_LIB_INCLUDED
3
4#ifdef __cplusplus
5extern "C" {
6#endif
7
/*
  Nesting limit. It sizes the per-level arrays declared in this header
  (path steps, the engine stack and the array counters).
*/
#define JSON_DEPTH_LIMIT 32
9
/*
  Error codes of the JSON parser.
  When an error happens, the c_next of the JSON engine contains the
  character that caused the error, and the c_str is the position
  in the string where the error occurred.
*/
enum json_errors
{
  /* Invalid character, the charset handler cannot read it. */
  JE_BAD_CHR= -1,

  /* Character met that is not used in JSON (ASCII 00-08 for instance). */
  JE_NOT_JSON_CHR= -2,

  /* Unexpected end of string. */
  JE_EOS= -3,

  /* The next character breaks the JSON syntax. */
  JE_SYN= -4,

  /* Character disallowed in a string constant. */
  JE_STRING_CONST= -5,

  /* Error in the escaping. */
  JE_ESCAPING= -6,

  /* The limit on the JSON depth was overrun. */
  JE_DEPTH= -7
};
31
32
33typedef struct st_json_string_t
34{
35 const uchar *c_str; /* Current position in JSON string */
36 const uchar *str_end; /* The end on the string. */
37 my_wc_t c_next; /* UNICODE of the last read character */
38 int error; /* error code. */
39
40 CHARSET_INFO *cs; /* Character set of the JSON string. */
41
42 my_charset_conv_mb_wc wc; /* UNICODE conversion function. */
43 /* It's taken out of the cs just to speed calls. */
44} json_string_t;
45
46
47void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs);
48void json_string_set_str(json_string_t *s,
49 const uchar *str, const uchar *end);
/*
  Calls the conversion function of 'j' for the bytes between c_str and
  str_end, passing &c_next as the place for the converted character.
  Yields whatever the conversion function returns.
  The argument is evaluated several times, so it must be free of
  side effects.
*/
#define json_next_char(j) \
  ((j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end))
/* Non-zero when the current position of 'j' is at or past the end of the string. */
#define json_eos(j) ((j)->str_end <= (j)->c_str)
53/*
54 read_string_const_chr() reads the next character of the string constant
55 and saves it to the js->c_next.
56 It takes into account possible escapings, so if for instance
57 the string is '\b', the read_string_const_chr() sets 8.
58*/
59int json_read_string_const_chr(json_string_t *js);
60
61
62/*
63 Various JSON-related operations expect JSON path as a parameter.
64 The path is a string like this "$.keyA[2].*"
65 The path itself is a number of steps specifying either a key or a position
66 in an array. Some of them can be wildcards.
  So the representation of the JSON path is the json_path_t structure
  containing an array of json_path_step_t objects.
69*/
70
71
/*
  Path step types. The values are bit masks, so they can be combined
  and tested with the '|' and '&' operations.
*/
enum json_path_step_types
{
  JSON_PATH_KEY_NULL= 0,

  /* Must be equal to JSON_VALUE_OBJECT. */
  JSON_PATH_KEY= 1,

  /* Must be equal to JSON_VALUE_ARRAY. */
  JSON_PATH_ARRAY= 2,

  JSON_PATH_KEY_OR_ARRAY= JSON_PATH_KEY | JSON_PATH_ARRAY,

  /* Step like .* or [*] */
  JSON_PATH_WILD= 4,

  /* Step like **.k or **[1] */
  JSON_PATH_DOUBLE_WILD= 8,

  /* Combinations of a base type and a wildcard flag. */
  JSON_PATH_KEY_WILD= JSON_PATH_KEY | JSON_PATH_WILD,
  JSON_PATH_KEY_DOUBLEWILD= JSON_PATH_KEY | JSON_PATH_DOUBLE_WILD,
  JSON_PATH_ARRAY_WILD= JSON_PATH_ARRAY | JSON_PATH_WILD,
  JSON_PATH_ARRAY_DOUBLEWILD= JSON_PATH_ARRAY | JSON_PATH_DOUBLE_WILD
};
86
87
88typedef struct st_json_path_step_t
89{
90 enum json_path_step_types type; /* The type of the step - */
91 /* see json_path_step_types */
92 const uchar *key; /* Pointer to the beginning of the key. */
93 const uchar *key_end; /* Pointer to the end of the key. */
94 uint n_item; /* Item number in an array. No meaning for the key step. */
95} json_path_step_t;
96
97
98typedef struct st_json_path_t
99{
100 json_string_t s; /* The string to be parsed. */
101 json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */
102 json_path_step_t *last_step; /* Points to the last step. */
103
104 int mode_strict; /* TRUE if the path specified as 'strict' */
105 enum json_path_step_types types_used; /* The '|' of all step's 'type'-s */
106} json_path_t;
107
108
109int json_path_setup(json_path_t *p,
110 CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
111
112
113/*
114 The set of functions and structures below provides interface
115 to the JSON text parser.
116 Running the parser normally goes like this:
117
    json_engine_t j_eng;                    // structure keeps parser's data
    json_scan_start(&j_eng, cs, str, end);  // begin the parsing
120
121 do
122 {
123 // The parser has read next piece of JSON
124 // and set fields of j_eng structure accordingly.
125 // So let's see what we have:
126 switch (j_eng.state)
127 {
128 case JST_KEY:
129 // Handle key name. See the json_read_keyname_chr()
130 // Probably compare it with the keyname we're looking for
131 case JST_VALUE:
132 // Handle value. It is either value of the key or an array item.
133 // see the json_read_value()
134 case JST_OBJ_START:
135 // parser found an object (the '{' in JSON)
136 case JST_OBJ_END:
137 // parser found the end of the object (the '}' in JSON)
138 case JST_ARRAY_START:
139 // parser found an array (the '[' in JSON)
140 case JST_ARRAY_END:
141 // parser found the end of the array (the ']' in JSON)
142
143 };
    } while (json_scan_next(&j_eng) == 0);  // parse next structure
145
146
147 if (j_eng.s.error) // we need to check why the loop ended.
148 // Did we get to the end of JSON, or came upon error.
149 {
150 signal_error_in_JSON()
151 }
152
153
154 Parts of JSON can be quickly skipped. If we are not interested
155 in a particular key, we can just skip it with json_skip_key() call.
156 Similarly json_skip_level() goes right to the end of an object
157 or an array.
158*/
159
160
/* These are the JSON parser states that the user can expect and handle. */
enum json_states
{
  JST_VALUE= 0,         /* value found */
  JST_KEY= 1,           /* key found */
  JST_OBJ_START= 2,     /* object */
  JST_OBJ_END= 3,       /* object ended */
  JST_ARRAY_START= 4,   /* array */
  JST_ARRAY_END= 5,     /* array ended */
  NR_JSON_USER_STATES   /* the number of the states listed above */
};
171
172
/*
  Types of a JSON value.
  OBJECT and ARRAY come first: json_value_scalar() relies on every
  scalar type being greater than JSON_VALUE_ARRAY.
*/
enum json_value_types
{
  JSON_VALUE_OBJECT= 1,
  JSON_VALUE_ARRAY= 2,
  JSON_VALUE_STRING= 3,
  JSON_VALUE_NUMBER= 4,
  JSON_VALUE_TRUE= 5,
  JSON_VALUE_FALSE= 6,
  JSON_VALUE_NULL= 7
};
183
184
/* Bit flags describing a JSON_VALUE_NUMBER. */
enum json_num_flags
{
  /* Number is negative. */
  JSON_NUM_NEG= 1 << 0,

  /* The fractional part is not empty. */
  JSON_NUM_FRAC_PART= 1 << 1,

  /* The number has the 'e' part. */
  JSON_NUM_EXP= 1 << 2
};
191
192
193typedef struct st_json_engine_t
194{
195 json_string_t s; /* String to parse. */
196 int sav_c_len; /* Length of the current character.
197 Can be more than 1 for multibyte charsets */
198
199 int state; /* The state of the parser. One of 'enum json_states'.
200 It tells us what construction of JSON we've just read. */
201
202 /* These values are only set after the json_read_value() call. */
203 enum json_value_types value_type; /* type of the value.*/
204 const uchar *value; /* Points to the value. */
205 const uchar *value_begin;/* Points to where the value starts in the JSON. */
206 int value_escaped; /* Flag telling if the string value has escaping.*/
207 uint num_flags; /* the details of the JSON_VALUE_NUMBER, is it negative,
208 or if it has the fractional part.
209 See the enum json_num_flags. */
210
211 /*
212 In most cases the 'value' and 'value_begin' are equal.
213 They only differ if the value is a string constants. Then 'value_begin'
214 points to the starting quotation mark, while the 'value' - to
215 the first character of the string.
216 */
217
218 const uchar *value_end; /* Points to the next character after the value. */
219 int value_len; /* The length of the value. Does not count quotations for */
220 /* string constants. */
221
222 int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */
223 int stack_p; /* The 'stack' pointer. */
224} json_engine_t;
225
226
227int json_scan_start(json_engine_t *je,
228 CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
229int json_scan_next(json_engine_t *j);
230
231
232/*
233 json_read_keyname_chr() function assists parsing the name of an JSON key.
234 It only can be called when the json_engine is in JST_KEY.
235 The json_read_keyname_chr() reads one character of the name of the key,
236 and puts it in j_eng.s.next_c.
237 Typical usage is like this:
238
239 if (j_eng.state == JST_KEY)
240 {
241 while (json_read_keyname_chr(&j) == 0)
242 {
243 //handle next character i.e. match it against the pattern
244 }
245 }
246*/
247
248int json_read_keyname_chr(json_engine_t *j);
249
250
251/*
252 Check if the name of the current JSON key matches
253 the step of the path.
254*/
255int json_key_matches(json_engine_t *je, json_string_t *k);
256
257
258/*
259 json_read_value() function parses the JSON value syntax,
260 so that we can handle the value of a key or an array item.
261 It only returns meaningful result when the engine is in
262 the JST_VALUE state.
263
264 Typical usage is like this:
265
266 if (j_eng.state == JST_VALUE)
267 {
268 json_read_value(&j_eng);
269 switch(j_eng.value_type)
270 {
271 case JSON_VALUE_STRING:
272 // get the string
273 str= j_eng.value;
274 str_length= j_eng.value_len;
275 case JSON_VALUE_NUMBER:
276 // get the number
277 ... etc
278 }
279*/
280int json_read_value(json_engine_t *j);
281
282
283/*
284 json_skip_key() makes parser skip the content of the current
285 JSON key quickly.
286 It can be called only when the json_engine state is JST_KEY.
287 Typical usage is:
288
289 if (j_eng.state == JST_KEY)
290 {
291 if (key_does_not_match(j_eng))
292 json_skip_key(j_eng);
293 }
294*/
295
296int json_skip_key(json_engine_t *j);
297
298
/*
  Pointer to a read-only int.
  NOTE(review): json_get_level() yields the int 'stack_p' and
  json_skip_to_level() takes an int level, so this pointer type matches
  neither of them - confirm whether it is still used anywhere.
*/
typedef int const *json_level_t;
300
/*
  json_skip_to_level() makes the parser quickly get out of nested
  structures (objects and arrays). It is used when we're not interested
  in what is there in the rest of these structures.
  The 'level' should be remembered in advance.
        int level= json_get_level(j);
        .... // getting into the nested JSON structures
        json_skip_to_level(j, level);
*/
/*
  Yields the 'stack_p' member of the engine pointed to by 'j', to be
  passed later to json_skip_to_level().
  The argument is parenthesized so that expressions like '&engine'
  expand correctly: without it, json_get_level(&engine) would become
  (&engine->stack_p).
*/
#define json_get_level(j) ((j)->stack_p)
311
312int json_skip_to_level(json_engine_t *j, int level);
313
/*
  json_skip_level() is json_skip_to_level() applied to the current
  structure only: the parser gets to the end of the current JSON array
  or object. The argument is evaluated twice.
*/
#define json_skip_level(je) json_skip_to_level((je), (je)->stack_p)
320
321
/* Skipping an array item is done by the same function as skipping a key. */
#define json_skip_array_item json_skip_key
323
/*
  Non-zero if the current value of 'je' is of a scalar type,
  that is neither an OBJECT nor an ARRAY.
*/
#define json_value_scalar(je) (JSON_VALUE_ARRAY < (je)->value_type)
329
330
331/*
332 Look for the JSON PATH in the json string.
333 Function can be called several times with same JSON/PATH to
334 find multiple matches.
335 On the first call, the json_engine_t parameter should be
336 initialized with the JSON string, and the json_path_t with the JSON path
337 appropriately. The 'p_cur_step' should point at the first
338 step of the path.
339 The 'array_counters' is the array of JSON_DEPTH_LIMIT size.
340 It stores the array counters of the parsed JSON.
341 If function returns 0, it means it found the match. The position of
342 the match is je->s.c_str. Then we can call the json_find_path()
343 with same engine/path/p_cur_step to get the next match.
344 Non-zero return means no matches found.
345 Check je->s.error to see if there was an error in JSON.
346*/
347int json_find_path(json_engine_t *je,
348 json_path_t *p, json_path_step_t **p_cur_step,
349 uint *array_counters);
350
351
352typedef struct st_json_find_paths_t
353{
354 uint n_paths;
355 json_path_t *paths;
356 uint cur_depth;
357 uint *path_depths;
358 uint array_counters[JSON_DEPTH_LIMIT];
359} json_find_paths_t;
360
361
362int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
363 uint n_paths, json_path_t *paths, uint *path_depths);
364int json_find_paths_next(json_engine_t *je, json_find_paths_t *state);
365
366
367/*
368 Converst JSON string constant into ordinary string constant
369 which can involve unpacking json escapes and changing character set.
370 Returns negative integer in the case of an error,
371 the length of the result otherwise.
372*/
373int json_unescape(CHARSET_INFO *json_cs,
374 const uchar *json_str, const uchar *json_end,
375 CHARSET_INFO *res_cs,
376 uchar *res, uchar *res_end);
377
378/*
379 Converst ordinary string constant into JSON string constant.
380 which can involve appropriate escaping and changing character set.
381 Returns negative integer in the case of an error,
382 the length of the result otherwise.
383*/
384int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end,
385 CHARSET_INFO *json_cs, uchar *json, uchar *json_end);
386
387
388/*
389 Appends the ASCII string to the json with the charset conversion.
390*/
391int json_append_ascii(CHARSET_INFO *json_cs,
392 uchar *json, uchar *json_end,
393 const uchar *ascii, const uchar *ascii_end);
394
395
396/*
397 Scan the JSON and return paths met one-by-one.
398 json_get_path_start(&p)
399 while (json_get_path_next(&p))
400 {
401 handle_the_next_path();
402 }
403*/
404
405int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
406 const uchar *str, const uchar *end,
407 json_path_t *p);
408
409
410int json_get_path_next(json_engine_t *je, json_path_t *p);
411
412
413int json_path_parts_compare(
414 const json_path_step_t *a, const json_path_step_t *a_end,
415 const json_path_step_t *b, const json_path_step_t *b_end,
416 enum json_value_types vt);
417int json_path_compare(const json_path_t *a, const json_path_t *b,
418 enum json_value_types vt);
419
420
421#ifdef __cplusplus
422}
423#endif
424
425#endif /* JSON_LIB_INCLUDED */
426
427