| 1 | #ifndef JSON_LIB_INCLUDED |
| 2 | #define JSON_LIB_INCLUDED |
| 3 | |
| 4 | #ifdef __cplusplus |
| 5 | extern "C" { |
| 6 | #endif |
| 7 | |
/*
  Capacity limit used in this header for the array of path steps,
  the parser's stack of nested structures and the array counters.
*/
#define JSON_DEPTH_LIMIT 32
| 9 | |
/*
  Error codes of the JSON parser.
  When an error happens, the c_next of the JSON engine contains the
  character that caused the error, and the c_str is the position
  in the string where the error occurred.
*/
enum json_errors {
  JE_BAD_CHR= -1,      /* Invalid character, charset handler cannot read it. */

  JE_NOT_JSON_CHR= -2, /* Character met is not used in JSON, */
                       /* ASCII 00-08 for instance. */

  JE_EOS= -3,          /* Unexpected end of string. */

  JE_SYN= -4,          /* The next character breaks the JSON syntax. */

  JE_STRING_CONST= -5, /* Character disallowed in string constant. */

  JE_ESCAPING= -6,     /* Error in the escaping. */

  JE_DEPTH= -7         /* The limit on the JSON depth was overrun. */
};
| 31 | |
| 32 | |
| 33 | typedef struct st_json_string_t |
| 34 | { |
| 35 | const uchar *c_str; /* Current position in JSON string */ |
| 36 | const uchar *str_end; /* The end on the string. */ |
| 37 | my_wc_t c_next; /* UNICODE of the last read character */ |
| 38 | int error; /* error code. */ |
| 39 | |
| 40 | CHARSET_INFO *cs; /* Character set of the JSON string. */ |
| 41 | |
| 42 | my_charset_conv_mb_wc wc; /* UNICODE conversion function. */ |
| 43 | /* It's taken out of the cs just to speed calls. */ |
| 44 | } json_string_t; |
| 45 | |
| 46 | |
| 47 | void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs); |
| 48 | void json_string_set_str(json_string_t *s, |
| 49 | const uchar *str, const uchar *end); |
/*
  Calls the conversion function (j)->wc for the text between (j)->c_str
  and (j)->str_end, passing &(j)->c_next as the place for the result.
  Evaluates to whatever (j)->wc returns. The macro itself does not
  modify (j)->c_str.
  Note that 'j' is evaluated several times.
*/
#define json_next_char(j) \
  ((j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end))
/*
  Non-zero when the current position has reached (or passed)
  the end of the string. 'j' is evaluated twice.
*/
#define json_eos(j) ((j)->c_str >= (j)->str_end)
| 53 | /* |
| 54 | read_string_const_chr() reads the next character of the string constant |
| 55 | and saves it to the js->c_next. |
| 56 | It takes into account possible escapings, so if for instance |
| 57 | the string is '\b', the read_string_const_chr() sets 8. |
| 58 | */ |
| 59 | int json_read_string_const_chr(json_string_t *js); |
| 60 | |
| 61 | |
/*
  Various JSON-related operations expect a JSON path as a parameter.
  The path is a string like this: "$.keyA[2].*"
  The path itself is a sequence of steps, each specifying either a key
  or a position in an array. Some of them can be wildcards.
  So the representation of the JSON path is the json_path_t structure
  containing an array of json_path_step_t objects.
*/
| 70 | |
| 71 | |
/*
  Path step types. These are actually bitmasks, so that they can be
  combined and tested with the '&' and '|' operations.
*/
enum json_path_step_types
{
  JSON_PATH_KEY_NULL= 0,
  JSON_PATH_KEY= 1,          /* Must be equal to JSON_VALUE_OBJECT. */
  JSON_PATH_ARRAY= 2,        /* Must be equal to JSON_VALUE_ARRAY. */
  JSON_PATH_KEY_OR_ARRAY= JSON_PATH_KEY | JSON_PATH_ARRAY,
  JSON_PATH_WILD= 4,         /* Step like .* or [*] */
  JSON_PATH_DOUBLE_WILD= 8,  /* Step like **.k or **[1] */
  JSON_PATH_KEY_WILD= JSON_PATH_KEY | JSON_PATH_WILD,
  JSON_PATH_KEY_DOUBLEWILD= JSON_PATH_KEY | JSON_PATH_DOUBLE_WILD,
  JSON_PATH_ARRAY_WILD= JSON_PATH_ARRAY | JSON_PATH_WILD,
  JSON_PATH_ARRAY_DOUBLEWILD= JSON_PATH_ARRAY | JSON_PATH_DOUBLE_WILD
};
| 86 | |
| 87 | |
| 88 | typedef struct st_json_path_step_t |
| 89 | { |
| 90 | enum json_path_step_types type; /* The type of the step - */ |
| 91 | /* see json_path_step_types */ |
| 92 | const uchar *key; /* Pointer to the beginning of the key. */ |
| 93 | const uchar *key_end; /* Pointer to the end of the key. */ |
| 94 | uint n_item; /* Item number in an array. No meaning for the key step. */ |
| 95 | } json_path_step_t; |
| 96 | |
| 97 | |
| 98 | typedef struct st_json_path_t |
| 99 | { |
| 100 | json_string_t s; /* The string to be parsed. */ |
| 101 | json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */ |
| 102 | json_path_step_t *last_step; /* Points to the last step. */ |
| 103 | |
| 104 | int mode_strict; /* TRUE if the path specified as 'strict' */ |
| 105 | enum json_path_step_types types_used; /* The '|' of all step's 'type'-s */ |
| 106 | } json_path_t; |
| 107 | |
| 108 | |
| 109 | int json_path_setup(json_path_t *p, |
| 110 | CHARSET_INFO *i_cs, const uchar *str, const uchar *end); |
| 111 | |
| 112 | |
/*
  The set of functions and structures below provides an interface
  to the JSON text parser.
  Running the parser normally goes like this:

    json_engine_t j_eng;                     // structure keeps parser's data
    json_scan_start(&j_eng, cs, str, end);   // begin the parsing

    do
    {
      // The parser has read the next piece of JSON
      // and set the fields of the j_eng structure accordingly.
      // So let's see what we have:
      switch (j_eng.state)
      {
        case JST_KEY:
           // Handle key name. See the json_read_keyname_chr()
           // Probably compare it with the keyname we're looking for
        case JST_VALUE:
           // Handle value. It is either value of the key or an array item.
           // see the json_read_value()
        case JST_OBJ_START:
          // parser found an object (the '{' in JSON)
        case JST_OBJ_END:
          // parser found the end of the object (the '}' in JSON)
        case JST_ARRAY_START:
          // parser found an array (the '[' in JSON)
        case JST_ARRAY_END:
          // parser found the end of the array (the ']' in JSON)

      };
    } while (json_scan_next(&j_eng) == 0);   // parse next structure


    if (j_eng.s.error)  // we need to check why the loop ended.
                        // Did we get to the end of JSON, or came upon error.
    {
       signal_error_in_JSON()
    }


  Parts of JSON can be quickly skipped. If we are not interested
  in a particular key, we can just skip it with the json_skip_key() call.
  Similarly json_skip_level() goes right to the end of an object
  or an array.
*/
| 159 | |
| 160 | |
/* These are the JSON parser states that the user can expect and handle. */
enum json_states {
  JST_VALUE= 0,        /* value found */
  JST_KEY= 1,          /* key found */
  JST_OBJ_START= 2,    /* object */
  JST_OBJ_END= 3,      /* object ended */
  JST_ARRAY_START= 4,  /* array */
  JST_ARRAY_END= 5,    /* array ended */
  NR_JSON_USER_STATES  /* the number of the states listed above */
};
| 171 | |
| 172 | |
/*
  Types of a JSON value.
  The two non-scalar types have the smallest values; the
  json_value_scalar() macro and the json_path_step_types
  enum depend on that.
*/
enum json_value_types
{
  JSON_VALUE_OBJECT= 1,
  JSON_VALUE_ARRAY= 2,
  JSON_VALUE_STRING= 3,
  JSON_VALUE_NUMBER= 4,
  JSON_VALUE_TRUE= 5,
  JSON_VALUE_FALSE= 6,
  JSON_VALUE_NULL= 7
};
| 183 | |
| 184 | |
/* Bit flags describing a JSON_VALUE_NUMBER value. */
enum json_num_flags
{
  JSON_NUM_NEG= 1,        /* Number is negative. */
  JSON_NUM_FRAC_PART= 2,  /* The fractional part is not empty. */
  JSON_NUM_EXP= 4         /* The number has the 'e' part. */
};
| 191 | |
| 192 | |
| 193 | typedef struct st_json_engine_t |
| 194 | { |
| 195 | json_string_t s; /* String to parse. */ |
| 196 | int sav_c_len; /* Length of the current character. |
| 197 | Can be more than 1 for multibyte charsets */ |
| 198 | |
| 199 | int state; /* The state of the parser. One of 'enum json_states'. |
| 200 | It tells us what construction of JSON we've just read. */ |
| 201 | |
| 202 | /* These values are only set after the json_read_value() call. */ |
| 203 | enum json_value_types value_type; /* type of the value.*/ |
| 204 | const uchar *value; /* Points to the value. */ |
| 205 | const uchar *value_begin;/* Points to where the value starts in the JSON. */ |
| 206 | int value_escaped; /* Flag telling if the string value has escaping.*/ |
| 207 | uint num_flags; /* the details of the JSON_VALUE_NUMBER, is it negative, |
| 208 | or if it has the fractional part. |
| 209 | See the enum json_num_flags. */ |
| 210 | |
| 211 | /* |
| 212 | In most cases the 'value' and 'value_begin' are equal. |
| 213 | They only differ if the value is a string constants. Then 'value_begin' |
| 214 | points to the starting quotation mark, while the 'value' - to |
| 215 | the first character of the string. |
| 216 | */ |
| 217 | |
| 218 | const uchar *value_end; /* Points to the next character after the value. */ |
| 219 | int value_len; /* The length of the value. Does not count quotations for */ |
| 220 | /* string constants. */ |
| 221 | |
| 222 | int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */ |
| 223 | int stack_p; /* The 'stack' pointer. */ |
| 224 | } json_engine_t; |
| 225 | |
| 226 | |
| 227 | int json_scan_start(json_engine_t *je, |
| 228 | CHARSET_INFO *i_cs, const uchar *str, const uchar *end); |
| 229 | int json_scan_next(json_engine_t *j); |
| 230 | |
| 231 | |
| 232 | /* |
| 233 | json_read_keyname_chr() function assists parsing the name of an JSON key. |
| 234 | It only can be called when the json_engine is in JST_KEY. |
| 235 | The json_read_keyname_chr() reads one character of the name of the key, |
| 236 | and puts it in j_eng.s.next_c. |
| 237 | Typical usage is like this: |
| 238 | |
| 239 | if (j_eng.state == JST_KEY) |
| 240 | { |
| 241 | while (json_read_keyname_chr(&j) == 0) |
| 242 | { |
| 243 | //handle next character i.e. match it against the pattern |
| 244 | } |
| 245 | } |
| 246 | */ |
| 247 | |
| 248 | int json_read_keyname_chr(json_engine_t *j); |
| 249 | |
| 250 | |
| 251 | /* |
| 252 | Check if the name of the current JSON key matches |
| 253 | the step of the path. |
| 254 | */ |
| 255 | int json_key_matches(json_engine_t *je, json_string_t *k); |
| 256 | |
| 257 | |
| 258 | /* |
| 259 | json_read_value() function parses the JSON value syntax, |
| 260 | so that we can handle the value of a key or an array item. |
| 261 | It only returns meaningful result when the engine is in |
| 262 | the JST_VALUE state. |
| 263 | |
| 264 | Typical usage is like this: |
| 265 | |
| 266 | if (j_eng.state == JST_VALUE) |
| 267 | { |
| 268 | json_read_value(&j_eng); |
| 269 | switch(j_eng.value_type) |
| 270 | { |
| 271 | case JSON_VALUE_STRING: |
| 272 | // get the string |
| 273 | str= j_eng.value; |
| 274 | str_length= j_eng.value_len; |
| 275 | case JSON_VALUE_NUMBER: |
| 276 | // get the number |
| 277 | ... etc |
| 278 | } |
| 279 | */ |
| 280 | int json_read_value(json_engine_t *j); |
| 281 | |
| 282 | |
| 283 | /* |
| 284 | json_skip_key() makes parser skip the content of the current |
| 285 | JSON key quickly. |
| 286 | It can be called only when the json_engine state is JST_KEY. |
| 287 | Typical usage is: |
| 288 | |
| 289 | if (j_eng.state == JST_KEY) |
| 290 | { |
| 291 | if (key_does_not_match(j_eng)) |
| 292 | json_skip_key(j_eng); |
| 293 | } |
| 294 | */ |
| 295 | |
| 296 | int json_skip_key(json_engine_t *j); |
| 297 | |
| 298 | |
/*
  Type name for a remembered nesting level.
  NOTE(review): json_get_level() yields the int 'stack_p' and
  json_skip_to_level() takes an int 'level', so this pointer type does
  not match the declarations in this header - confirm before using it.
*/
typedef const int *json_level_t;
| 300 | |
/*
  json_skip_to_level() makes the parser quickly get out of nested
  objects and arrays. It is used when we're not interested in what is
  there in the rest of these structures.
  The 'level' should be remembered in advance:

    int level= json_get_level(j);
    .... // getting into the nested JSON structures
    json_skip_to_level(j, level);

  json_get_level() evaluates to the current 'stack_p' of the engine.
  The argument is parenthesized, so any pointer expression can be
  passed, e.g. json_get_level(&j_eng).
*/
#define json_get_level(j) ((j)->stack_p)
| 311 | |
| 312 | int json_skip_to_level(json_engine_t *j, int level); |
| 313 | |
/*
  json_skip_level() works as json_skip_to_level() with just the current
  structure. So it gets to the end of the current JSON array or object.
  The argument is evaluated twice.
*/
#define json_skip_level(json_engine) \
  json_skip_to_level((json_engine), (json_engine)->stack_p)
| 320 | |
| 321 | |
/* Skipping an array item is done by the same function as skipping a key. */
#define json_skip_array_item json_skip_key
| 323 | |
/*
  Checks if the current value is of a scalar type -
  neither an OBJECT nor an ARRAY.
  Relies on JSON_VALUE_OBJECT and JSON_VALUE_ARRAY being the two
  smallest values of the enum json_value_types.
*/
#define json_value_scalar(je) ((je)->value_type > JSON_VALUE_ARRAY)
| 329 | |
| 330 | |
| 331 | /* |
| 332 | Look for the JSON PATH in the json string. |
| 333 | Function can be called several times with same JSON/PATH to |
| 334 | find multiple matches. |
| 335 | On the first call, the json_engine_t parameter should be |
| 336 | initialized with the JSON string, and the json_path_t with the JSON path |
| 337 | appropriately. The 'p_cur_step' should point at the first |
| 338 | step of the path. |
| 339 | The 'array_counters' is the array of JSON_DEPTH_LIMIT size. |
| 340 | It stores the array counters of the parsed JSON. |
| 341 | If function returns 0, it means it found the match. The position of |
| 342 | the match is je->s.c_str. Then we can call the json_find_path() |
| 343 | with same engine/path/p_cur_step to get the next match. |
| 344 | Non-zero return means no matches found. |
| 345 | Check je->s.error to see if there was an error in JSON. |
| 346 | */ |
| 347 | int json_find_path(json_engine_t *je, |
| 348 | json_path_t *p, json_path_step_t **p_cur_step, |
| 349 | uint *array_counters); |
| 350 | |
| 351 | |
| 352 | typedef struct st_json_find_paths_t |
| 353 | { |
| 354 | uint n_paths; |
| 355 | json_path_t *paths; |
| 356 | uint cur_depth; |
| 357 | uint *path_depths; |
| 358 | uint array_counters[JSON_DEPTH_LIMIT]; |
| 359 | } json_find_paths_t; |
| 360 | |
| 361 | |
| 362 | int json_find_paths_first(json_engine_t *je, json_find_paths_t *state, |
| 363 | uint n_paths, json_path_t *paths, uint *path_depths); |
| 364 | int json_find_paths_next(json_engine_t *je, json_find_paths_t *state); |
| 365 | |
| 366 | |
| 367 | /* |
| 368 | Converst JSON string constant into ordinary string constant |
| 369 | which can involve unpacking json escapes and changing character set. |
| 370 | Returns negative integer in the case of an error, |
| 371 | the length of the result otherwise. |
| 372 | */ |
| 373 | int json_unescape(CHARSET_INFO *json_cs, |
| 374 | const uchar *json_str, const uchar *json_end, |
| 375 | CHARSET_INFO *res_cs, |
| 376 | uchar *res, uchar *res_end); |
| 377 | |
| 378 | /* |
| 379 | Converst ordinary string constant into JSON string constant. |
| 380 | which can involve appropriate escaping and changing character set. |
| 381 | Returns negative integer in the case of an error, |
| 382 | the length of the result otherwise. |
| 383 | */ |
| 384 | int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end, |
| 385 | CHARSET_INFO *json_cs, uchar *json, uchar *json_end); |
| 386 | |
| 387 | |
| 388 | /* |
| 389 | Appends the ASCII string to the json with the charset conversion. |
| 390 | */ |
| 391 | int json_append_ascii(CHARSET_INFO *json_cs, |
| 392 | uchar *json, uchar *json_end, |
| 393 | const uchar *ascii, const uchar *ascii_end); |
| 394 | |
| 395 | |
| 396 | /* |
| 397 | Scan the JSON and return paths met one-by-one. |
| 398 | json_get_path_start(&p) |
| 399 | while (json_get_path_next(&p)) |
| 400 | { |
| 401 | handle_the_next_path(); |
| 402 | } |
| 403 | */ |
| 404 | |
| 405 | int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs, |
| 406 | const uchar *str, const uchar *end, |
| 407 | json_path_t *p); |
| 408 | |
| 409 | |
| 410 | int json_get_path_next(json_engine_t *je, json_path_t *p); |
| 411 | |
| 412 | |
| 413 | int json_path_parts_compare( |
| 414 | const json_path_step_t *a, const json_path_step_t *a_end, |
| 415 | const json_path_step_t *b, const json_path_step_t *b_end, |
| 416 | enum json_value_types vt); |
| 417 | int json_path_compare(const json_path_t *a, const json_path_t *b, |
| 418 | enum json_value_types vt); |
| 419 | |
| 420 | |
| 421 | #ifdef __cplusplus |
| 422 | } |
| 423 | #endif |
| 424 | |
| 425 | #endif /* JSON_LIB_INCLUDED */ |
| 426 | |
| 427 | |