1#include <my_global.h>
2#include <string.h>
3#include <m_ctype.h>
4#include "json_lib.h"
5
/*
 JSON escaping lets the user specify characters by their UTF-16 code
 units (\uXXXX), so the UTF-16 conversion routines are needed here.
 They are declared by hand in this file; their definitions are not
 part of it (expected to come from the utf16 charset implementation).
*/
int my_utf16_uni(CHARSET_INFO *cs,
 my_wc_t *pwc, const uchar *s, const uchar *e);
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
14
15
16void json_string_set_str(json_string_t *s,
17 const uchar *str, const uchar *end)
18{
19 s->c_str= str;
20 s->str_end= end;
21}
22
23
24void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
25{
26 s->cs= i_cs;
27 s->error= 0;
28 s->wc= i_cs->cset->mb_wc;
29}
30
31
32static void json_string_setup(json_string_t *s,
33 CHARSET_INFO *i_cs, const uchar *str,
34 const uchar *end)
35{
36 json_string_set_cs(s, i_cs);
37 json_string_set_str(s, str, end);
38}
39
40
/*
 Character classes of the main JSON scanner.
 A class value is used as the column index of json_actions[][], so the
 order of the enumerators up to NR_C_CLASSES has to match the column
 order of that table.
*/
enum json_char_classes {
 C_EOS, /* end of string */
 C_LCURB, /* { */
 C_RCURB, /* } */
 C_LSQRB, /* [ */
 C_RSQRB, /* ] */
 C_COLON, /* : */
 C_COMMA, /* , */
 C_QUOTE, /* " */
 C_DIGIT, /* -0123456789 */
 C_LOW_F, /* 'f' (for "false") */
 C_LOW_N, /* 'n' (for "null") */
 C_LOW_T, /* 't' (for "true") */
 C_ETC, /* everything else */
 C_ERR, /* character disallowed in JSON */
 C_BAD, /* invalid character, charset handler cannot read it */
 NR_C_CLASSES, /* Number of classes that are handled with functions. */
 C_SPACE /* space. Skipped by get_first_nonspace(), needs no handler,
 so it is placed after the counter. */
};
60
61
/*
 This array maps the first 128 Unicode code points to json_char_classes.
 The remaining Unicode characters are mapped to C_ETC by the code that
 consults the table (see get_first_nonspace()).
 Each row below covers 8 consecutive code points, starting from 0.
*/

static enum json_char_classes json_chr_map[128] = {
 C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR,
 C_ERR, C_SPACE, C_SPACE, C_ERR, C_ERR, C_SPACE, C_ERR, C_ERR,
 C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR,
 C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR,

 C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
 C_ETC, C_ETC, C_ETC, C_ETC, C_COMMA, C_DIGIT, C_ETC, C_ETC,
 C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
 C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,

 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
 C_ETC, C_ETC, C_ETC, C_LSQRB, C_ETC, C_RSQRB, C_ETC, C_ETC,

 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_F, C_ETC,
 C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_N, C_ETC,
 C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_T, C_ETC, C_ETC, C_ETC,
 C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
};
88
89
/*
 The JSON parser actually has more states than 'enum json_states'
 declares. The rest of the states are not visible to the user, so they
 are specified here to avoid confusion. They continue the numbering
 right after the user states, and NR_JSON_STATES is the total number
 of rows in json_actions[][].
*/

enum json_all_states {
 JST_DONE= NR_JSON_USER_STATES, /* ok to finish */
 JST_OBJ_CONT= NR_JSON_USER_STATES+1, /* object continues */
 JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues */
 JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
 NR_JSON_STATES= NR_JSON_USER_STATES+4
};
103
104
/*
 Handler for one (state, character class) cell of json_actions[][].
 The handlers in this file return 0 to let the scan go on and
 non-zero to stop it (error or normal end).
*/
typedef int (*json_state_handler)(json_engine_t *);
106
107
108/* The string is broken. */
109static int unexpected_eos(json_engine_t *j)
110{
111 j->s.error= JE_EOS;
112 return 1;
113}
114
115
116/* This symbol here breaks the JSON syntax. */
117static int syntax_error(json_engine_t *j)
118{
119 j->s.error= JE_SYN;
120 return 1;
121}
122
123
124/* Value of object. */
125static int mark_object(json_engine_t *j)
126{
127 j->state= JST_OBJ_START;
128 if (++j->stack_p < JSON_DEPTH_LIMIT)
129 {
130 j->stack[j->stack_p]= JST_OBJ_CONT;
131 return 0;
132 }
133 j->s.error= JE_DEPTH;
134 return 1;
135}
136
137
138/* Read value of object. */
139static int read_obj(json_engine_t *j)
140{
141 j->state= JST_OBJ_START;
142 j->value_type= JSON_VALUE_OBJECT;
143 j->value= j->value_begin;
144 if (++j->stack_p < JSON_DEPTH_LIMIT)
145 {
146 j->stack[j->stack_p]= JST_OBJ_CONT;
147 return 0;
148 }
149 j->s.error= JE_DEPTH;
150 return 1;
151}
152
153
154/* Value of array. */
155static int mark_array(json_engine_t *j)
156{
157 j->state= JST_ARRAY_START;
158 if (++j->stack_p < JSON_DEPTH_LIMIT)
159 {
160 j->stack[j->stack_p]= JST_ARRAY_CONT;
161 j->value= j->value_begin;
162 return 0;
163 }
164 j->s.error= JE_DEPTH;
165 return 1;
166}
167
168/* Read value of object. */
169static int read_array(json_engine_t *j)
170{
171 j->state= JST_ARRAY_START;
172 j->value_type= JSON_VALUE_ARRAY;
173 j->value= j->value_begin;
174 if (++j->stack_p < JSON_DEPTH_LIMIT)
175 {
176 j->stack[j->stack_p]= JST_ARRAY_CONT;
177 return 0;
178 }
179 j->s.error= JE_DEPTH;
180 return 1;
181}
182
183
184
/*
 Character classes inside the JSON string constant.
 We mostly need this to parse escaping properly.
 Escapings available in JSON are:
 \" - quotation mark
 \\ - backslash
 \b - backspace UNICODE 8
 \f - formfeed UNICODE 12
 \n - newline UNICODE 10
 \r - carriage return UNICODE 13
 \t - horizontal tab UNICODE 9
 \u{four-hex-digits} - UTF-16 code unit

 S_0..S_F double as the numeric value of the hex digit (see
 read_4_hexdigits()). The code relies on the ordering
 hex digits < S_ETC < S_QUOTE/S_BKSL/S_ERR: "<= S_ETC" means an
 ordinary character, "> S_F" means not a hex digit.
*/
enum json_string_char_classes {
 S_0= 0,
 S_1= 1,
 S_2= 2,
 S_3= 3,
 S_4= 4,
 S_5= 5,
 S_6= 6,
 S_7= 7,
 S_8= 8,
 S_9= 9,
 S_A= 10,
 S_B= 11,
 S_C= 12,
 S_D= 13,
 S_E= 14,
 S_F= 15,
 S_ETC= 36, /* rest of characters. */
 S_QUOTE= 37,
 S_BKSL= 38, /* \ */
 S_ERR= 100, /* disallowed */
};
220
221
/*
 This maps the first 128 code points to their classes inside a string
 constant. Each row covers 8 consecutive code points, starting from 0.
 Callers in this file check for code points >= 128 before indexing the
 table and treat them as ordinary characters.
*/
static enum json_string_char_classes json_instr_chr_map[128] = {
 S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
 S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
 S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
 S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,

 S_ETC, S_ETC, S_QUOTE, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
 S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,
 S_8, S_9, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,

 S_ETC, S_A, S_B, S_C, S_D, S_E, S_F, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_BKSL, S_ETC, S_ETC, S_ETC,

 S_ETC, S_A, S_B, S_C, S_D, S_E, S_F, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
 S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC
};
244
245
246static int read_4_hexdigits(json_string_t *s, uchar *dest)
247{
248 int i, t, c_len;
249 for (i=0; i<4; i++)
250 {
251 if ((c_len= json_next_char(s)) <= 0)
252 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
253
254 if (s->c_next >= 128 || (t= json_instr_chr_map[s->c_next]) > S_F)
255 return s->error= JE_SYN;
256
257 s->c_str+= c_len;
258 dest[i/2]+= (i % 2) ? t : t*16;
259 }
260 return 0;
261}
262
263
264static int json_handle_esc(json_string_t *s)
265{
266 int t, c_len;
267
268 if ((c_len= json_next_char(s)) <= 0)
269 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
270
271 s->c_str+= c_len;
272 switch (s->c_next)
273 {
274 case 'b':
275 s->c_next= 8;
276 return 0;
277 case 'f':
278 s->c_next= 12;
279 return 0;
280 case 'n':
281 s->c_next= 10;
282 return 0;
283 case 'r':
284 s->c_next= 13;
285 return 0;
286 case 't':
287 s->c_next= 9;
288 return 0;
289 }
290
291 if (s->c_next < 128 && (t= json_instr_chr_map[s->c_next]) == S_ERR)
292 {
293 s->c_str-= c_len;
294 return s->error= JE_ESCAPING;
295 }
296
297
298 if (s->c_next != 'u')
299 return 0;
300
301 {
302 /*
303 Read the four-hex-digits code.
304 If symbol is not in the Basic Multilingual Plane, we're reading
305 the string for the next four digits to compose the UTF-16 surrogate pair.
306 */
307 uchar code[4]= {0,0,0,0};
308
309 if (read_4_hexdigits(s, code))
310 return 1;
311
312 if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2)
313 return 0;
314
315 if (c_len != MY_CS_TOOSMALL4)
316 return s->error= JE_BAD_CHR;
317
318 if ((c_len= json_next_char(s)) <= 0)
319 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
320 if (s->c_next != '\\')
321 return s->error= JE_SYN;
322
323 if ((c_len= json_next_char(s)) <= 0)
324 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
325 if (s->c_next != 'u')
326 return s->error= JE_SYN;
327
328 if (read_4_hexdigits(s, code+2))
329 return 1;
330
331 if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 2)
332 return 0;
333 }
334 return s->error= JE_BAD_CHR;
335}
336
337
338int json_read_string_const_chr(json_string_t *js)
339{
340 int c_len;
341
342 if ((c_len= json_next_char(js)) > 0)
343 {
344 js->c_str+= c_len;
345 return (js->c_next == '\\') ? json_handle_esc(js) : 0;
346 }
347 js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
348 return 1;
349}
350
351
352static int skip_str_constant(json_engine_t *j)
353{
354 int t, c_len;
355 for (;;)
356 {
357 if ((c_len= json_next_char(&j->s)) > 0)
358 {
359 j->s.c_str+= c_len;
360 if (j->s.c_next >= 128 || ((t=json_instr_chr_map[j->s.c_next]) <= S_ETC))
361 continue;
362
363 if (j->s.c_next == '"')
364 break;
365 if (j->s.c_next == '\\')
366 {
367 j->value_escaped= 1;
368 if (json_handle_esc(&j->s))
369 return 1;
370 continue;
371 }
372 /* Symbol not allowed in JSON. */
373 return j->s.error= JE_NOT_JSON_CHR;
374 }
375 else
376 return j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
377 }
378
379 j->state= j->stack[j->stack_p];
380 return 0;
381}
382
383
384/* Scalar string. */
385static int v_string(json_engine_t *j)
386{
387 return skip_str_constant(j) || json_scan_next(j);
388}
389
390
391/* Read scalar string. */
392static int read_strn(json_engine_t *j)
393{
394 j->value= j->s.c_str;
395 j->value_type= JSON_VALUE_STRING;
396 j->value_escaped= 0;
397
398 if (skip_str_constant(j))
399 return 1;
400
401 j->state= j->stack[j->stack_p];
402 j->value_len= (int)(j->s.c_str - j->value) - 1;
403 return 0;
404}
405
406
/*
 There is a dedicated parser for numeric constants. It is similar
 to the main JSON parser: character classes are defined, characters
 are mapped to classes and a state-per-class table is implemented.
 There are no functions that handle particular classes though; the
 table just specifies the new state the parser should get.

 The class value is the column index of json_num_states[][].
*/
enum json_num_char_classes {
 N_MINUS,
 N_PLUS,
 N_ZERO,
 N_DIGIT,
 N_POINT,
 N_E,
 N_END,
 N_EEND,
 N_ERR,
 N_NUM_CLASSES
};
427
428
/*
 Maps the first 128 code points to json_num_char_classes.
 Each row covers 8 consecutive code points, starting from 0.
 Tab, LF, CR, space, ',', ']' and '}' are N_END; control characters
 are N_ERR; characters with no role in a number are N_EEND.
*/
static enum json_num_char_classes json_num_chr_map[128] = {
 N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR,
 N_ERR, N_END, N_END, N_ERR, N_ERR, N_END, N_ERR, N_ERR,
 N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR,
 N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR,

 N_END, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_PLUS, N_END, N_MINUS, N_POINT, N_EEND,
 N_ZERO, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
 N_DIGIT, N_DIGIT, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND,

 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_E, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_END, N_EEND, N_EEND,

 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_E, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND,
 N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_END, N_EEND, N_EEND,
};
450
451
/*
 States of the numeric constant parser. The value is the row index of
 json_num_states[][] and the index of json_num_state_flags[].
 NS_OK is 0 on purpose: skip_num_constant() continues only while the
 new state is greater than 0.
*/
enum json_num_states {
 NS_OK, /* Number ended. */
 NS_GO, /* Initial state. */
 NS_GO1, /* If the number starts with '-'. */
 NS_Z, /* If the number starts with '0'. */
 NS_Z1, /* If the numbers starts with '-0'. */
 NS_INT, /* Integer part. */
 NS_FRAC,/* Fractional part. */
 NS_EX, /* Exponential part begins. */
 NS_EX1, /* Exponential part continues. */
 NS_NUM_STATES
};
464
465
/*
 Transition table of the numeric constant parser:
 json_num_states[current state][character class] is the next state.
 Positive entries are states to continue with, NS_OK (0) ends the
 number, JE_* entries are error codes.

 NOTE(review): every row lists 8 initializers while N_NUM_CLASSES is 9.
 So the N_EEND column takes the value written under 'ERROR' and the
 N_ERR column is implicitly zero, i.e. NS_OK. Confirm this is intended.
*/
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
{
/* - + 0 1..9 POINT E END_OK ERROR */
/*OK*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO*/ { NS_GO1, JE_SYN, NS_Z, NS_INT, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO1*/ { JE_SYN, JE_SYN, NS_Z1, NS_INT, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
/*ZERO*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, NS_FRAC, JE_SYN, NS_OK, JE_BAD_CHR },
/*ZE1*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, NS_FRAC, JE_SYN, NS_OK, JE_BAD_CHR },
/*INT*/ { JE_SYN, JE_SYN, NS_INT, NS_INT, NS_FRAC, NS_EX, NS_OK, JE_BAD_CHR },
/*FRAC*/ { JE_SYN, JE_SYN, NS_FRAC, NS_FRAC,JE_SYN, NS_EX, NS_OK, JE_BAD_CHR },
/*EX*/ { NS_EX, NS_EX, NS_EX1, NS_EX1, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
/*EX1*/ { JE_SYN, JE_SYN, NS_EX1, NS_EX1, JE_SYN, JE_SYN, NS_OK, JE_BAD_CHR }
};
479
480
/*
 Flags that skip_num_constant() ORs into j->num_flags each time the
 number parser is in the given state.
*/
static uint json_num_state_flags[NS_NUM_STATES]=
{
/*OK*/ 0,
/*GO*/ 0,
/*GO1*/ JSON_NUM_NEG,
/*ZERO*/ 0,
/*ZE1*/ 0,
/*INT*/ 0,
/*FRAC*/ JSON_NUM_FRAC_PART,
/*EX*/ JSON_NUM_EXP,
/*EX1*/ 0,
};
493
494
495static int skip_num_constant(json_engine_t *j)
496{
497 int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
498 int c_len;
499
500 j->num_flags= 0;
501 for (;;)
502 {
503 j->num_flags|= json_num_state_flags[state];
504 if ((c_len= json_next_char(&j->s)) > 0 && j->s.c_next < 128)
505 {
506 if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0)
507 {
508 j->s.c_str+= c_len;
509 continue;
510 }
511 break;
512 }
513
514 if ((j->s.error=
515 json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
516 return 1;
517 else
518 break;
519 }
520
521 j->state= j->stack[j->stack_p];
522 return 0;
523}
524
525
526/* Scalar numeric. */
527static int v_number(json_engine_t *j)
528{
529 return skip_num_constant(j) || json_scan_next(j);
530}
531
532
533/* Read numeric constant. */
534static int read_num(json_engine_t *j)
535{
536 j->value= j->value_begin;
537 if (skip_num_constant(j) == 0)
538 {
539 j->value_type= JSON_VALUE_NUMBER;
540 j->value_len= (int)(j->s.c_str - j->value_begin);
541 return 0;
542 }
543 return 1;
544}
545
546
547/* Check that the JSON string matches the argument and skip it. */
548static int skip_string_verbatim(json_string_t *s, const char *str)
549{
550 int c_len;
551 while (*str)
552 {
553 if ((c_len= json_next_char(s)) > 0)
554 {
555 if (s->c_next == (my_wc_t) *(str++))
556 {
557 s->c_str+= c_len;
558 continue;
559 }
560 return s->error= JE_SYN;
561 }
562 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
563 }
564
565 return 0;
566}
567
568
569/* Scalar false. */
570static int v_false(json_engine_t *j)
571{
572 if (skip_string_verbatim(&j->s, "alse"))
573 return 1;
574 j->state= j->stack[j->stack_p];
575 return json_scan_next(j);
576}
577
578
579/* Scalar null. */
580static int v_null(json_engine_t *j)
581{
582 if (skip_string_verbatim(&j->s, "ull"))
583 return 1;
584 j->state= j->stack[j->stack_p];
585 return json_scan_next(j);
586}
587
588
589/* Scalar true. */
590static int v_true(json_engine_t *j)
591{
592 if (skip_string_verbatim(&j->s, "rue"))
593 return 1;
594 j->state= j->stack[j->stack_p];
595 return json_scan_next(j);
596}
597
598
599/* Read false. */
600static int read_false(json_engine_t *j)
601{
602 j->value_type= JSON_VALUE_FALSE;
603 j->value= j->value_begin;
604 j->state= j->stack[j->stack_p];
605 j->value_len= 5;
606 return skip_string_verbatim(&j->s, "alse");
607}
608
609
610/* Read null. */
611static int read_null(json_engine_t *j)
612{
613 j->value_type= JSON_VALUE_NULL;
614 j->value= j->value_begin;
615 j->state= j->stack[j->stack_p];
616 j->value_len= 4;
617 return skip_string_verbatim(&j->s, "ull");
618}
619
620
621/* Read true. */
622static int read_true(json_engine_t *j)
623{
624 j->value_type= JSON_VALUE_TRUE;
625 j->value= j->value_begin;
626 j->state= j->stack[j->stack_p];
627 j->value_len= 4;
628 return skip_string_verbatim(&j->s, "rue");
629}
630
631
632/* Disallowed character. */
633static int not_json_chr(json_engine_t *j)
634{
635 j->s.error= JE_NOT_JSON_CHR;
636 return 1;
637}
638
639
640/* Bad character. */
641static int bad_chr(json_engine_t *j)
642{
643 j->s.error= JE_BAD_CHR;
644 return 1;
645}
646
647
648/* Correct finish. */
649static int done(json_engine_t *j __attribute__((unused)))
650{
651 return 1;
652}
653
654
655/* End of the object. */
656static int end_object(json_engine_t *j)
657{
658 j->stack_p--;
659 j->state= JST_OBJ_END;
660 return 0;
661}
662
663
664/* End of the array. */
665static int end_array(json_engine_t *j)
666{
667 j->stack_p--;
668 j->state= JST_ARRAY_END;
669 return 0;
670}
671
672
673/* Start reading key name. */
674static int read_keyname(json_engine_t *j)
675{
676 j->state= JST_KEY;
677 return 0;
678}
679
680
681static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
682{
683 do
684 {
685 if ((*c_len= json_next_char(js)) <= 0)
686 *t_next= json_eos(js) ? C_EOS : C_BAD;
687 else
688 {
689 *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
690 js->c_str+= *c_len;
691 }
692 } while (*t_next == C_SPACE);
693}
694
695
696/* Next key name. */
697static int next_key(json_engine_t *j)
698{
699 int t_next, c_len;
700 get_first_nonspace(&j->s, &t_next, &c_len);
701
702 if (t_next == C_QUOTE)
703 {
704 j->state= JST_KEY;
705 return 0;
706 }
707
708 j->s.error= (t_next == C_EOS) ? JE_EOS :
709 ((t_next == C_BAD) ? JE_BAD_CHR :
710 JE_SYN);
711 return 1;
712}
713
714
/*
 Forward declarations of the handlers that are referenced by
 json_actions[][] but defined after it, because they use the table
 themselves.
*/
static int skip_colon(json_engine_t *j);
static int skip_key(json_engine_t *j);
static int struct_end_cb(json_engine_t *j);
static int struct_end_qb(json_engine_t *j);
static int struct_end_cm(json_engine_t *j);
static int struct_end_eos(json_engine_t *j);
722
723
724static int next_item(json_engine_t *j)
725{
726 j->state= JST_VALUE;
727 return 0;
728}
729
730
731static int array_item(json_engine_t *j)
732{
733 j->state= JST_VALUE;
734 j->s.c_str-= j->sav_c_len;
735 return 0;
736}
737
738
739static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
740/*
741 EOS { } [ ]
742 : , " -0..9 f
743 n t ETC ERR BAD
744*/
745{
746 {/*VALUE*/
747 unexpected_eos, mark_object, syntax_error, mark_array, syntax_error,
748 syntax_error, syntax_error,v_string, v_number, v_false,
749 v_null, v_true, syntax_error, not_json_chr, bad_chr},
750 {/*KEY*/
751 unexpected_eos, skip_key, skip_key, skip_key, skip_key,
752 skip_key, skip_key, skip_colon, skip_key, skip_key,
753 skip_key, skip_key, skip_key, not_json_chr, bad_chr},
754 {/*OBJ_START*/
755 unexpected_eos, syntax_error, end_object, syntax_error, syntax_error,
756 syntax_error, syntax_error, read_keyname, syntax_error, syntax_error,
757 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
758 {/*OBJ_END*/
759 struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
760 syntax_error, struct_end_cm,syntax_error, syntax_error, syntax_error,
761 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
762 {/*ARRAY_START*/
763 unexpected_eos, array_item, syntax_error, array_item, end_array,
764 syntax_error, syntax_error, array_item, array_item, array_item,
765 array_item, array_item, syntax_error, not_json_chr, bad_chr},
766 {/*ARRAY_END*/
767 struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
768 syntax_error, struct_end_cm, syntax_error, syntax_error, syntax_error,
769 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
770 {/*DONE*/
771 done, syntax_error, syntax_error, syntax_error, syntax_error,
772 syntax_error, syntax_error, syntax_error, syntax_error, syntax_error,
773 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
774 {/*OBJ_CONT*/
775 unexpected_eos, syntax_error, end_object, syntax_error, end_array,
776 syntax_error, next_key, syntax_error, syntax_error, syntax_error,
777 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
778 {/*ARRAY_CONT*/
779 unexpected_eos, syntax_error, syntax_error, syntax_error, end_array,
780 syntax_error, next_item, syntax_error, syntax_error, syntax_error,
781 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
782 {/*READ_VALUE*/
783 unexpected_eos, read_obj, syntax_error, read_array, syntax_error,
784 syntax_error, syntax_error, read_strn, read_num, read_false,
785 read_null, read_true, syntax_error, not_json_chr, bad_chr},
786};
787
788
789
790int json_scan_start(json_engine_t *je,
791 CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
792{
793 json_string_setup(&je->s, i_cs, str, end);
794 je->stack[0]= JST_DONE;
795 je->stack_p= 0;
796 je->state= JST_VALUE;
797 return 0;
798}
799
800
801/* Skip colon and the value. */
802static int skip_colon(json_engine_t *j)
803{
804 int t_next, c_len;
805
806 get_first_nonspace(&j->s, &t_next, &c_len);
807
808 if (t_next == C_COLON)
809 {
810 get_first_nonspace(&j->s, &t_next, &c_len);
811 return json_actions[JST_VALUE][t_next](j);
812 }
813
814 j->s.error= (t_next == C_EOS) ? JE_EOS :
815 ((t_next == C_BAD) ? JE_BAD_CHR:
816 JE_SYN);
817
818 return 1;
819}
820
821
822/* Skip colon and the value. */
823static int skip_key(json_engine_t *j)
824{
825 int t_next, c_len;
826 while (json_read_keyname_chr(j) == 0) {}
827
828 if (j->s.error)
829 return 1;
830
831 get_first_nonspace(&j->s, &t_next, &c_len);
832 return json_actions[JST_VALUE][t_next](j);
833}
834
835
836/*
837 Handle EOS after the end of an object or array.
838 To do that we should pop the stack to see if
839 we are inside an object, or an array, and
840 run our 'state machine' accordingly.
841*/
842static int struct_end_eos(json_engine_t *j)
843{ return json_actions[j->stack[j->stack_p]][C_EOS](j); }
844
845
846/*
847 Handle '}' after the end of an object or array.
848 To do that we should pop the stack to see if
849 we are inside an object, or an array, and
850 run our 'state machine' accordingly.
851*/
852static int struct_end_cb(json_engine_t *j)
853{ return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
854
855
856/*
857 Handle ']' after the end of an object or array.
858 To do that we should pop the stack to see if
859 we are inside an object, or an array, and
860 run our 'state machine' accordingly.
861*/
862static int struct_end_qb(json_engine_t *j)
863{ return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
864
865
866/*
867 Handle ',' after the end of an object or array.
868 To do that we should pop the stack to see if
869 we are inside an object, or an array, and
870 run our 'state machine' accordingly.
871*/
872static int struct_end_cm(json_engine_t *j)
873{ return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
874
875
876int json_read_keyname_chr(json_engine_t *j)
877{
878 int c_len, t;
879
880 if ((c_len= json_next_char(&j->s)) > 0)
881 {
882 j->s.c_str+= c_len;
883 if (j->s.c_next>= 128 || (t= json_instr_chr_map[j->s.c_next]) <= S_ETC)
884 return 0;
885
886 switch (t)
887 {
888 case S_QUOTE:
889 for (;;) /* Skip spaces until ':'. */
890 {
891 if ((c_len= json_next_char(&j->s) > 0))
892 {
893 if (j->s.c_next == ':')
894 {
895 j->s.c_str+= c_len;
896 j->state= JST_VALUE;
897 return 1;
898 }
899
900 if (j->s.c_next < 128 && json_chr_map[j->s.c_next] == C_SPACE)
901 {
902 j->s.c_str+= c_len;
903 continue;
904 }
905 j->s.error= JE_SYN;
906 break;
907 }
908 j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
909 break;
910 }
911 return 1;
912 case S_BKSL:
913 return json_handle_esc(&j->s);
914 case S_ERR:
915 j->s.c_str-= c_len;
916 j->s.error= JE_STRING_CONST;
917 return 1;
918 }
919 }
920 j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
921 return 1;
922}
923
924
925int json_read_value(json_engine_t *j)
926{
927 int t_next, c_len, res;
928
929 if (j->state == JST_KEY)
930 {
931 while (json_read_keyname_chr(j) == 0) {}
932
933 if (j->s.error)
934 return 1;
935 }
936
937 get_first_nonspace(&j->s, &t_next, &c_len);
938
939 j->value_begin= j->s.c_str-c_len;
940 res= json_actions[JST_READ_VALUE][t_next](j);
941 j->value_end= j->s.c_str;
942 return res;
943}
944
945
946int json_scan_next(json_engine_t *j)
947{
948 int t_next;
949
950 get_first_nonspace(&j->s, &t_next, &j->sav_c_len);
951 return json_actions[j->state][t_next](j);
952}
953
954
/*
 Character classes of the JSON path parser. The class value is the
 column index of json_path_transitions[][].
*/
enum json_path_chr_classes {
 P_EOS, /* end of string */
 P_USD, /* $ */
 P_ASTER, /* * */
 P_LSQRB, /* [ */
 P_RSQRB, /* ] */
 P_POINT, /* . */
 P_ZERO, /* 0 */
 P_DIGIT, /* 123456789 */
 P_L, /* l (for "lax") */
 P_S, /* s (for "strict") */
 P_SPACE, /* space */
 P_BKSL, /* \ */
 P_QUOTE, /* " */
 P_ETC, /* everything else */
 P_ERR, /* character disallowed in JSON*/
 P_BAD, /* invalid character */
 N_PATH_CLASSES,
};
974
975
976static enum json_path_chr_classes json_path_chr_map[128] = {
977 P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR,
978 P_ERR, P_SPACE, P_SPACE, P_ERR, P_ERR, P_SPACE, P_ERR, P_ERR,
979 P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR,
980 P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR,
981
982 P_SPACE, P_ETC, P_QUOTE, P_ETC, P_USD, P_ETC, P_ETC, P_ETC,
983 P_ETC, P_ETC, P_ASTER, P_ETC, P_ETC, P_ETC, P_POINT, P_ETC,
984 P_ZERO, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
985 P_DIGIT, P_DIGIT, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
986
987 P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
988 P_ETC, P_ETC, P_ETC, P_ETC, P_L, P_ETC, P_ETC, P_ETC,
989 P_ETC, P_ETC, P_S, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
990 P_ETC, P_ETC, P_ETC, P_LSQRB, P_BKSL, P_RSQRB, P_ETC, P_ETC,
991
992 P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
993 P_ETC, P_ETC, P_ETC, P_ETC, P_L, P_ETC, P_ETC, P_ETC,
994 P_ETC, P_ETC, P_S, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
995 P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC
996};
997
998
999enum json_path_states {
1000 PS_GO, /* Initial state. */
1001 PS_LAX, /* Parse the 'lax' keyword. */
1002 PS_PT, /* New path's step begins. */
1003 PS_AR, /* Parse array step. */
1004 PS_SAR, /* space after the '['. */
1005 PS_AWD, /* Array wildcard. */
1006 PS_Z, /* '0' (as an array item number). */
1007 PS_INT, /* Parse integer (as an array item number). */
1008 PS_AS, /* Space. */
1009 PS_KEY, /* Key. */
1010 PS_KNM, /* Parse key name. */
1011 PS_KWD, /* Key wildcard. */
1012 PS_AST, /* Asterisk. */
1013 PS_DWD, /* Double wildcard. */
1014 PS_KEYX, /* Key started with quote ("). */
1015 PS_KNMX, /* Parse quoted key name. */
1016 N_PATH_STATES, /* Below are states that aren't in the transitions table. */
1017 PS_SCT, /* Parse the 'strict' keyword. */
1018 PS_EKY, /* '.' after the keyname so next step is the key. */
1019 PS_EKYX, /* Closing " for the quoted keyname. */
1020 PS_EAR, /* '[' after the keyname so next step is the array. */
1021 PS_ESC, /* Escaping in the keyname. */
1022 PS_ESCX, /* Escaping in the quoted keyname. */
1023 PS_OK, /* Path normally ended. */
1024 PS_KOK /* EOS after the keyname so end the path normally. */
1025};
1026
1027
1028static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1029{
1030/*
1031 EOS $, * [ ] . 0
1032 1..9 L S SPACE \ " ETC
1033 ERR BAD
1034*/
1035/* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1036 JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1037 JE_NOT_JSON_CHR, JE_BAD_CHR},
1038/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1039 JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1040 JE_NOT_JSON_CHR, JE_BAD_CHR},
1041/* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1042 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1043 JE_NOT_JSON_CHR, JE_BAD_CHR},
1044/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
1045 PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1046 JE_NOT_JSON_CHR, JE_BAD_CHR},
1047/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z,
1048 PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1049 JE_NOT_JSON_CHR, JE_BAD_CHR},
1050/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1051 JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1052 JE_NOT_JSON_CHR, JE_BAD_CHR},
1053/* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1054 JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1055 JE_NOT_JSON_CHR, JE_BAD_CHR},
1056/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_INT,
1057 PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1058 JE_NOT_JSON_CHR, JE_BAD_CHR},
1059/* AS */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, JE_SYN,
1060 JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1061 JE_NOT_JSON_CHR, JE_BAD_CHR},
1062/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
1063 PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1064 JE_NOT_JSON_CHR, JE_BAD_CHR},
1065/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
1066 PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1067 JE_NOT_JSON_CHR, JE_BAD_CHR},
1068/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN,
1069 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1070 JE_NOT_JSON_CHR, JE_BAD_CHR},
1071/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1072 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1073 JE_NOT_JSON_CHR, JE_BAD_CHR},
1074/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1075 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1076 JE_NOT_JSON_CHR, JE_BAD_CHR},
1077/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1078 PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1079 JE_NOT_JSON_CHR, JE_BAD_CHR},
1080/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1081 PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
1082 JE_NOT_JSON_CHR, JE_BAD_CHR},
1083};
1084
1085
1086int json_path_setup(json_path_t *p,
1087 CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1088{
1089 int c_len, t_next, state= PS_GO;
1090 enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1091
1092 json_string_setup(&p->s, i_cs, str, end);
1093
1094 p->steps[0].type= JSON_PATH_ARRAY_WILD;
1095 p->last_step= p->steps;
1096 p->mode_strict= FALSE;
1097 p->types_used= JSON_PATH_KEY_NULL;
1098
1099 do
1100 {
1101 if ((c_len= json_next_char(&p->s)) <= 0)
1102 t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1103 else
1104 t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1105
1106 if ((state= json_path_transitions[state][t_next]) < 0)
1107 return p->s.error= state;
1108
1109 p->s.c_str+= c_len;
1110
1111 switch (state)
1112 {
1113 case PS_LAX:
1114 if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1115 return 1;
1116 p->mode_strict= FALSE;
1117 continue;
1118 case PS_SCT:
1119 if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1120 return 1;
1121 p->mode_strict= TRUE;
1122 state= PS_LAX;
1123 continue;
1124 case PS_KWD:
1125 case PS_AWD:
1126 p->last_step->type|= JSON_PATH_WILD;
1127 p->types_used|= JSON_PATH_WILD;
1128 continue;
1129 case PS_INT:
1130 p->last_step->n_item*= 10;
1131 p->last_step->n_item+= p->s.c_next - '0';
1132 continue;
1133 case PS_EKYX:
1134 p->last_step->key_end= p->s.c_str - c_len;
1135 state= PS_PT;
1136 continue;
1137 case PS_EKY:
1138 p->last_step->key_end= p->s.c_str - c_len;
1139 state= PS_KEY;
1140 /* fall through */
1141 case PS_KEY:
1142 p->last_step++;
1143 if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1144 return p->s.error= JE_DEPTH;
1145 p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1146 double_wildcard= JSON_PATH_KEY_NULL;
1147 /* fall through */
1148 case PS_KEYX:
1149 p->last_step->key= p->s.c_str;
1150 continue;
1151 case PS_EAR:
1152 p->last_step->key_end= p->s.c_str - c_len;
1153 state= PS_AR;
1154 /* fall through */
1155 case PS_AR:
1156 p->last_step++;
1157 if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1158 return p->s.error= JE_DEPTH;
1159 p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1160 double_wildcard= JSON_PATH_KEY_NULL;
1161 p->last_step->n_item= 0;
1162 continue;
1163 case PS_ESC:
1164 if (json_handle_esc(&p->s))
1165 return 1;
1166 state= PS_KNM;
1167 continue;
1168 case PS_ESCX:
1169 if (json_handle_esc(&p->s))
1170 return 1;
1171 state= PS_KNMX;
1172 continue;
1173 case PS_KOK:
1174 p->last_step->key_end= p->s.c_str - c_len;
1175 state= PS_OK;
1176 break; /* 'break' as the loop supposed to end after that. */
1177 case PS_DWD:
1178 double_wildcard= JSON_PATH_DOUBLE_WILD;
1179 continue;
1180 };
1181 } while (state != PS_OK);
1182
1183 return double_wildcard ? (p->s.error= JE_SYN) : 0;
1184}
1185
1186
1187int json_skip_to_level(json_engine_t *j, int level)
1188{
1189 do {
1190 if (j->stack_p < level)
1191 return 0;
1192 } while (json_scan_next(j) == 0);
1193
1194 return 1;
1195}
1196
1197
1198int json_skip_key(json_engine_t *j)
1199{
1200 if (json_read_value(j))
1201 return 1;
1202
1203 if (json_value_scalar(j))
1204 return 0;
1205
1206 return json_skip_level(j);
1207}
1208
1209
1210#define SKIPPED_STEP_MARK ((uint) ~0)
1211
1212/*
1213 Current step of the patch matches the JSON construction.
1214 Now we should either stop the search or go to the next
1215 step of the path.
1216*/
1217static int handle_match(json_engine_t *je, json_path_t *p,
1218 json_path_step_t **p_cur_step, uint *array_counters)
1219{
1220 json_path_step_t *next_step= *p_cur_step + 1;
1221
1222 DBUG_ASSERT(*p_cur_step < p->last_step);
1223
1224 if (json_read_value(je))
1225 return 1;
1226
1227 if (json_value_scalar(je))
1228 {
1229 while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
1230 {
1231 if (++next_step > p->last_step)
1232 {
1233 je->s.c_str= je->value_begin;
1234 return 1;
1235 }
1236 }
1237 return 0;
1238 }
1239
1240 if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
1241 je->value_type & JSON_VALUE_OBJECT)
1242 {
1243 do
1244 {
1245 array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
1246 if (++next_step > p->last_step)
1247 {
1248 je->s.c_str= je->value_begin;
1249 je->stack_p--;
1250 return 1;
1251 }
1252 } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
1253 }
1254
1255
1256 array_counters[next_step - p->steps]= 0;
1257
1258 if ((int) je->value_type !=
1259 (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
1260 return json_skip_level(je);
1261
1262 *p_cur_step= next_step;
1263 return 0;
1264}
1265
1266
1267/*
1268 Check if the name of the current JSON key matches
1269 the step of the path.
1270*/
1271int json_key_matches(json_engine_t *je, json_string_t *k)
1272{
1273 while (json_read_keyname_chr(je) == 0)
1274 {
1275 if (json_read_string_const_chr(k) ||
1276 je->s.c_next != k->c_next)
1277 return 0;
1278 }
1279
1280 return json_read_string_const_chr(k);
1281}
1282
1283
1284int json_find_path(json_engine_t *je,
1285 json_path_t *p, json_path_step_t **p_cur_step,
1286 uint *array_counters)
1287{
1288 json_string_t key_name;
1289
1290 json_string_set_cs(&key_name, p->s.cs);
1291
1292 do
1293 {
1294 json_path_step_t *cur_step= *p_cur_step;
1295 switch (je->state)
1296 {
1297 case JST_KEY:
1298 DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
1299 if (!(cur_step->type & JSON_PATH_WILD))
1300 {
1301 json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
1302 if (!json_key_matches(je, &key_name))
1303 {
1304 if (json_skip_key(je))
1305 goto exit;
1306 continue;
1307 }
1308 }
1309 if (cur_step == p->last_step ||
1310 handle_match(je, p, p_cur_step, array_counters))
1311 goto exit;
1312 break;
1313 case JST_VALUE:
1314 DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
1315 if (cur_step->type & JSON_PATH_WILD ||
1316 cur_step->n_item == array_counters[cur_step - p->steps]++)
1317 {
1318 /* Array item matches. */
1319 if (cur_step == p->last_step ||
1320 handle_match(je, p, p_cur_step, array_counters))
1321 goto exit;
1322 }
1323 else
1324 json_skip_array_item(je);
1325 break;
1326 case JST_OBJ_END:
1327 do
1328 {
1329 (*p_cur_step)--;
1330 } while (*p_cur_step > p->steps &&
1331 array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
1332 break;
1333 case JST_ARRAY_END:
1334 (*p_cur_step)--;
1335 break;
1336 default:
1337 DBUG_ASSERT(0);
1338 break;
1339 };
1340 } while (json_scan_next(je) == 0);
1341
1342 /* No luck. */
1343 return 1;
1344
1345exit:
1346 return je->s.error;
1347}
1348
1349
1350int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
1351 uint n_paths, json_path_t *paths, uint *path_depths)
1352{
1353 state->n_paths= n_paths;
1354 state->paths= paths;
1355 state->cur_depth= 0;
1356 state->path_depths= path_depths;
1357 return json_find_paths_next(je, state);
1358}
1359
1360
1361int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
1362{
1363 uint p_c;
1364 int path_found, no_match_found;
1365 do
1366 {
1367 switch (je->state)
1368 {
1369 case JST_KEY:
1370 path_found= FALSE;
1371 no_match_found= TRUE;
1372 for (p_c=0; p_c < state->n_paths; p_c++)
1373 {
1374 json_path_step_t *cur_step;
1375 if (state->path_depths[p_c] <
1376 state->cur_depth /* Path already failed. */ ||
1377 !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
1378 JSON_PATH_KEY))
1379 continue;
1380
1381 if (!(cur_step->type & JSON_PATH_WILD))
1382 {
1383 json_string_t key_name;
1384 json_string_setup(&key_name, state->paths[p_c].s.cs,
1385 cur_step->key, cur_step->key_end);
1386 if (!json_key_matches(je, &key_name))
1387 continue;
1388 }
1389 if ((uint) (cur_step - state->paths[p_c].last_step) == state->cur_depth)
1390 path_found= TRUE;
1391 else
1392 {
1393 no_match_found= FALSE;
1394 state->path_depths[p_c]= state->cur_depth + 1;
1395 }
1396 }
1397 if (path_found)
1398 /* Return the result. */
1399 goto exit;
1400 if (no_match_found)
1401 {
1402 /* No possible paths left to check. Just skip the level. */
1403 if (json_skip_level(je))
1404 goto exit;
1405 }
1406
1407 break;
1408 case JST_VALUE:
1409 path_found= FALSE;
1410 no_match_found= TRUE;
1411 for (p_c=0; p_c < state->n_paths; p_c++)
1412 {
1413 json_path_step_t *cur_step;
1414 if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
1415 !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
1416 JSON_PATH_ARRAY))
1417 continue;
1418 if (cur_step->type & JSON_PATH_WILD ||
1419 cur_step->n_item == state->array_counters[state->cur_depth])
1420 {
1421 /* Array item matches. */
1422 if ((uint) (cur_step - state->paths[p_c].last_step) == state->cur_depth)
1423 path_found= TRUE;
1424 else
1425 {
1426 no_match_found= FALSE;
1427 state->path_depths[p_c]= state->cur_depth + 1;
1428 }
1429 }
1430 }
1431
1432 if (path_found)
1433 goto exit;
1434
1435 if (no_match_found)
1436 json_skip_array_item(je);
1437
1438 state->array_counters[state->cur_depth]++;
1439 break;
1440 case JST_OBJ_START:
1441 case JST_ARRAY_START:
1442 for (p_c=0; p_c < state->n_paths; p_c++)
1443 {
1444 if (state->path_depths[p_c] < state->cur_depth)
1445 /* Path already failed. */
1446 continue;
1447 if (state->paths[p_c].steps[state->cur_depth].type &
1448 ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
1449 state->path_depths[p_c]++;
1450 }
1451 state->cur_depth++;
1452 break;
1453 case JST_OBJ_END:
1454 case JST_ARRAY_END:
1455 for (p_c=0; p_c < state->n_paths; p_c++)
1456 {
1457 if (state->path_depths[p_c] < state->cur_depth)
1458 continue;
1459 state->path_depths[p_c]--;
1460 }
1461 state->cur_depth--;
1462 break;
1463 default:
1464 DBUG_ASSERT(0);
1465 break;
1466 };
1467 } while (json_scan_next(je) == 0);
1468
1469 /* No luck. */
1470 return 1;
1471
1472exit:
1473 return je->s.error;
1474}
1475
1476
1477int json_append_ascii(CHARSET_INFO *json_cs,
1478 uchar *json, uchar *json_end,
1479 const uchar *ascii, const uchar *ascii_end)
1480{
1481 const uchar *json_start= json;
1482 while (ascii < ascii_end)
1483 {
1484 int c_len;
1485 if ((c_len= json_cs->cset->wc_mb(json_cs, (my_wc_t) *ascii,
1486 json, json_end)) > 0)
1487 {
1488 json+= c_len;
1489 ascii++;
1490 continue;
1491 }
1492
1493 /* Error return. */
1494 return c_len;
1495 }
1496
1497 return (int)(json - json_start);
1498}
1499
1500
1501int json_unescape(CHARSET_INFO *json_cs,
1502 const uchar *json_str, const uchar *json_end,
1503 CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
1504{
1505 json_string_t s;
1506 const uchar *res_b= res;
1507
1508 json_string_setup(&s, json_cs, json_str, json_end);
1509 while (json_read_string_const_chr(&s) == 0)
1510 {
1511 int c_len;
1512 if ((c_len= res_cs->cset->wc_mb(res_cs, s.c_next, res, res_end)) > 0)
1513 {
1514 res+= c_len;
1515 continue;
1516 }
1517 if (c_len == MY_CS_ILUNI)
1518 {
1519 /*
1520 Result charset doesn't support the json's character.
1521 Let's replace it with the '?' symbol.
1522 */
1523 if ((c_len= res_cs->cset->wc_mb(res_cs, '?', res, res_end)) > 0)
1524 {
1525 res+= c_len;
1526 continue;
1527 }
1528 }
1529 /* Result buffer is too small. */
1530 return -1;
1531 }
1532
1533 return s.error==JE_EOS ? (int)(res - res_b) : -1;
1534}
1535
1536
/*
  How a character has to be written inside a JSON string.
  Apart from ESC_ the value of each class is the character that
  follows the backslash in the escaped form.
*/
enum json_esc_char_classes {
  ESC_= 0,     /* No need to escape. */
  ESC_U= 'u',  /* Character not allowed in JSON. Always escape as \uXXXX. */
  ESC_B= 'b',  /* Backspace. Escape as \b */
  ESC_F= 'f',  /* Formfeed. Escape as \f */
  ESC_N= 'n',  /* Newline. Escape as \n */
  ESC_R= 'r',  /* Return. Escape as \r */
  ESC_T= 't',  /* Tab. Escape as \t */
  ESC_BS= '\\' /* Backslash or '"'. Escape by the \ prefix. */
};


/*
  Escape class for each of the first 0x60 character codes.
  The table is only ever read, hence const.
*/
static const enum json_esc_char_classes json_escape_chr_map[0x60] = {
  /* 0x00 */ ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  /* 0x08 */ ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
  /* 0x10 */ ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  /* 0x18 */ ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,

  /* 0x20 */ ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  /* 0x28 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  /* 0x30 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  /* 0x38 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,

  /* 0x40 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  /* 0x48 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  /* 0x50 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  /* 0x58 */ ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
};


/* Hexadecimal digits used for \uXXXX escapes (no terminating zero). */
static const char hexconv[16] = "0123456789ABCDEF";
1570
1571
1572int json_escape(CHARSET_INFO *str_cs,
1573 const uchar *str, const uchar *str_end,
1574 CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
1575{
1576 const uchar *json_start= json;
1577
1578 while (str < str_end)
1579 {
1580 my_wc_t c_chr;
1581 int c_len;
1582 if ((c_len= str_cs->cset->mb_wc(str_cs, &c_chr, str, str_end)) > 0)
1583 {
1584 enum json_esc_char_classes c_class;
1585
1586 str+= c_len;
1587 if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
1588 {
1589 if ((c_len= json_cs->cset->wc_mb(json_cs, c_chr, json, json_end)) > 0)
1590 {
1591 json+= c_len;
1592 continue;
1593 }
1594 if (c_len < 0)
1595 {
1596 /* JSON buffer is depleted. */
1597 return -1;
1598 }
1599
1600 /* JSON charset cannot convert this character. */
1601 c_class= ESC_U;
1602 }
1603
1604 if ((c_len= json_cs->cset->wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
1605 (c_len= json_cs->cset->wc_mb(json_cs,
1606 (c_class == ESC_BS) ? c_chr : c_class,
1607 json+= c_len, json_end)) <= 0)
1608 {
1609 /* JSON buffer is depleted. */
1610 return -1;
1611 }
1612 json+= c_len;
1613
1614 if (c_class != ESC_U)
1615 continue;
1616
1617 {
1618 /* We have to use /uXXXX escaping. */
1619 uchar utf16buf[4];
1620 uchar code_str[8];
1621 int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);
1622
1623 code_str[0]= hexconv[utf16buf[0] >> 4];
1624 code_str[1]= hexconv[utf16buf[0] & 15];
1625 code_str[2]= hexconv[utf16buf[1] >> 4];
1626 code_str[3]= hexconv[utf16buf[1] & 15];
1627
1628 if (u_len > 2)
1629 {
1630 code_str[4]= hexconv[utf16buf[2] >> 4];
1631 code_str[5]= hexconv[utf16buf[2] & 15];
1632 code_str[6]= hexconv[utf16buf[3] >> 4];
1633 code_str[7]= hexconv[utf16buf[3] & 15];
1634 }
1635
1636 if ((c_len= json_append_ascii(json_cs, json, json_end,
1637 code_str, code_str+u_len*2)) > 0)
1638 {
1639 json+= c_len;
1640 continue;
1641 }
1642 /* JSON buffer is depleted. */
1643 return -1;
1644 }
1645 }
1646 }
1647
1648 return (int)(json - json_start);
1649}
1650
1651
1652int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
1653 const uchar *str, const uchar *end,
1654 json_path_t *p)
1655{
1656 json_scan_start(je, i_cs, str, end);
1657 p->last_step= p->steps - 1;
1658 return 0;
1659}
1660
1661
1662int json_get_path_next(json_engine_t *je, json_path_t *p)
1663{
1664 if (p->last_step < p->steps)
1665 {
1666 if (json_read_value(je))
1667 return 1;
1668
1669 p->last_step= p->steps;
1670 p->steps[0].type= JSON_PATH_ARRAY_WILD;
1671 p->steps[0].n_item= 0;
1672 return 0;
1673 }
1674 else
1675 {
1676 if (json_value_scalar(je))
1677 {
1678 if (p->last_step->type & JSON_PATH_ARRAY)
1679 p->last_step->n_item++;
1680 }
1681 else
1682 {
1683 p->last_step++;
1684 p->last_step->type= (enum json_path_step_types) je->value_type;
1685 p->last_step->n_item= 0;
1686 }
1687
1688 if (json_scan_next(je))
1689 return 1;
1690 }
1691
1692 do
1693 {
1694 switch (je->state)
1695 {
1696 case JST_KEY:
1697 p->last_step->key= je->s.c_str;
1698 do
1699 {
1700 p->last_step->key_end= je->s.c_str;
1701 } while (json_read_keyname_chr(je) == 0);
1702 if (je->s.error)
1703 return 1;
1704 /* Now we have je.state == JST_VALUE, so let's handle it. */
1705
1706 /* fall through */
1707 case JST_VALUE:
1708 if (json_read_value(je))
1709 return 1;
1710 return 0;
1711 case JST_OBJ_END:
1712 case JST_ARRAY_END:
1713 p->last_step--;
1714 if (p->last_step->type & JSON_PATH_ARRAY)
1715 p->last_step->n_item++;
1716 break;
1717 default:
1718 break;
1719 }
1720 } while (json_scan_next(je) == 0);
1721
1722 return 1;
1723}
1724
1725
1726int json_path_parts_compare(
1727 const json_path_step_t *a, const json_path_step_t *a_end,
1728 const json_path_step_t *b, const json_path_step_t *b_end,
1729 enum json_value_types vt)
1730{
1731 int res, res2;
1732
1733 while (a <= a_end)
1734 {
1735 if (b > b_end)
1736 {
1737 while (vt != JSON_VALUE_ARRAY &&
1738 (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY &&
1739 a->n_item == 0)
1740 {
1741 if (++a > a_end)
1742 return 0;
1743 }
1744 return -2;
1745 }
1746
1747 DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);
1748
1749
1750 if (a->type & JSON_PATH_ARRAY)
1751 {
1752 if (b->type & JSON_PATH_ARRAY)
1753 {
1754 if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
1755 goto step_fits;
1756 goto step_failed;
1757 }
1758 if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
1759 goto step_fits_autowrap;
1760 goto step_failed;
1761 }
1762 else /* JSON_PATH_KEY */
1763 {
1764 if (!(b->type & JSON_PATH_KEY))
1765 goto step_failed;
1766
1767 if (!(a->type & JSON_PATH_WILD) &&
1768 (a->key_end - a->key != b->key_end - b->key ||
1769 memcmp(a->key, b->key, a->key_end - a->key) != 0))
1770 goto step_failed;
1771
1772 goto step_fits;
1773 }
1774step_failed:
1775 if (!(a->type & JSON_PATH_DOUBLE_WILD))
1776 return -1;
1777 b++;
1778 continue;
1779
1780step_fits:
1781 b++;
1782 if (!(a->type & JSON_PATH_DOUBLE_WILD))
1783 {
1784 a++;
1785 continue;
1786 }
1787
1788 /* Double wild handling needs recursions. */
1789 res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
1790 if (res == 0)
1791 return 0;
1792
1793 res2= json_path_parts_compare(a, a_end, b, b_end, vt);
1794
1795 return (res2 >= 0) ? res2 : res;
1796
1797step_fits_autowrap:
1798 if (!(a->type & JSON_PATH_DOUBLE_WILD))
1799 {
1800 a++;
1801 continue;
1802 }
1803
1804 /* Double wild handling needs recursions. */
1805 res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
1806 if (res == 0)
1807 return 0;
1808
1809 res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);
1810
1811 return (res2 >= 0) ? res2 : res;
1812
1813 }
1814
1815 return b <= b_end;
1816}
1817
1818
1819int json_path_compare(const json_path_t *a, const json_path_t *b,
1820 enum json_value_types vt)
1821{
1822 return json_path_parts_compare(a->steps+1, a->last_step,
1823 b->steps+1, b->last_step, vt);
1824}
1825
1826