1#include "jsi.h"
2#include "jslex.h"
3#include "utf.h"
4
5JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3);
6
7static void jsY_error(js_State *J, const char *fmt, ...)
8{
9 va_list ap;
10 char buf[512];
11 char msgbuf[256];
12
13 va_start(ap, fmt);
14 vsnprintf(msgbuf, 256, fmt, ap);
15 va_end(ap);
16
17 snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline);
18 strcat(buf, msgbuf);
19
20 js_newsyntaxerror(J, buf);
21 js_throw(J);
22}
23
/*
 * Printable names for tokens, indexed by token value (see jsY_tokenstring).
 *
 *   0          end of input
 *   1..127     single-character tokens: the ASCII character itself, with
 *              control characters and DEL spelled as \xNN
 *   128..255   unused slots (null pointers)
 *   256...     identifier/number/string/regexp, the multi-character
 *              operators, then the keywords in the same order as keywords[]
 *
 * The backslash entry must be written "'\\'" in C source; a single
 * backslash would escape the closing quote and yield the text ''.
 */
static const char *tokenstring[] = {
	"(end-of-file)",
	"'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
	"'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
	"'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
	"'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
	"' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
	"'X'", "'Y'", "'Z'", "'['", "'\\'", "']'", "'^'", "'_'",
	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",

	/* 128..255: no tokens live here */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

	/* literal-like tokens */
	"(identifier)", "(number)", "(string)", "(regexp)",

	/* multi-character operators */
	"'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
	"'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
	"'+='", "'-='", "'*='", "'/='", "'%='",
	"'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
	"'++'", "'--'",

	/* keywords */
	"'break'", "'case'", "'catch'", "'continue'", "'debugger'",
	"'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
	"'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
	"'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
	"'void'", "'while'", "'with'",
};
66
67const char *jsY_tokenstring(int token)
68{
69 if (token >= 0 && token < (int)nelem(tokenstring))
70 if (tokenstring[token])
71 return tokenstring[token];
72 return "<unknown>";
73}
74
/*
 * Reserved words, in ascending strcmp order so that jsY_findword can
 * binary-search them. jsY_findkeyword maps position i to token TK_BREAK + i,
 * so the order here must not change independently of the token numbering.
 */
static const char *keywords[] = {
	"break",
	"case", "catch", "continue",
	"debugger", "default", "delete", "do",
	"else",
	"false", "finally", "for", "function",
	"if", "in", "instanceof",
	"new", "null",
	"return",
	"switch",
	"this", "throw", "true", "try", "typeof",
	"var", "void",
	"while", "with",
};
81
/*
 * Binary search for a string in a sorted table.
 *
 * s    - the string to look for
 * list - array of num strings in ascending strcmp order
 * num  - number of entries in list
 *
 * Returns the index of the entry equal to s, or -1 if there is none.
 */
int jsY_findword(const char *s, const char **list, int num)
{
	int lo = 0;
	int hi = num - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;
		int order = strcmp(s, list[mid]);

		if (order == 0)
			return mid;
		if (order < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}

	return -1;
}
98
99static int jsY_findkeyword(js_State *J, const char *s)
100{
101 int i = jsY_findword(s, keywords, nelem(keywords));
102 if (i >= 0) {
103 J->text = keywords[i];
104 return TK_BREAK + i; /* first keyword + i */
105 }
106 J->text = js_intern(J, s);
107 return TK_IDENTIFIER;
108}
109
/*
 * Return 1 if c is one of the whitespace code points the lexer skips
 * (line terminators are handled separately by jsY_isnewline), else 0.
 */
int jsY_iswhite(int c)
{
	switch (c) {
	case 0x9:    /* horizontal tab */
	case 0xB:    /* vertical tab */
	case 0xC:    /* form feed */
	case 0x20:   /* space */
	case 0xA0:   /* no-break space */
	case 0xFEFF: /* byte order mark */
		return 1;
	default:
		return 0;
	}
}
114
/*
 * Return 1 if c is a line terminator code point, else 0.
 */
int jsY_isnewline(int c)
{
	switch (c) {
	case 0xA:    /* line feed */
	case 0xD:    /* carriage return */
	case 0x2028: /* line separator */
	case 0x2029: /* paragraph separator */
		return 1;
	default:
		return 0;
	}
}
119
/*
 * ASCII-only character class tests, each defined only when no macro of the
 * same name already exists.
 *
 * Every use of the parameter is parenthesized so that an expression argument
 * (for example a conditional expression) is classified as a whole.
 * The argument is still evaluated more than once: do not pass expressions
 * with side effects.
 *
 * Note that ishex() matches only the letters a-f and A-F; decimal digits
 * are covered separately by isdigit() (jsY_ishex combines the two).
 */
#ifndef isalpha
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#endif
#ifndef isdigit
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
#ifndef ishex
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#endif
129
/*
 * Return 1 if c may begin an identifier: an ASCII letter, '$', '_', or any
 * rune that isalpharune() accepts. Returns 0 otherwise.
 */
static int jsY_isidentifierstart(int c)
{
	if (isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) != 0;
}
134
/*
 * Return 1 if c may continue an identifier: a decimal digit, an ASCII
 * letter, '$', '_', or any rune that isalpharune() accepts. Returns 0
 * otherwise.
 */
static int jsY_isidentifierpart(int c)
{
	if (isdigit(c) || isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) != 0;
}
139
/*
 * Function form of the isdigit() test: nonzero when c is a decimal digit.
 */
static int jsY_isdec(int c)
{
	int digit = isdigit(c);

	return digit;
}
144
/*
 * Return 1 if c is a hexadecimal digit (0-9, a-f or A-F), else 0.
 */
int jsY_ishex(int c)
{
	if (isdigit(c))
		return 1;
	return ishex(c) ? 1 : 0;
}
149
/*
 * Convert a hexadecimal digit character to its value (0..15).
 * Any character that is not a hexadecimal digit yields 0.
 */
int jsY_tohex(int c)
{
	int value = 0;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 0xA + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 0xA + (c - 'A');

	return value;
}
157
158static void jsY_next(js_State *J)
159{
160 Rune c;
161 J->source += chartorune(&c, J->source);
162 /* consume CR LF as one unit */
163 if (c == '\r' && *J->source == '\n')
164 ++J->source;
165 if (jsY_isnewline(c)) {
166 J->line++;
167 c = '\n';
168 }
169 J->lexchar = c;
170}
171
/*
 * jsY_accept(J, x): if the lookahead character equals x, consume it and
 * evaluate to 1; otherwise leave the input alone and evaluate to 0.
 *
 * jsY_expect(J, x): consume x, or raise a syntax error "expected '<x>'" via
 * jsY_error when the lookahead is something else. Wrapped in do/while(0) so
 * it behaves as a single statement, including inside an if/else.
 *
 * Both macros may evaluate x more than once: pass side-effect-free values.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)

#define jsY_expect(J, x) do { if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", (x)); } while (0)
175
176static void jsY_unescape(js_State *J)
177{
178 if (jsY_accept(J, '\\')) {
179 if (jsY_accept(J, 'u')) {
180 int x = 0;
181 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J);
182 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J);
183 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J);
184 if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar);
185 J->lexchar = x;
186 return;
187 }
188error:
189 jsY_error(J, "unexpected escape sequence");
190 }
191}
192
193static void textinit(js_State *J)
194{
195 if (!J->lexbuf.text) {
196 J->lexbuf.cap = 4096;
197 J->lexbuf.text = js_malloc(J, J->lexbuf.cap);
198 }
199 J->lexbuf.len = 0;
200}
201
202static void textpush(js_State *J, Rune c)
203{
204 int n = runelen(c);
205 if (J->lexbuf.len + n > J->lexbuf.cap) {
206 J->lexbuf.cap = J->lexbuf.cap * 2;
207 J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap);
208 }
209 J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c);
210}
211
212static char *textend(js_State *J)
213{
214 textpush(J, 0);
215 return J->lexbuf.text;
216}
217
218static void lexlinecomment(js_State *J)
219{
220 while (J->lexchar && J->lexchar != '\n')
221 jsY_next(J);
222}
223
224static int lexcomment(js_State *J)
225{
226 /* already consumed initial '/' '*' sequence */
227 while (J->lexchar != 0) {
228 if (jsY_accept(J, '*')) {
229 while (J->lexchar == '*')
230 jsY_next(J);
231 if (jsY_accept(J, '/'))
232 return 0;
233 }
234 else
235 jsY_next(J);
236 }
237 return -1;
238}
239
240static double lexhex(js_State *J)
241{
242 double n = 0;
243 if (!jsY_ishex(J->lexchar))
244 jsY_error(J, "malformed hexadecimal number");
245 while (jsY_ishex(J->lexchar)) {
246 n = n * 16 + jsY_tohex(J->lexchar);
247 jsY_next(J);
248 }
249 return n;
250}
251
252#if 0
253
254static double lexinteger(js_State *J)
255{
256 double n = 0;
257 if (!jsY_isdec(J->lexchar))
258 jsY_error(J, "malformed number");
259 while (jsY_isdec(J->lexchar)) {
260 n = n * 10 + (J->lexchar - '0');
261 jsY_next(J);
262 }
263 return n;
264}
265
266static double lexfraction(js_State *J)
267{
268 double n = 0;
269 double d = 1;
270 while (jsY_isdec(J->lexchar)) {
271 n = n * 10 + (J->lexchar - '0');
272 d = d * 10;
273 jsY_next(J);
274 }
275 return n / d;
276}
277
278static double lexexponent(js_State *J)
279{
280 double sign;
281 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
282 if (jsY_accept(J, '-')) sign = -1;
283 else if (jsY_accept(J, '+')) sign = 1;
284 else sign = 1;
285 return sign * lexinteger(J);
286 }
287 return 0;
288}
289
/*
 * (Compiled out by the enclosing #if 0; the #else branch holds the version
 * that is actually built.)
 *
 * Lex a numeric literal by accumulating its value digit by digit.
 * Stores the result in J->number and returns TK_NUMBER, or returns '.' when
 * a lone dot is not followed by a digit.
 */
static int lexnumber(js_State *J)
{
	double n;
	double e;

	if (jsY_accept(J, '0')) {
		/* "0x" / "0X" introduces a hexadecimal literal */
		if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
			J->number = lexhex(J);
			return TK_NUMBER;
		}
		/* a zero followed by another digit is rejected */
		if (jsY_isdec(J->lexchar))
			jsY_error(J, "number with leading zero");
		n = 0;
		if (jsY_accept(J, '.'))
			n += lexfraction(J);
	} else if (jsY_accept(J, '.')) {
		/* a dot not followed by a digit is the '.' punctuator */
		if (!jsY_isdec(J->lexchar))
			return '.';
		n = lexfraction(J);
	} else {
		n = lexinteger(J);
		if (jsY_accept(J, '.'))
			n += lexfraction(J);
	}

	/* scale by the power of ten given by the exponent part, if any */
	e = lexexponent(J);
	if (e < 0)
		n /= pow(10, -e);
	else if (e > 0)
		n *= pow(10, e);

	/* a literal may not run straight into an identifier */
	if (jsY_isidentifierstart(J->lexchar))
		jsY_error(J, "number with letter suffix");

	J->number = n;
	return TK_NUMBER;
}
327
328#else
329
330static int lexnumber(js_State *J)
331{
332 const char *s = J->source - 1;
333
334 if (jsY_accept(J, '0')) {
335 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
336 J->number = lexhex(J);
337 return TK_NUMBER;
338 }
339 if (jsY_isdec(J->lexchar))
340 jsY_error(J, "number with leading zero");
341 if (jsY_accept(J, '.')) {
342 while (jsY_isdec(J->lexchar))
343 jsY_next(J);
344 }
345 } else if (jsY_accept(J, '.')) {
346 if (!jsY_isdec(J->lexchar))
347 return '.';
348 while (jsY_isdec(J->lexchar))
349 jsY_next(J);
350 } else {
351 while (jsY_isdec(J->lexchar))
352 jsY_next(J);
353 if (jsY_accept(J, '.')) {
354 while (jsY_isdec(J->lexchar))
355 jsY_next(J);
356 }
357 }
358
359 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
360 if (J->lexchar == '-' || J->lexchar == '+')
361 jsY_next(J);
362 if (jsY_isdec(J->lexchar))
363 while (jsY_isdec(J->lexchar))
364 jsY_next(J);
365 else
366 jsY_error(J, "missing exponent");
367 }
368
369 if (jsY_isidentifierstart(J->lexchar))
370 jsY_error(J, "number with letter suffix");
371
372 J->number = js_strtod(s, NULL);
373 return TK_NUMBER;
374}
375
376#endif
377
378static int lexescape(js_State *J)
379{
380 int x = 0;
381
382 /* already consumed '\' */
383
384 if (jsY_accept(J, '\n'))
385 return 0;
386
387 switch (J->lexchar) {
388 case 0: jsY_error(J, "unterminated escape sequence");
389 case 'u':
390 jsY_next(J);
391 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
392 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
393 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
394 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
395 textpush(J, x);
396 break;
397 case 'x':
398 jsY_next(J);
399 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
400 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
401 textpush(J, x);
402 break;
403 case '0': textpush(J, 0); jsY_next(J); break;
404 case '\\': textpush(J, '\\'); jsY_next(J); break;
405 case '\'': textpush(J, '\''); jsY_next(J); break;
406 case '"': textpush(J, '"'); jsY_next(J); break;
407 case 'b': textpush(J, '\b'); jsY_next(J); break;
408 case 'f': textpush(J, '\f'); jsY_next(J); break;
409 case 'n': textpush(J, '\n'); jsY_next(J); break;
410 case 'r': textpush(J, '\r'); jsY_next(J); break;
411 case 't': textpush(J, '\t'); jsY_next(J); break;
412 case 'v': textpush(J, '\v'); jsY_next(J); break;
413 default: textpush(J, J->lexchar); jsY_next(J); break;
414 }
415 return 0;
416}
417
418static int lexstring(js_State *J)
419{
420 const char *s;
421
422 int q = J->lexchar;
423 jsY_next(J);
424
425 textinit(J);
426
427 while (J->lexchar != q) {
428 if (J->lexchar == 0 || J->lexchar == '\n')
429 jsY_error(J, "string not terminated");
430 if (jsY_accept(J, '\\')) {
431 if (lexescape(J))
432 jsY_error(J, "malformed escape sequence");
433 } else {
434 textpush(J, J->lexchar);
435 jsY_next(J);
436 }
437 }
438 jsY_expect(J, q);
439
440 s = textend(J);
441
442 J->text = js_intern(J, s);
443 return TK_STRING;
444}
445
446/* the ugliest language wart ever... */
447static int isregexpcontext(int last)
448{
449 switch (last) {
450 case ']':
451 case ')':
452 case '}':
453 case TK_IDENTIFIER:
454 case TK_NUMBER:
455 case TK_STRING:
456 case TK_FALSE:
457 case TK_NULL:
458 case TK_THIS:
459 case TK_TRUE:
460 return 0;
461 default:
462 return 1;
463 }
464}
465
466static int lexregexp(js_State *J)
467{
468 const char *s;
469 int g, m, i;
470 int inclass = 0;
471
472 /* already consumed initial '/' */
473
474 textinit(J);
475
476 /* regexp body */
477 while (J->lexchar != '/' || inclass) {
478 if (J->lexchar == 0 || J->lexchar == '\n') {
479 jsY_error(J, "regular expression not terminated");
480 } else if (jsY_accept(J, '\\')) {
481 if (jsY_accept(J, '/')) {
482 textpush(J, '/');
483 } else {
484 textpush(J, '\\');
485 if (J->lexchar == 0 || J->lexchar == '\n')
486 jsY_error(J, "regular expression not terminated");
487 textpush(J, J->lexchar);
488 jsY_next(J);
489 }
490 } else {
491 if (J->lexchar == '[' && !inclass)
492 inclass = 1;
493 if (J->lexchar == ']' && inclass)
494 inclass = 0;
495 textpush(J, J->lexchar);
496 jsY_next(J);
497 }
498 }
499 jsY_expect(J, '/');
500
501 s = textend(J);
502
503 /* regexp flags */
504 g = i = m = 0;
505
506 while (jsY_isidentifierpart(J->lexchar)) {
507 if (jsY_accept(J, 'g')) ++g;
508 else if (jsY_accept(J, 'i')) ++i;
509 else if (jsY_accept(J, 'm')) ++m;
510 else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar);
511 }
512
513 if (g > 1 || i > 1 || m > 1)
514 jsY_error(J, "duplicated flag in regular expression");
515
516 J->text = js_intern(J, s);
517 J->number = 0;
518 if (g) J->number += JS_REGEXP_G;
519 if (i) J->number += JS_REGEXP_I;
520 if (m) J->number += JS_REGEXP_M;
521 return TK_REGEXP;
522}
523
524/* simple "return [no Line Terminator here] ..." contexts */
525static int isnlthcontext(int last)
526{
527 switch (last) {
528 case TK_BREAK:
529 case TK_CONTINUE:
530 case TK_RETURN:
531 case TK_THROW:
532 return 1;
533 default:
534 return 0;
535 }
536}
537
/*
 * Scan and return the next token of the script source.
 *
 * Skips whitespace, line breaks and comments, then recognises regular
 * expression literals, numbers, strings, punctuators/operators, identifiers
 * and keywords. Single-character tokens are returned as their character
 * value, everything else as a TK_* constant; 0 signals end of input.
 *
 * Side effects: J->lexline is set to the line on which the token search
 * (re)started, J->newline is set to 1 if a line break was crossed, and
 * J->text / J->number receive the token's payload where applicable.
 * Unrecognised characters raise a syntax error.
 */
static int jsY_lexx(js_State *J)
{
	J->newline = 0;

	while (1) {
		J->lexline = J->line; /* save location of beginning of token */

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		/* line break: report an automatic ';' after break/continue/return/throw */
		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			if (isnlthcontext(J->lasttoken))
				return ';';
			continue;
		}

		/* '/' starts a comment, a regexp literal, '/=' or plain division */
		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			} else if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			} else if (isregexpcontext(J->lasttoken)) {
				return lexregexp(J);
			} else if (jsY_accept(J, '=')) {
				return TK_DIV_ASS;
			} else {
				return '/';
			}
		}

		if (J->lexchar >= '0' && J->lexchar <= '9') {
			return lexnumber(J);
		}

		switch (J->lexchar) {
		/* punctuators that are always a single character */
		case '(': jsY_next(J); return '(';
		case ')': jsY_next(J); return ')';
		case ',': jsY_next(J); return ',';
		case ':': jsY_next(J); return ':';
		case ';': jsY_next(J); return ';';
		case '?': jsY_next(J); return '?';
		case '[': jsY_next(J); return '[';
		case ']': jsY_next(J); return ']';
		case '{': jsY_next(J); return '{';
		case '}': jsY_next(J); return '}';
		case '~': jsY_next(J); return '~';

		case '\'':
		case '"':
			return lexstring(J);

		/* lexnumber returns '.' itself when no digit follows the dot */
		case '.':
			return lexnumber(J);

		/* operators: the longest match wins */
		case '<':
			jsY_next(J);
			if (jsY_accept(J, '<')) {
				if (jsY_accept(J, '='))
					return TK_SHL_ASS;
				return TK_SHL;
			}
			if (jsY_accept(J, '='))
				return TK_LE;
			return '<';

		case '>':
			jsY_next(J);
			if (jsY_accept(J, '>')) {
				if (jsY_accept(J, '>')) {
					if (jsY_accept(J, '='))
						return TK_USHR_ASS;
					return TK_USHR;
				}
				if (jsY_accept(J, '='))
					return TK_SHR_ASS;
				return TK_SHR;
			}
			if (jsY_accept(J, '='))
				return TK_GE;
			return '>';

		case '=':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTEQ;
				return TK_EQ;
			}
			return '=';

		case '!':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTNE;
				return TK_NE;
			}
			return '!';

		case '+':
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			if (jsY_accept(J, '='))
				return TK_ADD_ASS;
			return '+';

		case '-':
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			if (jsY_accept(J, '='))
				return TK_SUB_ASS;
			return '-';

		case '*':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MUL_ASS;
			return '*';

		case '%':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MOD_ASS;
			return '%';

		case '&':
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			if (jsY_accept(J, '='))
				return TK_AND_ASS;
			return '&';

		case '|':
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			if (jsY_accept(J, '='))
				return TK_OR_ASS;
			return '|';

		case '^':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_XOR_ASS;
			return '^';

		case 0:
			return 0; /* EOF */
		}

		/* Handle \uXXXX escapes in identifiers */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			textpush(J, J->lexchar);

			jsY_next(J);
			jsY_unescape(J);
			while (jsY_isidentifierpart(J->lexchar)) {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			}

			textend(J);

			/* keyword lookup also sets J->text */
			return jsY_findkeyword(J, J->lexbuf.text);
		}

		/* printable ASCII is shown literally, anything else as \uXXXX */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
719
720void jsY_initlex(js_State *J, const char *filename, const char *source)
721{
722 J->filename = filename;
723 J->source = source;
724 J->line = 1;
725 J->lasttoken = 0;
726 jsY_next(J); /* load first lookahead character */
727}
728
729int jsY_lex(js_State *J)
730{
731 return J->lasttoken = jsY_lexx(J);
732}
733
734static int lexjsonnumber(js_State *J)
735{
736 const char *s = J->source - 1;
737
738 if (J->lexchar == '-')
739 jsY_next(J);
740
741 if (J->lexchar == '0')
742 jsY_next(J);
743 else if (J->lexchar >= '1' && J->lexchar <= '9')
744 while (isdigit(J->lexchar))
745 jsY_next(J);
746 else
747 jsY_error(J, "unexpected non-digit");
748
749 if (jsY_accept(J, '.')) {
750 if (isdigit(J->lexchar))
751 while (isdigit(J->lexchar))
752 jsY_next(J);
753 else
754 jsY_error(J, "missing digits after decimal point");
755 }
756
757 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
758 if (J->lexchar == '-' || J->lexchar == '+')
759 jsY_next(J);
760 if (isdigit(J->lexchar))
761 while (isdigit(J->lexchar))
762 jsY_next(J);
763 else
764 jsY_error(J, "missing digits after exponent indicator");
765 }
766
767 J->number = js_strtod(s, NULL);
768 return TK_NUMBER;
769}
770
771static int lexjsonescape(js_State *J)
772{
773 int x = 0;
774
775 /* already consumed '\' */
776
777 switch (J->lexchar) {
778 default: jsY_error(J, "invalid escape sequence");
779 case 'u':
780 jsY_next(J);
781 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
782 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
783 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
784 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
785 textpush(J, x);
786 break;
787 case '"': textpush(J, '"'); jsY_next(J); break;
788 case '\\': textpush(J, '\\'); jsY_next(J); break;
789 case '/': textpush(J, '/'); jsY_next(J); break;
790 case 'b': textpush(J, '\b'); jsY_next(J); break;
791 case 'f': textpush(J, '\f'); jsY_next(J); break;
792 case 'n': textpush(J, '\n'); jsY_next(J); break;
793 case 'r': textpush(J, '\r'); jsY_next(J); break;
794 case 't': textpush(J, '\t'); jsY_next(J); break;
795 }
796 return 0;
797}
798
799static int lexjsonstring(js_State *J)
800{
801 const char *s;
802
803 textinit(J);
804
805 while (J->lexchar != '"') {
806 if (J->lexchar == 0)
807 jsY_error(J, "unterminated string");
808 else if (J->lexchar < 32)
809 jsY_error(J, "invalid control character in string");
810 else if (jsY_accept(J, '\\'))
811 lexjsonescape(J);
812 else {
813 textpush(J, J->lexchar);
814 jsY_next(J);
815 }
816 }
817 jsY_expect(J, '"');
818
819 s = textend(J);
820
821 J->text = js_intern(J, s);
822 return TK_STRING;
823}
824
825int jsY_lexjson(js_State *J)
826{
827 while (1) {
828 J->lexline = J->line; /* save location of beginning of token */
829
830 while (jsY_iswhite(J->lexchar) || J->lexchar == '\n')
831 jsY_next(J);
832
833 if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-')
834 return lexjsonnumber(J);
835
836 switch (J->lexchar) {
837 case ',': jsY_next(J); return ',';
838 case ':': jsY_next(J); return ':';
839 case '[': jsY_next(J); return '[';
840 case ']': jsY_next(J); return ']';
841 case '{': jsY_next(J); return '{';
842 case '}': jsY_next(J); return '}';
843
844 case '"':
845 jsY_next(J);
846 return lexjsonstring(J);
847
848 case 'f':
849 jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e');
850 return TK_FALSE;
851
852 case 'n':
853 jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l');
854 return TK_NULL;
855
856 case 't':
857 jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e');
858 return TK_TRUE;
859
860 case 0:
861 return 0; /* EOF */
862 }
863
864 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
865 jsY_error(J, "unexpected character: '%c'", J->lexchar);
866 jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
867 }
868}
869