1 | #include "jsi.h" |
2 | #include "jslex.h" |
3 | #include "utf.h" |
4 | |
5 | JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3); |
6 | |
7 | static void jsY_error(js_State *J, const char *fmt, ...) |
8 | { |
9 | va_list ap; |
10 | char buf[512]; |
11 | char msgbuf[256]; |
12 | |
13 | va_start(ap, fmt); |
14 | vsnprintf(msgbuf, 256, fmt, ap); |
15 | va_end(ap); |
16 | |
17 | snprintf(buf, 256, "%s:%d: " , J->filename, J->lexline); |
18 | strcat(buf, msgbuf); |
19 | |
20 | js_newsyntaxerror(J, buf); |
21 | js_throw(J); |
22 | } |
23 | |
/*
 * Printable names for token codes, indexed by the token code itself:
 *   0          end of input
 *   1..127     single ASCII characters (control characters shown as '\xNN')
 *   128..255   unused (null entries)
 *   256..      named tokens: identifier, number, string, regexp, then the
 *              multi-character operators, then the keywords
 * NOTE(review): the order of the named entries presumably has to match the
 * TK_* token enumeration declared elsewhere -- confirm before reordering.
 */
static const char *tokenstring[] = {
	"(end-of-file)",
	"'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
	"'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
	"'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
	"'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
	"' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
	/* the backslash entry needs an escaped backslash; "'\'" would be the string '' */
	"'X'", "'Y'", "'Z'", "'['", "'\\'", "']'", "'^'", "'_'",
	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",

	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

	"(identifier)", "(number)", "(string)", "(regexp)",

	"'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
	"'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
	"'+='", "'-='", "'*='", "'/='", "'%='",
	"'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
	"'++'", "'--'",

	"'break'", "'case'", "'catch'", "'continue'", "'debugger'",
	"'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
	"'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
	"'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
	"'void'", "'while'", "'with'",
};
66 | |
67 | const char *jsY_tokenstring(int token) |
68 | { |
69 | if (token >= 0 && token < (int)nelem(tokenstring)) |
70 | if (tokenstring[token]) |
71 | return tokenstring[token]; |
72 | return "<unknown>" ; |
73 | } |
74 | |
/*
 * Reserved words recognised by the lexer.
 *
 * jsY_findkeyword() binary-searches this table with strcmp(), so the
 * entries must stay in ascending strcmp() order. It also returns
 * TK_BREAK + index, so the order determines which token code each word
 * maps to.
 */
static const char *keywords[] = {
	"break", "case", "catch", "continue", "debugger",
	"default", "delete", "do", "else", "false",
	"finally", "for", "function", "if", "in",
	"instanceof", "new", "null", "return", "switch",
	"this", "throw", "true", "try", "typeof",
	"var", "void", "while", "with",
};
81 | |
/*
 * Binary search for the string s in list[0 .. num-1].
 * The list must be sorted in ascending strcmp() order.
 * Returns the index of the matching entry, or -1 when s is not present.
 */
int jsY_findword(const char *s, const char **list, int num)
{
	int lo, hi;

	for (lo = 0, hi = num - 1; lo <= hi; ) {
		int mid = (lo + hi) / 2;
		int order = strcmp(s, list[mid]);

		if (order == 0)
			return mid;
		if (order < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return -1;
}
98 | |
99 | static int jsY_findkeyword(js_State *J, const char *s) |
100 | { |
101 | int i = jsY_findword(s, keywords, nelem(keywords)); |
102 | if (i >= 0) { |
103 | J->text = keywords[i]; |
104 | return TK_BREAK + i; /* first keyword + i */ |
105 | } |
106 | J->text = js_intern(J, s); |
107 | return TK_IDENTIFIER; |
108 | } |
109 | |
/*
 * Return 1 if the code point c is one of the whitespace characters this
 * lexer skips (line terminators are handled by jsY_isnewline), else 0.
 */
int jsY_iswhite(int c)
{
	switch (c) {
	case 0x9:    /* horizontal tab */
	case 0xB:    /* vertical tab */
	case 0xC:    /* form feed */
	case 0x20:   /* space */
	case 0xA0:   /* no-break space */
	case 0xFEFF: /* byte order mark */
		return 1;
	default:
		return 0;
	}
}
114 | |
/*
 * Return 1 if the code point c is a line terminator
 * (LF, CR, U+2028 or U+2029), else 0.
 */
int jsY_isnewline(int c)
{
	switch (c) {
	case 0xA:    /* line feed */
	case 0xD:    /* carriage return */
	case 0x2028: /* line separator */
	case 0x2029: /* paragraph separator */
		return 1;
	default:
		return 0;
	}
}
119 | |
/*
 * ASCII-only character class tests, defined only when the names are not
 * already macros. Every use of the argument is parenthesized so that an
 * arbitrary expression can be passed without operator-precedence surprises.
 * The argument is still evaluated more than once, so it must not have side
 * effects.
 */
#ifndef isalpha
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#endif
#ifndef isdigit
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
/* ishex covers only the letters a-f / A-F; jsY_ishex() adds the decimal digits */
#ifndef ishex
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#endif
129 | |
/*
 * Return 1 if c may begin an identifier: an ASCII letter, '$', '_', or any
 * code point accepted by isalpharune(); otherwise 0.
 */
static int jsY_isidentifierstart(int c)
{
	if (isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) != 0;
}
134 | |
/*
 * Return 1 if c may continue an identifier: an ASCII digit or letter, '$',
 * '_', or any code point accepted by isalpharune(); otherwise 0.
 */
static int jsY_isidentifierpart(int c)
{
	if (isdigit(c) || isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) != 0;
}
139 | |
/* Decimal digit test: non-zero when c is one of '0' through '9'. */
static int jsY_isdec(int c)
{
	int digit = isdigit(c);
	return digit;
}
144 | |
/*
 * Hexadecimal digit test: returns 1 for '0'-'9', 'a'-'f' and 'A'-'F',
 * otherwise 0.
 */
int jsY_ishex(int c)
{
	if (isdigit(c))
		return 1;
	return ishex(c) ? 1 : 0;
}
149 | |
/*
 * Convert a hexadecimal digit character to its value (0..15).
 * Any character that is not a hexadecimal digit yields 0.
 */
int jsY_tohex(int c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return 0xA + (c - 'a');
	if ('A' <= c && c <= 'F')
		return 0xA + (c - 'A');
	return 0;
}
157 | |
158 | static void jsY_next(js_State *J) |
159 | { |
160 | Rune c; |
161 | J->source += chartorune(&c, J->source); |
162 | /* consume CR LF as one unit */ |
163 | if (c == '\r' && *J->source == '\n') |
164 | ++J->source; |
165 | if (jsY_isnewline(c)) { |
166 | J->line++; |
167 | c = '\n'; |
168 | } |
169 | J->lexchar = c; |
170 | } |
171 | |
/*
 * If the lookahead character equals x, consume it and evaluate to 1;
 * otherwise leave the input untouched and evaluate to 0.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)

/*
 * Require the lookahead character to be x: consume it, or raise a syntax
 * error via jsY_error(). Wrapped in do/while(0) so the macro behaves as a
 * single statement and cannot capture a following 'else'. x is evaluated
 * more than once, so it must not have side effects.
 */
#define jsY_expect(J, x) do { if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", x); } while (0)
175 | |
176 | static void jsY_unescape(js_State *J) |
177 | { |
178 | if (jsY_accept(J, '\\')) { |
179 | if (jsY_accept(J, 'u')) { |
180 | int x = 0; |
181 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); |
182 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); |
183 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); |
184 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar); |
185 | J->lexchar = x; |
186 | return; |
187 | } |
188 | error: |
189 | jsY_error(J, "unexpected escape sequence" ); |
190 | } |
191 | } |
192 | |
193 | static void textinit(js_State *J) |
194 | { |
195 | if (!J->lexbuf.text) { |
196 | J->lexbuf.cap = 4096; |
197 | J->lexbuf.text = js_malloc(J, J->lexbuf.cap); |
198 | } |
199 | J->lexbuf.len = 0; |
200 | } |
201 | |
202 | static void textpush(js_State *J, Rune c) |
203 | { |
204 | int n = runelen(c); |
205 | if (J->lexbuf.len + n > J->lexbuf.cap) { |
206 | J->lexbuf.cap = J->lexbuf.cap * 2; |
207 | J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap); |
208 | } |
209 | J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c); |
210 | } |
211 | |
212 | static char *textend(js_State *J) |
213 | { |
214 | textpush(J, 0); |
215 | return J->lexbuf.text; |
216 | } |
217 | |
218 | static void (js_State *J) |
219 | { |
220 | while (J->lexchar && J->lexchar != '\n') |
221 | jsY_next(J); |
222 | } |
223 | |
224 | static int (js_State *J) |
225 | { |
226 | /* already consumed initial '/' '*' sequence */ |
227 | while (J->lexchar != 0) { |
228 | if (jsY_accept(J, '*')) { |
229 | while (J->lexchar == '*') |
230 | jsY_next(J); |
231 | if (jsY_accept(J, '/')) |
232 | return 0; |
233 | } |
234 | else |
235 | jsY_next(J); |
236 | } |
237 | return -1; |
238 | } |
239 | |
240 | static double lexhex(js_State *J) |
241 | { |
242 | double n = 0; |
243 | if (!jsY_ishex(J->lexchar)) |
244 | jsY_error(J, "malformed hexadecimal number" ); |
245 | while (jsY_ishex(J->lexchar)) { |
246 | n = n * 16 + jsY_tohex(J->lexchar); |
247 | jsY_next(J); |
248 | } |
249 | return n; |
250 | } |
251 | |
252 | #if 0 |
253 | |
254 | static double lexinteger(js_State *J) |
255 | { |
256 | double n = 0; |
257 | if (!jsY_isdec(J->lexchar)) |
258 | jsY_error(J, "malformed number" ); |
259 | while (jsY_isdec(J->lexchar)) { |
260 | n = n * 10 + (J->lexchar - '0'); |
261 | jsY_next(J); |
262 | } |
263 | return n; |
264 | } |
265 | |
266 | static double lexfraction(js_State *J) |
267 | { |
268 | double n = 0; |
269 | double d = 1; |
270 | while (jsY_isdec(J->lexchar)) { |
271 | n = n * 10 + (J->lexchar - '0'); |
272 | d = d * 10; |
273 | jsY_next(J); |
274 | } |
275 | return n / d; |
276 | } |
277 | |
278 | static double lexexponent(js_State *J) |
279 | { |
280 | double sign; |
281 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
282 | if (jsY_accept(J, '-')) sign = -1; |
283 | else if (jsY_accept(J, '+')) sign = 1; |
284 | else sign = 1; |
285 | return sign * lexinteger(J); |
286 | } |
287 | return 0; |
288 | } |
289 | |
290 | static int lexnumber(js_State *J) |
291 | { |
292 | double n; |
293 | double e; |
294 | |
295 | if (jsY_accept(J, '0')) { |
296 | if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { |
297 | J->number = lexhex(J); |
298 | return TK_NUMBER; |
299 | } |
300 | if (jsY_isdec(J->lexchar)) |
301 | jsY_error(J, "number with leading zero" ); |
302 | n = 0; |
303 | if (jsY_accept(J, '.')) |
304 | n += lexfraction(J); |
305 | } else if (jsY_accept(J, '.')) { |
306 | if (!jsY_isdec(J->lexchar)) |
307 | return '.'; |
308 | n = lexfraction(J); |
309 | } else { |
310 | n = lexinteger(J); |
311 | if (jsY_accept(J, '.')) |
312 | n += lexfraction(J); |
313 | } |
314 | |
315 | e = lexexponent(J); |
316 | if (e < 0) |
317 | n /= pow(10, -e); |
318 | else if (e > 0) |
319 | n *= pow(10, e); |
320 | |
321 | if (jsY_isidentifierstart(J->lexchar)) |
322 | jsY_error(J, "number with letter suffix" ); |
323 | |
324 | J->number = n; |
325 | return TK_NUMBER; |
326 | } |
327 | |
328 | #else |
329 | |
330 | static int lexnumber(js_State *J) |
331 | { |
332 | const char *s = J->source - 1; |
333 | |
334 | if (jsY_accept(J, '0')) { |
335 | if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { |
336 | J->number = lexhex(J); |
337 | return TK_NUMBER; |
338 | } |
339 | if (jsY_isdec(J->lexchar)) |
340 | jsY_error(J, "number with leading zero" ); |
341 | if (jsY_accept(J, '.')) { |
342 | while (jsY_isdec(J->lexchar)) |
343 | jsY_next(J); |
344 | } |
345 | } else if (jsY_accept(J, '.')) { |
346 | if (!jsY_isdec(J->lexchar)) |
347 | return '.'; |
348 | while (jsY_isdec(J->lexchar)) |
349 | jsY_next(J); |
350 | } else { |
351 | while (jsY_isdec(J->lexchar)) |
352 | jsY_next(J); |
353 | if (jsY_accept(J, '.')) { |
354 | while (jsY_isdec(J->lexchar)) |
355 | jsY_next(J); |
356 | } |
357 | } |
358 | |
359 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
360 | if (J->lexchar == '-' || J->lexchar == '+') |
361 | jsY_next(J); |
362 | if (jsY_isdec(J->lexchar)) |
363 | while (jsY_isdec(J->lexchar)) |
364 | jsY_next(J); |
365 | else |
366 | jsY_error(J, "missing exponent" ); |
367 | } |
368 | |
369 | if (jsY_isidentifierstart(J->lexchar)) |
370 | jsY_error(J, "number with letter suffix" ); |
371 | |
372 | J->number = js_strtod(s, NULL); |
373 | return TK_NUMBER; |
374 | } |
375 | |
376 | #endif |
377 | |
378 | static int lexescape(js_State *J) |
379 | { |
380 | int x = 0; |
381 | |
382 | /* already consumed '\' */ |
383 | |
384 | if (jsY_accept(J, '\n')) |
385 | return 0; |
386 | |
387 | switch (J->lexchar) { |
388 | case 0: jsY_error(J, "unterminated escape sequence" ); |
389 | case 'u': |
390 | jsY_next(J); |
391 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } |
392 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } |
393 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
394 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
395 | textpush(J, x); |
396 | break; |
397 | case 'x': |
398 | jsY_next(J); |
399 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
400 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
401 | textpush(J, x); |
402 | break; |
403 | case '0': textpush(J, 0); jsY_next(J); break; |
404 | case '\\': textpush(J, '\\'); jsY_next(J); break; |
405 | case '\'': textpush(J, '\''); jsY_next(J); break; |
406 | case '"': textpush(J, '"'); jsY_next(J); break; |
407 | case 'b': textpush(J, '\b'); jsY_next(J); break; |
408 | case 'f': textpush(J, '\f'); jsY_next(J); break; |
409 | case 'n': textpush(J, '\n'); jsY_next(J); break; |
410 | case 'r': textpush(J, '\r'); jsY_next(J); break; |
411 | case 't': textpush(J, '\t'); jsY_next(J); break; |
412 | case 'v': textpush(J, '\v'); jsY_next(J); break; |
413 | default: textpush(J, J->lexchar); jsY_next(J); break; |
414 | } |
415 | return 0; |
416 | } |
417 | |
418 | static int lexstring(js_State *J) |
419 | { |
420 | const char *s; |
421 | |
422 | int q = J->lexchar; |
423 | jsY_next(J); |
424 | |
425 | textinit(J); |
426 | |
427 | while (J->lexchar != q) { |
428 | if (J->lexchar == 0 || J->lexchar == '\n') |
429 | jsY_error(J, "string not terminated" ); |
430 | if (jsY_accept(J, '\\')) { |
431 | if (lexescape(J)) |
432 | jsY_error(J, "malformed escape sequence" ); |
433 | } else { |
434 | textpush(J, J->lexchar); |
435 | jsY_next(J); |
436 | } |
437 | } |
438 | jsY_expect(J, q); |
439 | |
440 | s = textend(J); |
441 | |
442 | J->text = js_intern(J, s); |
443 | return TK_STRING; |
444 | } |
445 | |
446 | /* the ugliest language wart ever... */ |
447 | static int isregexpcontext(int last) |
448 | { |
449 | switch (last) { |
450 | case ']': |
451 | case ')': |
452 | case '}': |
453 | case TK_IDENTIFIER: |
454 | case TK_NUMBER: |
455 | case TK_STRING: |
456 | case TK_FALSE: |
457 | case TK_NULL: |
458 | case TK_THIS: |
459 | case TK_TRUE: |
460 | return 0; |
461 | default: |
462 | return 1; |
463 | } |
464 | } |
465 | |
466 | static int lexregexp(js_State *J) |
467 | { |
468 | const char *s; |
469 | int g, m, i; |
470 | int inclass = 0; |
471 | |
472 | /* already consumed initial '/' */ |
473 | |
474 | textinit(J); |
475 | |
476 | /* regexp body */ |
477 | while (J->lexchar != '/' || inclass) { |
478 | if (J->lexchar == 0 || J->lexchar == '\n') { |
479 | jsY_error(J, "regular expression not terminated" ); |
480 | } else if (jsY_accept(J, '\\')) { |
481 | if (jsY_accept(J, '/')) { |
482 | textpush(J, '/'); |
483 | } else { |
484 | textpush(J, '\\'); |
485 | if (J->lexchar == 0 || J->lexchar == '\n') |
486 | jsY_error(J, "regular expression not terminated" ); |
487 | textpush(J, J->lexchar); |
488 | jsY_next(J); |
489 | } |
490 | } else { |
491 | if (J->lexchar == '[' && !inclass) |
492 | inclass = 1; |
493 | if (J->lexchar == ']' && inclass) |
494 | inclass = 0; |
495 | textpush(J, J->lexchar); |
496 | jsY_next(J); |
497 | } |
498 | } |
499 | jsY_expect(J, '/'); |
500 | |
501 | s = textend(J); |
502 | |
503 | /* regexp flags */ |
504 | g = i = m = 0; |
505 | |
506 | while (jsY_isidentifierpart(J->lexchar)) { |
507 | if (jsY_accept(J, 'g')) ++g; |
508 | else if (jsY_accept(J, 'i')) ++i; |
509 | else if (jsY_accept(J, 'm')) ++m; |
510 | else jsY_error(J, "illegal flag in regular expression: %c" , J->lexchar); |
511 | } |
512 | |
513 | if (g > 1 || i > 1 || m > 1) |
514 | jsY_error(J, "duplicated flag in regular expression" ); |
515 | |
516 | J->text = js_intern(J, s); |
517 | J->number = 0; |
518 | if (g) J->number += JS_REGEXP_G; |
519 | if (i) J->number += JS_REGEXP_I; |
520 | if (m) J->number += JS_REGEXP_M; |
521 | return TK_REGEXP; |
522 | } |
523 | |
524 | /* simple "return [no Line Terminator here] ..." contexts */ |
525 | static int isnlthcontext(int last) |
526 | { |
527 | switch (last) { |
528 | case TK_BREAK: |
529 | case TK_CONTINUE: |
530 | case TK_RETURN: |
531 | case TK_THROW: |
532 | return 1; |
533 | default: |
534 | return 0; |
535 | } |
536 | } |
537 | |
/*
 * Scan and return the next JavaScript token.
 *
 * Skips whitespace, line breaks and comments, then dispatches on the
 * lookahead character. The return value is the character itself for
 * single-character tokens, a TK_* code for everything else, and 0 at end of
 * input. Side effects visible here:
 *   - J->newline is set to 1 if a line break was crossed before the token;
 *   - J->lexline records the line on which the token scan started;
 *   - J->text / J->number are filled in by the helpers for identifiers,
 *     strings, regular expressions and numbers.
 * Unexpected characters raise a syntax error through jsY_error().
 */
static int jsY_lexx(js_State *J)
{
	J->newline = 0;

	while (1) {
		J->lexline = J->line; /* save location of beginning of token */

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		/* a line break after break/continue/return/throw acts as ';' */
		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			if (isnlthcontext(J->lasttoken))
				return ';';
			continue;
		}

		/* '/' is a comment, a regular expression, '/=' or division */
		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			} else if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			} else if (isregexpcontext(J->lasttoken)) {
				/* decided by the previous token, before '=' is considered */
				return lexregexp(J);
			} else if (jsY_accept(J, '=')) {
				return TK_DIV_ASS;
			} else {
				return '/';
			}
		}

		if (J->lexchar >= '0' && J->lexchar <= '9') {
			return lexnumber(J);
		}

		switch (J->lexchar) {
		/* punctuation that is always a single-character token */
		case '(': jsY_next(J); return '(';
		case ')': jsY_next(J); return ')';
		case ',': jsY_next(J); return ',';
		case ':': jsY_next(J); return ':';
		case ';': jsY_next(J); return ';';
		case '?': jsY_next(J); return '?';
		case '[': jsY_next(J); return '[';
		case ']': jsY_next(J); return ']';
		case '{': jsY_next(J); return '{';
		case '}': jsY_next(J); return '}';
		case '~': jsY_next(J); return '~';

		case '\'':
		case '"':
			return lexstring(J);

		/* lexnumber returns '.' itself when no digit follows the dot */
		case '.':
			return lexnumber(J);

		/* operators: try the longest spelling first */
		case '<':
			jsY_next(J);
			if (jsY_accept(J, '<')) {
				if (jsY_accept(J, '='))
					return TK_SHL_ASS;
				return TK_SHL;
			}
			if (jsY_accept(J, '='))
				return TK_LE;
			return '<';

		case '>':
			jsY_next(J);
			if (jsY_accept(J, '>')) {
				if (jsY_accept(J, '>')) {
					if (jsY_accept(J, '='))
						return TK_USHR_ASS;
					return TK_USHR;
				}
				if (jsY_accept(J, '='))
					return TK_SHR_ASS;
				return TK_SHR;
			}
			if (jsY_accept(J, '='))
				return TK_GE;
			return '>';

		case '=':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTEQ;
				return TK_EQ;
			}
			return '=';

		case '!':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTNE;
				return TK_NE;
			}
			return '!';

		case '+':
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			if (jsY_accept(J, '='))
				return TK_ADD_ASS;
			return '+';

		case '-':
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			if (jsY_accept(J, '='))
				return TK_SUB_ASS;
			return '-';

		case '*':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MUL_ASS;
			return '*';

		case '%':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MOD_ASS;
			return '%';

		case '&':
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			if (jsY_accept(J, '='))
				return TK_AND_ASS;
			return '&';

		case '|':
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			if (jsY_accept(J, '='))
				return TK_OR_ASS;
			return '|';

		case '^':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_XOR_ASS;
			return '^';

		case 0:
			return 0; /* EOF */
		}

		/* Handle \uXXXX escapes in identifiers */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			textpush(J, J->lexchar);

			/* each following character may itself be written as an escape */
			jsY_next(J);
			jsY_unescape(J);
			while (jsY_isidentifierpart(J->lexchar)) {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			}

			textend(J);

			return jsY_findkeyword(J, J->lexbuf.text);
		}

		/* printable ASCII is shown literally, anything else as \uXXXX */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
719 | |
720 | void jsY_initlex(js_State *J, const char *filename, const char *source) |
721 | { |
722 | J->filename = filename; |
723 | J->source = source; |
724 | J->line = 1; |
725 | J->lasttoken = 0; |
726 | jsY_next(J); /* load first lookahead character */ |
727 | } |
728 | |
729 | int jsY_lex(js_State *J) |
730 | { |
731 | return J->lasttoken = jsY_lexx(J); |
732 | } |
733 | |
734 | static int lexjsonnumber(js_State *J) |
735 | { |
736 | const char *s = J->source - 1; |
737 | |
738 | if (J->lexchar == '-') |
739 | jsY_next(J); |
740 | |
741 | if (J->lexchar == '0') |
742 | jsY_next(J); |
743 | else if (J->lexchar >= '1' && J->lexchar <= '9') |
744 | while (isdigit(J->lexchar)) |
745 | jsY_next(J); |
746 | else |
747 | jsY_error(J, "unexpected non-digit" ); |
748 | |
749 | if (jsY_accept(J, '.')) { |
750 | if (isdigit(J->lexchar)) |
751 | while (isdigit(J->lexchar)) |
752 | jsY_next(J); |
753 | else |
754 | jsY_error(J, "missing digits after decimal point" ); |
755 | } |
756 | |
757 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
758 | if (J->lexchar == '-' || J->lexchar == '+') |
759 | jsY_next(J); |
760 | if (isdigit(J->lexchar)) |
761 | while (isdigit(J->lexchar)) |
762 | jsY_next(J); |
763 | else |
764 | jsY_error(J, "missing digits after exponent indicator" ); |
765 | } |
766 | |
767 | J->number = js_strtod(s, NULL); |
768 | return TK_NUMBER; |
769 | } |
770 | |
771 | static int lexjsonescape(js_State *J) |
772 | { |
773 | int x = 0; |
774 | |
775 | /* already consumed '\' */ |
776 | |
777 | switch (J->lexchar) { |
778 | default: jsY_error(J, "invalid escape sequence" ); |
779 | case 'u': |
780 | jsY_next(J); |
781 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } |
782 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } |
783 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
784 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
785 | textpush(J, x); |
786 | break; |
787 | case '"': textpush(J, '"'); jsY_next(J); break; |
788 | case '\\': textpush(J, '\\'); jsY_next(J); break; |
789 | case '/': textpush(J, '/'); jsY_next(J); break; |
790 | case 'b': textpush(J, '\b'); jsY_next(J); break; |
791 | case 'f': textpush(J, '\f'); jsY_next(J); break; |
792 | case 'n': textpush(J, '\n'); jsY_next(J); break; |
793 | case 'r': textpush(J, '\r'); jsY_next(J); break; |
794 | case 't': textpush(J, '\t'); jsY_next(J); break; |
795 | } |
796 | return 0; |
797 | } |
798 | |
799 | static int lexjsonstring(js_State *J) |
800 | { |
801 | const char *s; |
802 | |
803 | textinit(J); |
804 | |
805 | while (J->lexchar != '"') { |
806 | if (J->lexchar == 0) |
807 | jsY_error(J, "unterminated string" ); |
808 | else if (J->lexchar < 32) |
809 | jsY_error(J, "invalid control character in string" ); |
810 | else if (jsY_accept(J, '\\')) |
811 | lexjsonescape(J); |
812 | else { |
813 | textpush(J, J->lexchar); |
814 | jsY_next(J); |
815 | } |
816 | } |
817 | jsY_expect(J, '"'); |
818 | |
819 | s = textend(J); |
820 | |
821 | J->text = js_intern(J, s); |
822 | return TK_STRING; |
823 | } |
824 | |
825 | int jsY_lexjson(js_State *J) |
826 | { |
827 | while (1) { |
828 | J->lexline = J->line; /* save location of beginning of token */ |
829 | |
830 | while (jsY_iswhite(J->lexchar) || J->lexchar == '\n') |
831 | jsY_next(J); |
832 | |
833 | if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-') |
834 | return lexjsonnumber(J); |
835 | |
836 | switch (J->lexchar) { |
837 | case ',': jsY_next(J); return ','; |
838 | case ':': jsY_next(J); return ':'; |
839 | case '[': jsY_next(J); return '['; |
840 | case ']': jsY_next(J); return ']'; |
841 | case '{': jsY_next(J); return '{'; |
842 | case '}': jsY_next(J); return '}'; |
843 | |
844 | case '"': |
845 | jsY_next(J); |
846 | return lexjsonstring(J); |
847 | |
848 | case 'f': |
849 | jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e'); |
850 | return TK_FALSE; |
851 | |
852 | case 'n': |
853 | jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l'); |
854 | return TK_NULL; |
855 | |
856 | case 't': |
857 | jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e'); |
858 | return TK_TRUE; |
859 | |
860 | case 0: |
861 | return 0; /* EOF */ |
862 | } |
863 | |
864 | if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) |
865 | jsY_error(J, "unexpected character: '%c'" , J->lexchar); |
866 | jsY_error(J, "unexpected character: \\u%04X" , J->lexchar); |
867 | } |
868 | } |
869 | |