1 | /*****************************************************************************/ |
2 | /* */ |
3 | /* scanner.c */ |
4 | /* */ |
/* Lexical scanner (tokenizer) for the cc65 C compiler */
6 | /* */ |
7 | /* */ |
8 | /* */ |
9 | /* (C) 1998-2010, Ullrich von Bassewitz */ |
10 | /* Roemerstrasse 52 */ |
11 | /* D-70794 Filderstadt */ |
12 | /* EMail: uz@cc65.org */ |
13 | /* */ |
14 | /* */ |
15 | /* This software is provided 'as-is', without any expressed or implied */ |
16 | /* warranty. In no event will the authors be held liable for any damages */ |
17 | /* arising from the use of this software. */ |
18 | /* */ |
19 | /* Permission is granted to anyone to use this software for any purpose, */ |
20 | /* including commercial applications, and to alter it and redistribute it */ |
21 | /* freely, subject to the following restrictions: */ |
22 | /* */ |
23 | /* 1. The origin of this software must not be misrepresented; you must not */ |
24 | /* claim that you wrote the original software. If you use this software */ |
25 | /* in a product, an acknowledgment in the product documentation would be */ |
26 | /* appreciated but is not required. */ |
27 | /* 2. Altered source versions must be plainly marked as such, and must not */ |
28 | /* be misrepresented as being the original software. */ |
29 | /* 3. This notice may not be removed or altered from any source */ |
30 | /* distribution. */ |
31 | /* */ |
32 | /*****************************************************************************/ |
33 | |
34 | |
35 | |
36 | #include <stdio.h> |
37 | #include <stdlib.h> |
38 | #include <string.h> |
39 | #include <errno.h> |
40 | #include <ctype.h> |
41 | #include <math.h> |
42 | |
43 | /* common */ |
44 | #include "chartype.h" |
45 | #include "fp.h" |
46 | #include "tgttrans.h" |
47 | |
48 | /* cc65 */ |
49 | #include "datatype.h" |
50 | #include "error.h" |
51 | #include "function.h" |
52 | #include "global.h" |
53 | #include "hexval.h" |
54 | #include "ident.h" |
55 | #include "input.h" |
56 | #include "litpool.h" |
57 | #include "preproc.h" |
58 | #include "scanner.h" |
59 | #include "standard.h" |
60 | #include "symtab.h" |
61 | |
62 | |
63 | |
64 | /*****************************************************************************/ |
65 | /* data */ |
66 | /*****************************************************************************/ |
67 | |
68 | |
69 | |
70 | Token CurTok; /* The current token */ |
71 | Token NextTok; /* The next token */ |
72 | |
73 | |
74 | |
75 | /* Token types */ |
76 | enum { |
77 | TT_C89 = 0x01 << STD_C89, /* Token valid in C89 */ |
78 | TT_C99 = 0x01 << STD_C99, /* Token valid in C99 */ |
79 | TT_CC65 = 0x01 << STD_CC65 /* Token valid in cc65 */ |
80 | }; |
81 | |
82 | /* Token table */ |
83 | static const struct Keyword { |
84 | char* Key; /* Keyword name */ |
85 | unsigned char Tok; /* The token */ |
86 | unsigned char Std; /* Token supported in which standards? */ |
87 | } Keywords [] = { |
88 | { "_Pragma" , TOK_PRAGMA, TT_C89 | TT_C99 | TT_CC65 }, /* !! */ |
89 | { "__AX__" , TOK_AX, TT_C89 | TT_C99 | TT_CC65 }, |
90 | { "__A__" , TOK_A, TT_C89 | TT_C99 | TT_CC65 }, |
91 | { "__EAX__" , TOK_EAX, TT_C89 | TT_C99 | TT_CC65 }, |
92 | { "__X__" , TOK_X, TT_C89 | TT_C99 | TT_CC65 }, |
93 | { "__Y__" , TOK_Y, TT_C89 | TT_C99 | TT_CC65 }, |
94 | { "__asm__" , TOK_ASM, TT_C89 | TT_C99 | TT_CC65 }, |
95 | { "__attribute__" , TOK_ATTRIBUTE, TT_C89 | TT_C99 | TT_CC65 }, |
96 | { "__cdecl__" , TOK_CDECL, TT_C89 | TT_C99 | TT_CC65 }, |
97 | { "__far__" , TOK_FAR, TT_C89 | TT_C99 | TT_CC65 }, |
98 | { "__fastcall__" , TOK_FASTCALL, TT_C89 | TT_C99 | TT_CC65 }, |
99 | { "__inline__" , TOK_INLINE, TT_C89 | TT_C99 | TT_CC65 }, |
100 | { "__near__" , TOK_NEAR, TT_C89 | TT_C99 | TT_CC65 }, |
101 | { "asm" , TOK_ASM, TT_CC65 }, |
102 | { "auto" , TOK_AUTO, TT_C89 | TT_C99 | TT_CC65 }, |
103 | { "break" , TOK_BREAK, TT_C89 | TT_C99 | TT_CC65 }, |
104 | { "case" , TOK_CASE, TT_C89 | TT_C99 | TT_CC65 }, |
105 | { "cdecl" , TOK_CDECL, TT_CC65 }, |
106 | { "char" , TOK_CHAR, TT_C89 | TT_C99 | TT_CC65 }, |
107 | { "const" , TOK_CONST, TT_C89 | TT_C99 | TT_CC65 }, |
108 | { "continue" , TOK_CONTINUE, TT_C89 | TT_C99 | TT_CC65 }, |
109 | { "default" , TOK_DEFAULT, TT_C89 | TT_C99 | TT_CC65 }, |
110 | { "do" , TOK_DO, TT_C89 | TT_C99 | TT_CC65 }, |
111 | { "double" , TOK_DOUBLE, TT_C89 | TT_C99 | TT_CC65 }, |
112 | { "else" , TOK_ELSE, TT_C89 | TT_C99 | TT_CC65 }, |
113 | { "enum" , TOK_ENUM, TT_C89 | TT_C99 | TT_CC65 }, |
114 | { "extern" , TOK_EXTERN, TT_C89 | TT_C99 | TT_CC65 }, |
115 | { "far" , TOK_FAR, TT_CC65 }, |
116 | { "fastcall" , TOK_FASTCALL, TT_CC65 }, |
117 | { "float" , TOK_FLOAT, TT_C89 | TT_C99 | TT_CC65 }, |
118 | { "for" , TOK_FOR, TT_C89 | TT_C99 | TT_CC65 }, |
119 | { "goto" , TOK_GOTO, TT_C89 | TT_C99 | TT_CC65 }, |
120 | { "if" , TOK_IF, TT_C89 | TT_C99 | TT_CC65 }, |
121 | { "inline" , TOK_INLINE, TT_C99 | TT_CC65 }, |
122 | { "int" , TOK_INT, TT_C89 | TT_C99 | TT_CC65 }, |
123 | { "long" , TOK_LONG, TT_C89 | TT_C99 | TT_CC65 }, |
124 | { "near" , TOK_NEAR, TT_CC65 }, |
125 | { "register" , TOK_REGISTER, TT_C89 | TT_C99 | TT_CC65 }, |
126 | { "restrict" , TOK_RESTRICT, TT_C99 | TT_CC65 }, |
127 | { "return" , TOK_RETURN, TT_C89 | TT_C99 | TT_CC65 }, |
128 | { "short" , TOK_SHORT, TT_C89 | TT_C99 | TT_CC65 }, |
129 | { "signed" , TOK_SIGNED, TT_C89 | TT_C99 | TT_CC65 }, |
130 | { "sizeof" , TOK_SIZEOF, TT_C89 | TT_C99 | TT_CC65 }, |
131 | { "static" , TOK_STATIC, TT_C89 | TT_C99 | TT_CC65 }, |
132 | { "struct" , TOK_STRUCT, TT_C89 | TT_C99 | TT_CC65 }, |
133 | { "switch" , TOK_SWITCH, TT_C89 | TT_C99 | TT_CC65 }, |
134 | { "typedef" , TOK_TYPEDEF, TT_C89 | TT_C99 | TT_CC65 }, |
135 | { "union" , TOK_UNION, TT_C89 | TT_C99 | TT_CC65 }, |
136 | { "unsigned" , TOK_UNSIGNED, TT_C89 | TT_C99 | TT_CC65 }, |
137 | { "void" , TOK_VOID, TT_C89 | TT_C99 | TT_CC65 }, |
138 | { "volatile" , TOK_VOLATILE, TT_C89 | TT_C99 | TT_CC65 }, |
139 | { "while" , TOK_WHILE, TT_C89 | TT_C99 | TT_CC65 }, |
140 | }; |
141 | #define KEY_COUNT (sizeof (Keywords) / sizeof (Keywords [0])) |
142 | |
143 | |
144 | |
/* Bit flags describing the set of types an integer constant may still have.
** NumericConst() starts with all candidates allowed by prefix and suffix and
** removes those that cannot hold the value.
*/
enum {
    IT_INT      = 0x01,         /* int */
    IT_UINT     = 0x02,         /* unsigned int */
    IT_LONG     = 0x04,         /* long */
    IT_ULONG    = 0x08          /* unsigned long */
};
150 | |
151 | |
152 | |
153 | /*****************************************************************************/ |
154 | /* code */ |
155 | /*****************************************************************************/ |
156 | |
157 | |
158 | |
159 | static int CmpKey (const void* Key, const void* Elem) |
160 | /* Compare function for bsearch */ |
161 | { |
162 | return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key); |
163 | } |
164 | |
165 | |
166 | |
167 | static token_t FindKey (const char* Key) |
168 | /* Find a keyword and return the token. Return IDENT if the token is not a |
169 | ** keyword. |
170 | */ |
171 | { |
172 | struct Keyword* K; |
173 | K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey); |
174 | if (K && (K->Std & (0x01 << IS_Get (&Standard))) != 0) { |
175 | return K->Tok; |
176 | } else { |
177 | return TOK_IDENT; |
178 | } |
179 | } |
180 | |
181 | |
182 | |
183 | static int SkipWhite (void) |
184 | /* Skip white space in the input stream, reading and preprocessing new lines |
185 | ** if necessary. Return 0 if end of file is reached, return 1 otherwise. |
186 | */ |
187 | { |
188 | while (1) { |
189 | while (CurC == '\0') { |
190 | if (NextLine () == 0) { |
191 | return 0; |
192 | } |
193 | Preprocess (); |
194 | } |
195 | if (IsSpace (CurC)) { |
196 | NextChar (); |
197 | } else { |
198 | return 1; |
199 | } |
200 | } |
201 | } |
202 | |
203 | |
204 | |
205 | int TokIsFuncSpec (const Token* T) |
206 | /* Return true if the token is a function specifier */ |
207 | { |
208 | return (T->Tok == TOK_INLINE) || |
209 | (T->Tok == TOK_FASTCALL) || (T->Tok == TOK_CDECL) || |
210 | (T->Tok == TOK_NEAR) || (T->Tok == TOK_FAR); |
211 | } |
212 | |
213 | |
214 | |
215 | void SymName (char* S) |
216 | /* Read a symbol from the input stream. The first character must have been |
217 | ** checked before calling this function. The buffer is expected to be at |
218 | ** least of size MAX_IDENTLEN+1. |
219 | */ |
220 | { |
221 | unsigned Len = 0; |
222 | do { |
223 | if (Len < MAX_IDENTLEN) { |
224 | ++Len; |
225 | *S++ = CurC; |
226 | } |
227 | NextChar (); |
228 | } while (IsIdent (CurC) || IsDigit (CurC)); |
229 | *S = '\0'; |
230 | } |
231 | |
232 | |
233 | |
234 | int IsSym (char* S) |
235 | /* If a symbol follows, read it and return 1, otherwise return 0 */ |
236 | { |
237 | if (IsIdent (CurC)) { |
238 | SymName (S); |
239 | return 1; |
240 | } else { |
241 | return 0; |
242 | } |
243 | } |
244 | |
245 | |
246 | |
static void UnknownChar (char C)
/* Report C as an invalid input character and skip one character of input */
{
    /* Use the low eight bits only, so the code is printed as two hex digits */
    const int Code = C & 0xFF;

    Error ("Invalid input character with code %02X", Code);
    NextChar ();
}
253 | |
254 | |
255 | |
256 | static void SetTok (int tok) |
257 | /* Set NextTok.Tok and bump line ptr */ |
258 | { |
259 | NextTok.Tok = tok; |
260 | NextChar (); |
261 | } |
262 | |
263 | |
264 | |
265 | static int ParseChar (void) |
266 | /* Parse a character. Converts escape chars into character codes. */ |
267 | { |
268 | int C; |
269 | int HadError; |
270 | int Count; |
271 | |
272 | /* Check for escape chars */ |
273 | if (CurC == '\\') { |
274 | NextChar (); |
275 | switch (CurC) { |
276 | case '?': |
277 | C = '\?'; |
278 | break; |
279 | case 'a': |
280 | C = '\a'; |
281 | break; |
282 | case 'b': |
283 | C = '\b'; |
284 | break; |
285 | case 'f': |
286 | C = '\f'; |
287 | break; |
288 | case 'r': |
289 | C = '\r'; |
290 | break; |
291 | case 'n': |
292 | C = '\n'; |
293 | break; |
294 | case 't': |
295 | C = '\t'; |
296 | break; |
297 | case 'v': |
298 | C = '\v'; |
299 | break; |
300 | case '\"': |
301 | C = '\"'; |
302 | break; |
303 | case '\'': |
304 | C = '\''; |
305 | break; |
306 | case '\\': |
307 | C = '\\'; |
308 | break; |
309 | case 'x': |
310 | case 'X': |
311 | /* Hex character constant */ |
312 | if (!IsXDigit (NextC)) { |
313 | Error ("\\x used with no following hex digits" ); |
314 | C = ' '; |
315 | } else { |
316 | HadError = 0; |
317 | C = 0; |
318 | while (IsXDigit (NextC)) { |
319 | if ((C << 4) >= 256) { |
320 | if (!HadError) { |
321 | Error ("Hex character constant out of range" ); |
322 | HadError = 1; |
323 | } |
324 | } else { |
325 | C = (C << 4) | HexVal (NextC); |
326 | } |
327 | NextChar (); |
328 | } |
329 | } |
330 | break; |
331 | case '0': |
332 | case '1': |
333 | case '2': |
334 | case '3': |
335 | case '4': |
336 | case '5': |
337 | case '6': |
338 | case '7': |
339 | /* Octal constant */ |
340 | Count = 1; |
341 | C = HexVal (CurC); |
342 | while (IsODigit (NextC) && Count++ < 3) { |
343 | C = (C << 3) | HexVal (NextC); |
344 | NextChar (); |
345 | } |
346 | if (C >= 256) |
347 | Error ("Octal character constant out of range" ); |
348 | break; |
349 | default: |
350 | C = CurC; |
351 | Error ("Illegal escaped character: 0x%02X" , CurC); |
352 | break; |
353 | } |
354 | } else { |
355 | C = CurC; |
356 | } |
357 | |
358 | /* Skip the character read */ |
359 | NextChar (); |
360 | |
361 | /* Do correct sign extension */ |
362 | return SignExtendChar (C); |
363 | } |
364 | |
365 | |
366 | |
367 | static void CharConst (void) |
368 | /* Parse a character constant. */ |
369 | { |
370 | int C; |
371 | |
372 | /* Skip the quote */ |
373 | NextChar (); |
374 | |
375 | /* Get character */ |
376 | C = ParseChar (); |
377 | |
378 | /* Check for closing quote */ |
379 | if (CurC != '\'') { |
380 | Error ("'\'' expected" ); |
381 | } else { |
382 | /* Skip the quote */ |
383 | NextChar (); |
384 | } |
385 | |
386 | /* Setup values and attributes */ |
387 | NextTok.Tok = TOK_CCONST; |
388 | |
389 | /* Translate into target charset */ |
390 | NextTok.IVal = SignExtendChar (TgtTranslateChar (C)); |
391 | |
392 | /* Character constants have type int */ |
393 | NextTok.Type = type_int; |
394 | } |
395 | |
396 | |
397 | |
398 | static void StringConst (void) |
399 | /* Parse a quoted string */ |
400 | { |
401 | /* String buffer */ |
402 | StrBuf S = AUTO_STRBUF_INITIALIZER; |
403 | |
404 | /* Assume next token is a string constant */ |
405 | NextTok.Tok = TOK_SCONST; |
406 | |
407 | /* Concatenate strings. If at least one of the concenated strings is a wide |
408 | ** character literal, the whole string is a wide char literal, otherwise |
409 | ** it's a normal string literal. |
410 | */ |
411 | while (1) { |
412 | |
413 | /* Check if this is a normal or a wide char string */ |
414 | if (CurC == 'L' && NextC == '\"') { |
415 | /* Wide character literal */ |
416 | NextTok.Tok = TOK_WCSCONST; |
417 | NextChar (); |
418 | NextChar (); |
419 | } else if (CurC == '\"') { |
420 | /* Skip the quote char */ |
421 | NextChar (); |
422 | } else { |
423 | /* No string */ |
424 | break; |
425 | } |
426 | |
427 | /* Read until end of string */ |
428 | while (CurC != '\"') { |
429 | if (CurC == '\0') { |
430 | Error ("Unexpected newline" ); |
431 | break; |
432 | } |
433 | SB_AppendChar (&S, ParseChar ()); |
434 | } |
435 | |
436 | /* Skip closing quote char if there was one */ |
437 | NextChar (); |
438 | |
439 | /* Skip white space, read new input */ |
440 | SkipWhite (); |
441 | |
442 | } |
443 | |
444 | /* Terminate the string */ |
445 | SB_AppendChar (&S, '\0'); |
446 | |
447 | /* Add the whole string to the literal pool */ |
448 | NextTok.SVal = AddLiteralStr (&S); |
449 | |
450 | /* Free the buffer */ |
451 | SB_Done (&S); |
452 | } |
453 | |
454 | |
455 | |
456 | static void NumericConst (void) |
457 | /* Parse a numeric constant */ |
458 | { |
459 | unsigned Base; /* Temporary number base */ |
460 | unsigned Prefix; /* Base according to prefix */ |
461 | StrBuf S = STATIC_STRBUF_INITIALIZER; |
462 | int IsFloat; |
463 | char C; |
464 | unsigned DigitVal; |
465 | unsigned long IVal; /* Value */ |
466 | |
467 | /* Check for a leading hex, octal or binary prefix and determine the |
468 | ** possible integer types. |
469 | */ |
470 | if (CurC == '0') { |
471 | /* Gobble 0 and examine next char */ |
472 | NextChar (); |
473 | if (toupper (CurC) == 'X') { |
474 | Base = Prefix = 16; |
475 | NextChar (); /* gobble "x" */ |
476 | } else if (toupper (CurC) == 'B' && IS_Get (&Standard) >= STD_CC65) { |
477 | Base = Prefix = 2; |
478 | NextChar (); /* gobble 'b' */ |
479 | } else { |
480 | Base = 10; /* Assume 10 for now - see below */ |
481 | Prefix = 8; /* Actual prefix says octal */ |
482 | } |
483 | } else { |
484 | Base = Prefix = 10; |
485 | } |
486 | |
487 | /* Because floating point numbers don't have octal prefixes (a number |
488 | ** with a leading zero is decimal), we first have to read the number |
489 | ** before converting it, so we can determine if it's a float or an |
490 | ** integer. |
491 | */ |
492 | while (IsXDigit (CurC) && HexVal (CurC) < Base) { |
493 | SB_AppendChar (&S, CurC); |
494 | NextChar (); |
495 | } |
496 | SB_Terminate (&S); |
497 | |
498 | /* The following character tells us if we have an integer or floating |
499 | ** point constant. Note: Hexadecimal floating point constants aren't |
500 | ** supported in C89. |
501 | */ |
502 | IsFloat = (CurC == '.' || |
503 | (Base == 10 && toupper (CurC) == 'E') || |
504 | (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99)); |
505 | |
506 | /* If we don't have a floating point type, an octal prefix results in an |
507 | ** octal base. |
508 | */ |
509 | if (!IsFloat && Prefix == 8) { |
510 | Base = 8; |
511 | } |
512 | |
513 | /* Since we do now know the correct base, convert the remembered input |
514 | ** into a number. |
515 | */ |
516 | SB_Reset (&S); |
517 | IVal = 0; |
518 | while ((C = SB_Get (&S)) != '\0') { |
519 | DigitVal = HexVal (C); |
520 | if (DigitVal >= Base) { |
521 | Error ("Numeric constant contains digits beyond the radix" ); |
522 | } |
523 | IVal = (IVal * Base) + DigitVal; |
524 | } |
525 | |
526 | /* We don't need the string buffer any longer */ |
527 | SB_Done (&S); |
528 | |
529 | /* Distinguish between integer and floating point constants */ |
530 | if (!IsFloat) { |
531 | |
532 | unsigned Types; |
533 | int HaveSuffix; |
534 | |
535 | /* Check for a suffix and determine the possible types */ |
536 | HaveSuffix = 1; |
537 | if (toupper (CurC) == 'U') { |
538 | /* Unsigned type */ |
539 | NextChar (); |
540 | if (toupper (CurC) != 'L') { |
541 | Types = IT_UINT | IT_ULONG; |
542 | } else { |
543 | NextChar (); |
544 | Types = IT_ULONG; |
545 | } |
546 | } else if (toupper (CurC) == 'L') { |
547 | /* Long type */ |
548 | NextChar (); |
549 | if (toupper (CurC) != 'U') { |
550 | Types = IT_LONG | IT_ULONG; |
551 | } else { |
552 | NextChar (); |
553 | Types = IT_ULONG; |
554 | } |
555 | } else { |
556 | HaveSuffix = 0; |
557 | if (Prefix == 10) { |
558 | /* Decimal constants are of any type but uint */ |
559 | Types = IT_INT | IT_LONG | IT_ULONG; |
560 | } else { |
561 | /* Octal or hex constants are of any type */ |
562 | Types = IT_INT | IT_UINT | IT_LONG | IT_ULONG; |
563 | } |
564 | } |
565 | |
566 | /* Check the range to determine the type */ |
567 | if (IVal > 0x7FFF) { |
568 | /* Out of range for int */ |
569 | Types &= ~IT_INT; |
570 | /* If the value is in the range 0x8000..0xFFFF, unsigned int is not |
571 | ** allowed, and we don't have a type specifying suffix, emit a |
572 | ** warning, because the constant is of type long. |
573 | */ |
574 | if (IVal <= 0xFFFF && (Types & IT_UINT) == 0 && !HaveSuffix) { |
575 | Warning ("Constant is long" ); |
576 | } |
577 | } |
578 | if (IVal > 0xFFFF) { |
579 | /* Out of range for unsigned int */ |
580 | Types &= ~IT_UINT; |
581 | } |
582 | if (IVal > 0x7FFFFFFF) { |
583 | /* Out of range for long int */ |
584 | Types &= ~IT_LONG; |
585 | } |
586 | |
587 | /* Now set the type string to the smallest type in types */ |
588 | if (Types & IT_INT) { |
589 | NextTok.Type = type_int; |
590 | } else if (Types & IT_UINT) { |
591 | NextTok.Type = type_uint; |
592 | } else if (Types & IT_LONG) { |
593 | NextTok.Type = type_long; |
594 | } else { |
595 | NextTok.Type = type_ulong; |
596 | } |
597 | |
598 | /* Set the value and the token */ |
599 | NextTok.IVal = IVal; |
600 | NextTok.Tok = TOK_ICONST; |
601 | |
602 | } else { |
603 | |
604 | /* Float constant */ |
605 | Double FVal = FP_D_FromInt (IVal); /* Convert to double */ |
606 | |
607 | /* Check for a fractional part and read it */ |
608 | if (CurC == '.') { |
609 | |
610 | Double Scale; |
611 | |
612 | /* Skip the dot */ |
613 | NextChar (); |
614 | |
615 | /* Read fractional digits */ |
616 | Scale = FP_D_Make (1.0); |
617 | while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) { |
618 | /* Get the value of this digit */ |
619 | Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale); |
620 | /* Add it to the float value */ |
621 | FVal = FP_D_Add (FVal, FracVal); |
622 | /* Scale base */ |
623 | Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal)); |
624 | /* Skip the digit */ |
625 | NextChar (); |
626 | } |
627 | } |
628 | |
629 | /* Check for an exponent and read it */ |
630 | if ((Base == 16 && toupper (CurC) == 'F') || |
631 | (Base == 10 && toupper (CurC) == 'E')) { |
632 | |
633 | unsigned Digits; |
634 | unsigned Exp; |
635 | |
636 | /* Skip the exponent notifier */ |
637 | NextChar (); |
638 | |
639 | /* Read an optional sign */ |
640 | if (CurC == '-') { |
641 | NextChar (); |
642 | } else if (CurC == '+') { |
643 | NextChar (); |
644 | } |
645 | |
646 | /* Read exponent digits. Since we support only 32 bit floats |
647 | ** with a maximum exponent of +-/127, we read the exponent |
648 | ** part as integer with up to 3 digits and drop the remainder. |
649 | ** This avoids an overflow of Exp. The exponent is always |
650 | ** decimal, even for hex float consts. |
651 | */ |
652 | Digits = 0; |
653 | Exp = 0; |
654 | while (IsDigit (CurC)) { |
655 | if (++Digits <= 3) { |
656 | Exp = Exp * 10 + HexVal (CurC); |
657 | } |
658 | NextChar (); |
659 | } |
660 | |
661 | /* Check for errors: We must have exponent digits, and not more |
662 | ** than three. |
663 | */ |
664 | if (Digits == 0) { |
665 | Error ("Floating constant exponent has no digits" ); |
666 | } else if (Digits > 3) { |
667 | Warning ("Floating constant exponent is too large" ); |
668 | } |
669 | |
670 | /* Scale the exponent and adjust the value accordingly */ |
671 | if (Exp) { |
672 | FVal = FP_D_Mul (FVal, FP_D_Make (pow (10, Exp))); |
673 | } |
674 | } |
675 | |
676 | /* Check for a suffix and determine the type of the constant */ |
677 | if (toupper (CurC) == 'F') { |
678 | NextChar (); |
679 | NextTok.Type = type_float; |
680 | } else { |
681 | NextTok.Type = type_double; |
682 | } |
683 | |
684 | /* Set the value and the token */ |
685 | NextTok.FVal = FVal; |
686 | NextTok.Tok = TOK_FCONST; |
687 | |
688 | } |
689 | } |
690 | |
691 | |
692 | |
693 | void NextToken (void) |
694 | /* Get next token from input stream */ |
695 | { |
696 | ident token; |
697 | |
698 | /* We have to skip white space here before shifting tokens, since the |
699 | ** tokens and the current line info is invalid at startup and will get |
700 | ** initialized by reading the first time from the file. Remember if |
701 | ** we were at end of input and handle that later. |
702 | */ |
703 | int GotEOF = (SkipWhite() == 0); |
704 | |
705 | /* Current token is the lookahead token */ |
706 | if (CurTok.LI) { |
707 | ReleaseLineInfo (CurTok.LI); |
708 | } |
709 | CurTok = NextTok; |
710 | |
711 | /* When reading the first time from the file, the line info in NextTok, |
712 | ** which was copied to CurTok is invalid. Since the information from |
713 | ** the token is used for error messages, we must make it valid. |
714 | */ |
715 | if (CurTok.LI == 0) { |
716 | CurTok.LI = UseLineInfo (GetCurLineInfo ()); |
717 | } |
718 | |
719 | /* Remember the starting position of the next token */ |
720 | NextTok.LI = UseLineInfo (GetCurLineInfo ()); |
721 | |
722 | /* Now handle end of input. */ |
723 | if (GotEOF) { |
724 | /* End of file reached */ |
725 | NextTok.Tok = TOK_CEOF; |
726 | return; |
727 | } |
728 | |
729 | /* Determine the next token from the lookahead */ |
730 | if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) { |
731 | /* A number */ |
732 | NumericConst (); |
733 | return; |
734 | } |
735 | |
736 | /* Check for wide character literals */ |
737 | if (CurC == 'L' && NextC == '\"') { |
738 | StringConst (); |
739 | return; |
740 | } |
741 | |
742 | /* Check for keywords and identifiers */ |
743 | if (IsSym (token)) { |
744 | |
745 | /* Check for a keyword */ |
746 | if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) { |
747 | /* Reserved word found */ |
748 | return; |
749 | } |
750 | /* No reserved word, check for special symbols */ |
751 | if (token[0] == '_' && token[1] == '_') { |
752 | /* Special symbols */ |
753 | if (strcmp (token+2, "FILE__" ) == 0) { |
754 | NextTok.SVal = AddLiteral (GetCurrentFile()); |
755 | NextTok.Tok = TOK_SCONST; |
756 | return; |
757 | } else if (strcmp (token+2, "LINE__" ) == 0) { |
758 | NextTok.Tok = TOK_ICONST; |
759 | NextTok.IVal = GetCurrentLine(); |
760 | NextTok.Type = type_int; |
761 | return; |
762 | } else if (strcmp (token+2, "func__" ) == 0) { |
763 | /* __func__ is only defined in functions */ |
764 | if (CurrentFunc) { |
765 | NextTok.SVal = AddLiteral (F_GetFuncName (CurrentFunc)); |
766 | NextTok.Tok = TOK_SCONST; |
767 | return; |
768 | } |
769 | } |
770 | } |
771 | |
772 | /* No reserved word but identifier */ |
773 | strcpy (NextTok.Ident, token); |
774 | NextTok.Tok = TOK_IDENT; |
775 | return; |
776 | } |
777 | |
778 | /* Monstrous switch statement ahead... */ |
779 | switch (CurC) { |
780 | |
781 | case '!': |
782 | NextChar (); |
783 | if (CurC == '=') { |
784 | SetTok (TOK_NE); |
785 | } else { |
786 | NextTok.Tok = TOK_BOOL_NOT; |
787 | } |
788 | break; |
789 | |
790 | case '\"': |
791 | StringConst (); |
792 | break; |
793 | |
794 | case '%': |
795 | NextChar (); |
796 | if (CurC == '=') { |
797 | SetTok (TOK_MOD_ASSIGN); |
798 | } else { |
799 | NextTok.Tok = TOK_MOD; |
800 | } |
801 | break; |
802 | |
803 | case '&': |
804 | NextChar (); |
805 | switch (CurC) { |
806 | case '&': |
807 | SetTok (TOK_BOOL_AND); |
808 | break; |
809 | case '=': |
810 | SetTok (TOK_AND_ASSIGN); |
811 | break; |
812 | default: |
813 | NextTok.Tok = TOK_AND; |
814 | } |
815 | break; |
816 | |
817 | case '\'': |
818 | CharConst (); |
819 | break; |
820 | |
821 | case '(': |
822 | SetTok (TOK_LPAREN); |
823 | break; |
824 | |
825 | case ')': |
826 | SetTok (TOK_RPAREN); |
827 | break; |
828 | |
829 | case '*': |
830 | NextChar (); |
831 | if (CurC == '=') { |
832 | SetTok (TOK_MUL_ASSIGN); |
833 | } else { |
834 | NextTok.Tok = TOK_STAR; |
835 | } |
836 | break; |
837 | |
838 | case '+': |
839 | NextChar (); |
840 | switch (CurC) { |
841 | case '+': |
842 | SetTok (TOK_INC); |
843 | break; |
844 | case '=': |
845 | SetTok (TOK_PLUS_ASSIGN); |
846 | break; |
847 | default: |
848 | NextTok.Tok = TOK_PLUS; |
849 | } |
850 | break; |
851 | |
852 | case ',': |
853 | SetTok (TOK_COMMA); |
854 | break; |
855 | |
856 | case '-': |
857 | NextChar (); |
858 | switch (CurC) { |
859 | case '-': |
860 | SetTok (TOK_DEC); |
861 | break; |
862 | case '=': |
863 | SetTok (TOK_MINUS_ASSIGN); |
864 | break; |
865 | case '>': |
866 | SetTok (TOK_PTR_REF); |
867 | break; |
868 | default: |
869 | NextTok.Tok = TOK_MINUS; |
870 | } |
871 | break; |
872 | |
873 | case '.': |
874 | NextChar (); |
875 | if (CurC == '.') { |
876 | NextChar (); |
877 | if (CurC == '.') { |
878 | SetTok (TOK_ELLIPSIS); |
879 | } else { |
880 | UnknownChar (CurC); |
881 | } |
882 | } else { |
883 | NextTok.Tok = TOK_DOT; |
884 | } |
885 | break; |
886 | |
887 | case '/': |
888 | NextChar (); |
889 | if (CurC == '=') { |
890 | SetTok (TOK_DIV_ASSIGN); |
891 | } else { |
892 | NextTok.Tok = TOK_DIV; |
893 | } |
894 | break; |
895 | |
896 | case ':': |
897 | SetTok (TOK_COLON); |
898 | break; |
899 | |
900 | case ';': |
901 | SetTok (TOK_SEMI); |
902 | break; |
903 | |
904 | case '<': |
905 | NextChar (); |
906 | switch (CurC) { |
907 | case '=': |
908 | SetTok (TOK_LE); |
909 | break; |
910 | case '<': |
911 | NextChar (); |
912 | if (CurC == '=') { |
913 | SetTok (TOK_SHL_ASSIGN); |
914 | } else { |
915 | NextTok.Tok = TOK_SHL; |
916 | } |
917 | break; |
918 | default: |
919 | NextTok.Tok = TOK_LT; |
920 | } |
921 | break; |
922 | |
923 | case '=': |
924 | NextChar (); |
925 | if (CurC == '=') { |
926 | SetTok (TOK_EQ); |
927 | } else { |
928 | NextTok.Tok = TOK_ASSIGN; |
929 | } |
930 | break; |
931 | |
932 | case '>': |
933 | NextChar (); |
934 | switch (CurC) { |
935 | case '=': |
936 | SetTok (TOK_GE); |
937 | break; |
938 | case '>': |
939 | NextChar (); |
940 | if (CurC == '=') { |
941 | SetTok (TOK_SHR_ASSIGN); |
942 | } else { |
943 | NextTok.Tok = TOK_SHR; |
944 | } |
945 | break; |
946 | default: |
947 | NextTok.Tok = TOK_GT; |
948 | } |
949 | break; |
950 | |
951 | case '?': |
952 | SetTok (TOK_QUEST); |
953 | break; |
954 | |
955 | case '[': |
956 | SetTok (TOK_LBRACK); |
957 | break; |
958 | |
959 | case ']': |
960 | SetTok (TOK_RBRACK); |
961 | break; |
962 | |
963 | case '^': |
964 | NextChar (); |
965 | if (CurC == '=') { |
966 | SetTok (TOK_XOR_ASSIGN); |
967 | } else { |
968 | NextTok.Tok = TOK_XOR; |
969 | } |
970 | break; |
971 | |
972 | case '{': |
973 | SetTok (TOK_LCURLY); |
974 | break; |
975 | |
976 | case '|': |
977 | NextChar (); |
978 | switch (CurC) { |
979 | case '|': |
980 | SetTok (TOK_BOOL_OR); |
981 | break; |
982 | case '=': |
983 | SetTok (TOK_OR_ASSIGN); |
984 | break; |
985 | default: |
986 | NextTok.Tok = TOK_OR; |
987 | } |
988 | break; |
989 | |
990 | case '}': |
991 | SetTok (TOK_RCURLY); |
992 | break; |
993 | |
994 | case '~': |
995 | SetTok (TOK_COMP); |
996 | break; |
997 | |
998 | default: |
999 | UnknownChar (CurC); |
1000 | |
1001 | } |
1002 | |
1003 | } |
1004 | |
1005 | |
1006 | |
1007 | void SkipTokens (const token_t* TokenList, unsigned TokenCount) |
1008 | /* Skip tokens until we reach TOK_CEOF or a token in the given token list. |
1009 | ** This routine is used for error recovery. |
1010 | */ |
1011 | { |
1012 | while (CurTok.Tok != TOK_CEOF) { |
1013 | |
1014 | /* Check if the current token is in the token list */ |
1015 | unsigned I; |
1016 | for (I = 0; I < TokenCount; ++I) { |
1017 | if (CurTok.Tok == TokenList[I]) { |
1018 | /* Found a token in the list */ |
1019 | return; |
1020 | } |
1021 | } |
1022 | |
1023 | /* Not in the list: Skip it */ |
1024 | NextToken (); |
1025 | |
1026 | } |
1027 | } |
1028 | |
1029 | |
1030 | |
1031 | int Consume (token_t Token, const char* ErrorMsg) |
1032 | /* Eat token if it is the next in the input stream, otherwise print an error |
1033 | ** message. Returns true if the token was found and false otherwise. |
1034 | */ |
1035 | { |
1036 | if (CurTok.Tok == Token) { |
1037 | NextToken (); |
1038 | return 1; |
1039 | } else { |
1040 | Error ("%s" , ErrorMsg); |
1041 | return 0; |
1042 | } |
1043 | } |
1044 | |
1045 | |
1046 | |
1047 | int ConsumeColon (void) |
1048 | /* Check for a colon and skip it. */ |
1049 | { |
1050 | return Consume (TOK_COLON, "':' expected" ); |
1051 | } |
1052 | |
1053 | |
1054 | |
1055 | int ConsumeSemi (void) |
1056 | /* Check for a semicolon and skip it. */ |
1057 | { |
1058 | /* Try do be smart about typos... */ |
1059 | if (CurTok.Tok == TOK_SEMI) { |
1060 | NextToken (); |
1061 | return 1; |
1062 | } else { |
1063 | Error ("';' expected" ); |
1064 | if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) { |
1065 | NextToken (); |
1066 | } |
1067 | return 0; |
1068 | } |
1069 | } |
1070 | |
1071 | |
1072 | |
1073 | int ConsumeComma (void) |
1074 | /* Check for a comma and skip it. */ |
1075 | { |
1076 | /* Try do be smart about typos... */ |
1077 | if (CurTok.Tok == TOK_COMMA) { |
1078 | NextToken (); |
1079 | return 1; |
1080 | } else { |
1081 | Error ("',' expected" ); |
1082 | if (CurTok.Tok == TOK_SEMI) { |
1083 | NextToken (); |
1084 | } |
1085 | return 0; |
1086 | } |
1087 | } |
1088 | |
1089 | |
1090 | |
1091 | int ConsumeLParen (void) |
1092 | /* Check for a left parenthesis and skip it */ |
1093 | { |
1094 | return Consume (TOK_LPAREN, "'(' expected" ); |
1095 | } |
1096 | |
1097 | |
1098 | |
1099 | int ConsumeRParen (void) |
1100 | /* Check for a right parenthesis and skip it */ |
1101 | { |
1102 | return Consume (TOK_RPAREN, "')' expected" ); |
1103 | } |
1104 | |
1105 | |
1106 | |
1107 | int ConsumeLBrack (void) |
1108 | /* Check for a left bracket and skip it */ |
1109 | { |
1110 | return Consume (TOK_LBRACK, "'[' expected" ); |
1111 | } |
1112 | |
1113 | |
1114 | |
1115 | int ConsumeRBrack (void) |
1116 | /* Check for a right bracket and skip it */ |
1117 | { |
1118 | return Consume (TOK_RBRACK, "']' expected" ); |
1119 | } |
1120 | |
1121 | |
1122 | |
1123 | int ConsumeLCurly (void) |
1124 | /* Check for a left curly brace and skip it */ |
1125 | { |
1126 | return Consume (TOK_LCURLY, "'{' expected" ); |
1127 | } |
1128 | |
1129 | |
1130 | |
1131 | int ConsumeRCurly (void) |
1132 | /* Check for a right curly brace and skip it */ |
1133 | { |
1134 | return Consume (TOK_RCURLY, "'}' expected" ); |
1135 | } |
1136 | |