1/*****************************************************************************/
2/* */
3/* scanner.c */
4/* */
5/* The scanner for the ca65 macroassembler */
6/* */
7/* */
8/* */
9/* (C) 1998-2013, Ullrich von Bassewitz */
10/* Roemerstrasse 52 */
11/* D-70794 Filderstadt */
12/* EMail: uz@cc65.org */
13/* */
14/* */
15/* This software is provided 'as-is', without any expressed or implied */
16/* warranty. In no event will the authors be held liable for any damages */
17/* arising from the use of this software. */
18/* */
19/* Permission is granted to anyone to use this software for any purpose, */
20/* including commercial applications, and to alter it and redistribute it */
21/* freely, subject to the following restrictions: */
22/* */
23/* 1. The origin of this software must not be misrepresented; you must not */
24/* claim that you wrote the original software. If you use this software */
25/* in a product, an acknowledgment in the product documentation would be */
26/* appreciated but is not required. */
27/* 2. Altered source versions must be plainly marked as such, and must not */
28/* be misrepresented as being the original software. */
29/* 3. This notice may not be removed or altered from any source */
30/* distribution. */
31/* */
32/*****************************************************************************/
33
34
35
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <ctype.h>
40#include <errno.h>
41
42/* common */
43#include "addrsize.h"
44#include "attrib.h"
45#include "chartype.h"
46#include "check.h"
47#include "filestat.h"
48#include "fname.h"
49#include "xmalloc.h"
50
51/* ca65 */
52#include "condasm.h"
53#include "error.h"
54#include "filetab.h"
55#include "global.h"
56#include "incpath.h"
57#include "instr.h"
58#include "istack.h"
59#include "listing.h"
60#include "macro.h"
61#include "toklist.h"
62#include "scanner.h"
63
64
65
66/*****************************************************************************/
67/* Data */
68/*****************************************************************************/
69
70
71
/* Current input token incl. attributes. The scanner functions in this file
** fill in the token code (Tok), the source position (Pos), the whitespace
** flag (WS), the integer value (IVal) and the string value (SVal).
*/
Token CurTok = STATIC_TOKEN_INITIALIZER;
74
/* Struct to handle include files. An instance of this struct is embedded in
** the union of struct CharSource for every file based input source.
** NOTE(review): the Tok, C and Next members are not accessed by the code
** visible in this part of the file, which uses the members of the same name
** in struct CharSource instead - confirm before relying on them.
*/
typedef struct InputFile InputFile;
struct InputFile {
    FILE*           F;                  /* Input file descriptor */
    FilePos         Pos;                /* Position in file */
    token_t         Tok;                /* Last token */
    int             C;                  /* Last character */
    StrBuf          Line;               /* The current input line */
    int             IncSearchPath;      /* True if we've added an include search path */
    int             BinSearchPath;      /* True if we've added a binary search path */
    InputFile*      Next;               /* Linked list of input files */
};
87
/* Struct to handle textual input data. The text is a zero terminated string,
** the terminator marks the end of the input (see IDNextChar).
** NOTE(review): the Tok, C and Next members are not accessed by the code
** visible in this part of the file, which uses the members of the same name
** in struct CharSource instead - confirm before relying on them.
*/
typedef struct InputData InputData;
struct InputData {
    char*           Text;               /* Pointer to the text data */
    const char*     Pos;                /* Pointer to current position */
    int             Malloced;           /* Memory was malloced, Text is freed in IDDone */
    token_t         Tok;                /* Last token */
    int             C;                  /* Last character */
    InputData*      Next;               /* Linked list of input data */
};
98
/* Input source: Either file or data. Forward declaration, the struct itself
** is defined below, since the function table needs the type name.
*/
typedef struct CharSource CharSource;

/* Set of input functions. Each kind of input source supplies one constant
** table of this type; the scanner calls the functions through the Func
** pointer of the active CharSource.
*/
typedef struct CharSourceFunctions CharSourceFunctions;
struct CharSourceFunctions {
    void (*MarkStart) (CharSource*);    /* Mark the start pos of a token */
    void (*NextChar) (CharSource*);     /* Read next char from input, result goes into C */
    void (*Done) (CharSource*);         /* Close input source */
};
109
/* Input source: Either file or data. The sources form a stack that is linked
** via Next. Tok and C hold the token and the input character that were
** current when this source was activated; they are restored when the source
** is closed (see UseCharSource and DoneCharSource).
*/
struct CharSource {
    CharSource*                 Next;   /* Linked list of char sources */
    token_t                     Tok;    /* Last token */
    int                         C;      /* Last character */
    const CharSourceFunctions*  Func;   /* Pointer to function table */
    union {
        InputFile               File;   /* File data */
        InputData               Data;   /* Textual data */
    }                           V;      /* Which member is valid depends on Func */
};
121
/* Current input variables */
static CharSource* Source       = 0;    /* Current char source (top of the source stack) */
static unsigned    FCount       = 0;    /* Count of input files that are currently open */
static int         C            = 0;    /* Current input character, EOF at end of a source */

/* Force end of assembly. If this is set, NextRawTok() delivers TOK_EOF
** without reading any further input.
*/
int ForcedEnd = 0;
129
/* List of dot keywords with the corresponding tokens. Several keywords may
** map to the same token (abbreviations like ".BYT" and ".BYTE").
** The table MUST be kept sorted by Key in strcmp() order, because
** FindDotKeyword() searches it using bsearch(). All keys are upper case; the
** string to look up is converted to upper case before the search.
*/
struct DotKeyword {
    const char* Key;                    /* MUST be first field */
    token_t     Tok;                    /* Token delivered for this keyword */
} DotKeywords [] = {
    { ".A16",           TOK_A16 },
    { ".A8",            TOK_A8 },
    { ".ADDR",          TOK_ADDR },
    { ".ADDRSIZE",      TOK_ADDRSIZE },
    { ".ALIGN",         TOK_ALIGN },
    { ".AND",           TOK_BOOLAND },
    { ".ASCIIZ",        TOK_ASCIIZ },
    { ".ASIZE",         TOK_ASIZE },
    { ".ASSERT",        TOK_ASSERT },
    { ".AUTOIMPORT",    TOK_AUTOIMPORT },
    { ".BANK",          TOK_BANK },
    { ".BANKBYTE",      TOK_BANKBYTE },
    { ".BANKBYTES",     TOK_BANKBYTES },
    { ".BITAND",        TOK_AND },
    { ".BITNOT",        TOK_NOT },
    { ".BITOR",         TOK_OR },
    { ".BITXOR",        TOK_XOR },
    { ".BLANK",         TOK_BLANK },
    { ".BSS",           TOK_BSS },
    { ".BYT",           TOK_BYTE },
    { ".BYTE",          TOK_BYTE },
    { ".CASE",          TOK_CASE },
    { ".CHARMAP",       TOK_CHARMAP },
    { ".CODE",          TOK_CODE },
    { ".CONCAT",        TOK_CONCAT },
    { ".CONDES",        TOK_CONDES },
    { ".CONST",         TOK_CONST },
    { ".CONSTRUCTOR",   TOK_CONSTRUCTOR },
    { ".CPU",           TOK_CPU },
    { ".DATA",          TOK_DATA },
    { ".DBG",           TOK_DBG },
    { ".DBYT",          TOK_DBYT },
    { ".DEBUGINFO",     TOK_DEBUGINFO },
    { ".DEF",           TOK_DEFINED },
    { ".DEFINE",        TOK_DEFINE },
    { ".DEFINED",       TOK_DEFINED },
    { ".DEFINEDMACRO",  TOK_DEFINEDMACRO },
    { ".DELMAC",        TOK_DELMAC },
    { ".DELMACRO",      TOK_DELMAC },
    { ".DESTRUCTOR",    TOK_DESTRUCTOR },
    { ".DWORD",         TOK_DWORD },
    { ".ELSE",          TOK_ELSE },
    { ".ELSEIF",        TOK_ELSEIF },
    { ".END",           TOK_END },
    { ".ENDENUM",       TOK_ENDENUM },
    { ".ENDIF",         TOK_ENDIF },
    { ".ENDMAC",        TOK_ENDMACRO },
    { ".ENDMACRO",      TOK_ENDMACRO },
    { ".ENDPROC",       TOK_ENDPROC },
    { ".ENDREP",        TOK_ENDREP },
    { ".ENDREPEAT",     TOK_ENDREP },
    { ".ENDSCOPE",      TOK_ENDSCOPE },
    { ".ENDSTRUCT",     TOK_ENDSTRUCT },
    { ".ENDUNION",      TOK_ENDUNION },
    { ".ENUM",          TOK_ENUM },
    { ".ERROR",         TOK_ERROR },
    { ".EXITMAC",       TOK_EXITMACRO },
    { ".EXITMACRO",     TOK_EXITMACRO },
    { ".EXPORT",        TOK_EXPORT },
    { ".EXPORTZP",      TOK_EXPORTZP },
    { ".FARADDR",       TOK_FARADDR },
    { ".FATAL",         TOK_FATAL },
    { ".FEATURE",       TOK_FEATURE },
    { ".FILEOPT",       TOK_FILEOPT },
    { ".FOPT",          TOK_FILEOPT },
    { ".FORCEIMPORT",   TOK_FORCEIMPORT },
    { ".FORCEWORD",     TOK_FORCEWORD },
    { ".GLOBAL",        TOK_GLOBAL },
    { ".GLOBALZP",      TOK_GLOBALZP },
    { ".HIBYTE",        TOK_HIBYTE },
    { ".HIBYTES",       TOK_HIBYTES },
    { ".HIWORD",        TOK_HIWORD },
    { ".I16",           TOK_I16 },
    { ".I8",            TOK_I8 },
    { ".IDENT",         TOK_MAKEIDENT },
    { ".IF",            TOK_IF },
    { ".IFBLANK",       TOK_IFBLANK },
    { ".IFCONST",       TOK_IFCONST },
    { ".IFDEF",         TOK_IFDEF },
    { ".IFNBLANK",      TOK_IFNBLANK },
    { ".IFNCONST",      TOK_IFNCONST },
    { ".IFNDEF",        TOK_IFNDEF },
    { ".IFNREF",        TOK_IFNREF },
    { ".IFP02",         TOK_IFP02 },
    { ".IFP4510",       TOK_IFP4510 },
    { ".IFP816",        TOK_IFP816 },
    { ".IFPC02",        TOK_IFPC02 },
    { ".IFPSC02",       TOK_IFPSC02 },
    { ".IFREF",         TOK_IFREF },
    { ".IMPORT",        TOK_IMPORT },
    { ".IMPORTZP",      TOK_IMPORTZP },
    { ".INCBIN",        TOK_INCBIN },
    { ".INCLUDE",       TOK_INCLUDE },
    { ".INTERRUPTOR",   TOK_INTERRUPTOR },
    { ".ISIZE",         TOK_ISIZE },
    { ".ISMNEM",        TOK_ISMNEMONIC },
    { ".ISMNEMONIC",    TOK_ISMNEMONIC },
    { ".LEFT",          TOK_LEFT },
    { ".LINECONT",      TOK_LINECONT },
    { ".LIST",          TOK_LIST },
    { ".LISTBYTES",     TOK_LISTBYTES },
    { ".LOBYTE",        TOK_LOBYTE },
    { ".LOBYTES",       TOK_LOBYTES },
    { ".LOCAL",         TOK_LOCAL },
    { ".LOCALCHAR",     TOK_LOCALCHAR },
    { ".LOWORD",        TOK_LOWORD },
    { ".MAC",           TOK_MACRO },
    { ".MACPACK",       TOK_MACPACK },
    { ".MACRO",         TOK_MACRO },
    { ".MATCH",         TOK_MATCH },
    { ".MAX",           TOK_MAX },
    { ".MID",           TOK_MID },
    { ".MIN",           TOK_MIN },
    { ".MOD",           TOK_MOD },
    { ".NOT",           TOK_BOOLNOT },
    { ".NULL",          TOK_NULL },
    { ".OR",            TOK_BOOLOR },
    { ".ORG",           TOK_ORG },
    { ".OUT",           TOK_OUT },
    { ".P02",           TOK_P02 },
    { ".P4510",         TOK_P4510 },
    { ".P816",          TOK_P816 },
    { ".PAGELEN",       TOK_PAGELENGTH },
    { ".PAGELENGTH",    TOK_PAGELENGTH },
    { ".PARAMCOUNT",    TOK_PARAMCOUNT },
    { ".PC02",          TOK_PC02 },
    { ".POPCPU",        TOK_POPCPU },
    { ".POPSEG",        TOK_POPSEG },
    { ".PROC",          TOK_PROC },
    { ".PSC02",         TOK_PSC02 },
    { ".PUSHCPU",       TOK_PUSHCPU },
    { ".PUSHSEG",       TOK_PUSHSEG },
    { ".REF",           TOK_REFERENCED },
    { ".REFERENCED",    TOK_REFERENCED },
    { ".RELOC",         TOK_RELOC },
    { ".REPEAT",        TOK_REPEAT },
    { ".RES",           TOK_RES },
    { ".RIGHT",         TOK_RIGHT },
    { ".RODATA",        TOK_RODATA },
    { ".SCOPE",         TOK_SCOPE },
    { ".SEGMENT",       TOK_SEGMENT },
    { ".SET",           TOK_SET },
    { ".SETCPU",        TOK_SETCPU },
    { ".SHL",           TOK_SHL },
    { ".SHR",           TOK_SHR },
    { ".SIZEOF",        TOK_SIZEOF },
    { ".SMART",         TOK_SMART },
    { ".SPRINTF",       TOK_SPRINTF },
    { ".STRAT",         TOK_STRAT },
    { ".STRING",        TOK_STRING },
    { ".STRLEN",        TOK_STRLEN },
    { ".STRUCT",        TOK_STRUCT },
    { ".TAG",           TOK_TAG },
    { ".TCOUNT",        TOK_TCOUNT },
    { ".TIME",          TOK_TIME },
    { ".UNDEF",         TOK_UNDEF },
    { ".UNDEFINE",      TOK_UNDEF },
    { ".UNION",         TOK_UNION },
    { ".VERSION",       TOK_VERSION },
    { ".WARNING",       TOK_WARNING },
    { ".WORD",          TOK_WORD },
    { ".XMATCH",        TOK_XMATCH },
    { ".XOR",           TOK_BOOLXOR },
    { ".ZEROPAGE",      TOK_ZEROPAGE },
};
300
301
302
303/*****************************************************************************/
304/* CharSource functions */
305/*****************************************************************************/
306
307
308
309static void UseCharSource (CharSource* S)
310/* Initialize a new input source and start to use it. */
311{
312 /* Remember the current input char and token */
313 S->Tok = CurTok.Tok;
314 S->C = C;
315
316 /* Use the new input source */
317 S->Next = Source;
318 Source = S;
319
320 /* Read the first character from the new file */
321 S->Func->NextChar (S);
322
323 /* Setup the next token so it will be skipped on the next call to
324 ** NextRawTok().
325 */
326 CurTok.Tok = TOK_SEP;
327}
328
329
330
331static void DoneCharSource (void)
332/* Close the top level character source */
333{
334 CharSource* S;
335
336 /* First, call the type specific function */
337 Source->Func->Done (Source);
338
339 /* Restore the old token */
340 CurTok.Tok = Source->Tok;
341 C = Source->C;
342
343 /* Remember the last stacked input source */
344 S = Source->Next;
345
346 /* Delete the top level one ... */
347 xfree (Source);
348
349 /* ... and use the one before */
350 Source = S;
351}
352
353
354
355/*****************************************************************************/
356/* InputFile functions */
357/*****************************************************************************/
358
359
360
static void IFMarkStart (CharSource* S)
/* Mark the start of the next token: Copy the current position of the input
** file S (as maintained by IFNextChar) into the current token.
*/
{
    CurTok.Pos = S->V.File.Pos;
}
366
367
368
369static void IFNextChar (CharSource* S)
370/* Read the next character from the input file */
371{
372 /* Check for end of line, read the next line if needed */
373 while (SB_GetIndex (&S->V.File.Line) >= SB_GetLen (&S->V.File.Line)) {
374
375 unsigned Len;
376
377 /* End of current line reached, read next line */
378 SB_Clear (&S->V.File.Line);
379 while (1) {
380
381 int N = fgetc (S->V.File.F);
382 if (N == EOF) {
383 /* End of file. Accept files without a newline at the end */
384 if (SB_NotEmpty (&S->V.File.Line)) {
385 break;
386 }
387
388 /* No more data - add an empty line to the listing. This
389 ** is a small hack needed to keep the PC output in sync.
390 */
391 NewListingLine (&EmptyStrBuf, S->V.File.Pos.Name, FCount);
392 C = EOF;
393 return;
394
395 /* Check for end of line */
396 } else if (N == '\n') {
397
398 /* End of line */
399 break;
400
401 /* Collect other stuff */
402 } else {
403
404 /* Append data to line */
405 SB_AppendChar (&S->V.File.Line, N);
406
407 }
408 }
409
410
411 /* If we come here, we have a new input line. To avoid problems
412 ** with strange line terminators, remove all whitespace from the
413 ** end of the line, then add a single newline.
414 */
415 Len = SB_GetLen (&S->V.File.Line);
416 while (Len > 0 && IsSpace (SB_AtUnchecked (&S->V.File.Line, Len-1))) {
417 --Len;
418 }
419 SB_Drop (&S->V.File.Line, SB_GetLen (&S->V.File.Line) - Len);
420 SB_AppendChar (&S->V.File.Line, '\n');
421
422 /* Terminate the string buffer */
423 SB_Terminate (&S->V.File.Line);
424
425 /* One more line */
426 S->V.File.Pos.Line++;
427
428 /* Remember the new line for the listing */
429 NewListingLine (&S->V.File.Line, S->V.File.Pos.Name, FCount);
430
431 }
432
433 /* Set the column pointer */
434 S->V.File.Pos.Col = SB_GetIndex (&S->V.File.Line);
435
436 /* Return the next character from the buffer */
437 C = SB_Get (&S->V.File.Line);
438}
439
440
441
void IFDone (CharSource* S)
/* Close the current input file: Check for open conditionals, remove the
** search paths that were added for this file, free the line buffer, close
** the file and decrement the count of open input files.
*/
{
    /* We're at the end of an include file. Check if we have any
    ** open .IFs, or any open token lists in this file. This
    ** enforcement is artificial: conditionals that start in one
    ** file and end in another are uncommon, and not allowing
    ** these things helps to find errors.
    */
    CheckOpenIfs ();

    /* If we've added search paths for this file, remove them. The flags
    ** hold the results of the PushSearchPath calls in NewInputFile.
    */
    if (S->V.File.IncSearchPath) {
        PopSearchPath (IncSearchPath);
    }
    if (S->V.File.BinSearchPath) {
        PopSearchPath (BinSearchPath);
    }

    /* Free the line buffer */
    SB_Done (&S->V.File.Line);

    /* Close the input file and decrement the file count. We will ignore
    ** errors here, since we were just reading from the file.
    */
    (void) fclose (S->V.File.F);
    --FCount;
}
470
471
472
/* Set of input file handling functions. The initializers are listed in the
** order of the members of struct CharSourceFunctions: MarkStart, NextChar
** and Done.
*/
static const CharSourceFunctions IFFunc = {
    IFMarkStart,
    IFNextChar,
    IFDone
};
479
480
481
482int NewInputFile (const char* Name)
483/* Open a new input file. Returns true if the file could be successfully opened
484** and false otherwise.
485*/
486{
487 int RetCode = 0; /* Return code. Assume an error. */
488 char* PathName = 0;
489 FILE* F;
490 struct stat Buf;
491 StrBuf NameBuf; /* No need to initialize */
492 StrBuf Path = AUTO_STRBUF_INITIALIZER;
493 unsigned FileIdx;
494 CharSource* S;
495
496
497 /* If this is the main file, just try to open it. If it's an include file,
498 ** search for it using the include path list.
499 */
500 if (FCount == 0) {
501 /* Main file */
502 F = fopen (Name, "r");
503 if (F == 0) {
504 Fatal ("Cannot open input file '%s': %s", Name, strerror (errno));
505 }
506 } else {
507 /* We are on include level. Search for the file in the include
508 ** directories.
509 */
510 PathName = SearchFile (IncSearchPath, Name);
511 if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
512 /* Not found or cannot open, print an error and bail out */
513 Error ("Cannot open include file '%s': %s", Name, strerror (errno));
514 goto ExitPoint;
515 }
516
517 /* Use the path name from now on */
518 Name = PathName;
519 }
520
521 /* Stat the file and remember the values. There's a race condition here,
522 ** since we cannot use fileno() (non-standard identifier in standard
523 ** header file), and therefore not fstat. When using stat with the
524 ** file name, there's a risk that the file was deleted and recreated
525 ** while it was open. Since mtime and size are only used to check
526 ** if a file has changed in the debugger, we will ignore this problem
527 ** here.
528 */
529 if (FileStat (Name, &Buf) != 0) {
530 Fatal ("Cannot stat input file '%s': %s", Name, strerror (errno));
531 }
532
533 /* Add the file to the input file table and remember the index */
534 FileIdx = AddFile (SB_InitFromString (&NameBuf, Name),
535 (FCount == 0)? FT_MAIN : FT_INCLUDE,
536 Buf.st_size, (unsigned long) Buf.st_mtime);
537
538 /* Create a new input source variable and initialize it */
539 S = xmalloc (sizeof (*S));
540 S->Func = &IFFunc;
541 S->V.File.F = F;
542 S->V.File.Pos.Line = 0;
543 S->V.File.Pos.Col = 0;
544 S->V.File.Pos.Name = FileIdx;
545 SB_Init (&S->V.File.Line);
546
547 /* Push the path for this file onto the include search lists */
548 SB_CopyBuf (&Path, Name, FindName (Name) - Name);
549 SB_Terminate (&Path);
550 S->V.File.IncSearchPath = PushSearchPath (IncSearchPath, SB_GetConstBuf (&Path));
551 S->V.File.BinSearchPath = PushSearchPath (BinSearchPath, SB_GetConstBuf (&Path));
552 SB_Done (&Path);
553
554 /* Count active input files */
555 ++FCount;
556
557 /* Use this input source */
558 UseCharSource (S);
559
560 /* File successfully opened */
561 RetCode = 1;
562
563ExitPoint:
564 /* Free an allocated name buffer */
565 xfree (PathName);
566
567 /* Return the success code */
568 return RetCode;
569}
570
571
572
573/*****************************************************************************/
574/* InputData functions */
575/*****************************************************************************/
576
577
578
static void IDMarkStart (CharSource* S attribute ((unused)))
/* Mark the start of the next token. Textual input data keeps no position
** information, so CurTok.Pos is left untouched. The function exists to fill
** the MarkStart slot of the function table.
*/
{
    /* Nothing to do here */
}
584
585
586
587static void IDNextChar (CharSource* S)
588/* Read the next character from the input text */
589{
590 C = *S->V.Data.Pos++;
591 if (C == '\0') {
592 /* End of input data */
593 --S->V.Data.Pos;
594 C = EOF;
595 }
596}
597
598
599
600void IDDone (CharSource* S)
601/* Close the current input data */
602{
603 /* Cleanup the current stuff */
604 if (S->V.Data.Malloced) {
605 xfree (S->V.Data.Text);
606 }
607}
608
609
610
/* Set of input data handling functions. The initializers are listed in the
** order of the members of struct CharSourceFunctions: MarkStart, NextChar
** and Done.
*/
static const CharSourceFunctions IDFunc = {
    IDMarkStart,
    IDNextChar,
    IDDone
};
617
618
619
620void NewInputData (char* Text, int Malloced)
621/* Add a chunk of input data to the input stream */
622{
623 CharSource* S;
624
625 /* Create a new input source variable and initialize it */
626 S = xmalloc (sizeof (*S));
627 S->Func = &IDFunc;
628 S->V.Data.Text = Text;
629 S->V.Data.Pos = Text;
630 S->V.Data.Malloced = Malloced;
631
632 /* Use this input source */
633 UseCharSource (S);
634}
635
636
637
638/*****************************************************************************/
639/* Character classification functions */
640/*****************************************************************************/
641
642
643
644int IsIdChar (int C)
645/* Return true if the character is a valid character for an identifier */
646{
647 return IsAlNum (C) ||
648 (C == '_') ||
649 (C == '@' && AtInIdents) ||
650 (C == '$' && DollarInIdents);
651}
652
653
654
int IsIdStart (int C)
/* Return true if the character may start an identifier, that is, if it is
** a letter or the underline.
*/
{
    if (IsAlpha (C)) {
        return 1;
    }
    return (C == '_');
}
660
661
662
663/*****************************************************************************/
664/* Code */
665/*****************************************************************************/
666
667
668
static unsigned DigitVal (unsigned char C)
/* Convert a digit into its numerical representation. Decimal digits give
** 0..9, letters give 10 and up regardless of case ('A' and 'a' give 10). No
** validation is done here; for any other character the result is
** meaningless.
*/
{
    return IsDigit (C)? (unsigned) (C - '0')
                      : (unsigned) (toupper (C) - 'A' + 10);
}
678
679
680
static void NextChar (void)
/* Read the next character from the current input source into C. The call
** is dispatched through the function table of the active source, so it works
** for file input as well as for text data.
*/
{
    Source->Func->NextChar (Source);
}
686
687
688
void LocaseSVal (void)
/* Make SVal lower case. This changes the string attribute of the current
** token (CurTok.SVal) in place.
*/
{
    SB_ToLower (&CurTok.SVal);
}
694
695
696
void UpcaseSVal (void)
/* Make SVal upper case. This changes the string attribute of the current
** token (CurTok.SVal) in place.
*/
{
    SB_ToUpper (&CurTok.SVal);
}
702
703
704
705static int CmpDotKeyword (const void* K1, const void* K2)
706/* Compare function for the dot keyword search */
707{
708 return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
709}
710
711
712
713static token_t FindDotKeyword (void)
714/* Find the dot keyword in SVal. Return the corresponding token if found,
715** return TOK_NONE if not found.
716*/
717{
718 struct DotKeyword K;
719 struct DotKeyword* R;
720
721 /* Initialize K */
722 K.Key = SB_GetConstBuf (&CurTok.SVal);
723 K.Tok = 0;
724
725 /* If we aren't in ignore case mode, we have to uppercase the keyword */
726 if (!IgnoreCase) {
727 UpcaseSVal ();
728 }
729
730 /* Search for the keyword */
731 R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
732 sizeof (DotKeywords [0]), CmpDotKeyword);
733 if (R != 0) {
734
735 /* By default, disable any somewhat experiemental DotKeyword. */
736
737 switch (R->Tok) {
738
739 case TOK_ADDRSIZE:
740 /* Disallow .ADDRSIZE function by default */
741 if (AddrSize == 0) {
742 return TOK_NONE;
743 }
744 break;
745
746 default:
747 break;
748 }
749
750 return R->Tok;
751
752 } else {
753 return TOK_NONE;
754 }
755}
756
757
758
759static void ReadIdent (void)
760/* Read an identifier from the current input position into Ident. Filling SVal
761** starts at the current position with the next character in C. It is assumed
762** that any characters already filled in are ok, and the character in C is
763** checked.
764*/
765{
766 /* Read the identifier */
767 do {
768 SB_AppendChar (&CurTok.SVal, C);
769 NextChar ();
770 } while (IsIdChar (C));
771 SB_Terminate (&CurTok.SVal);
772
773 /* If we should ignore case, convert the identifier to upper case */
774 if (IgnoreCase) {
775 UpcaseSVal ();
776 }
777}
778
779
780
781static void ReadStringConst (int StringTerm)
782/* Read a string constant into SVal. */
783{
784 /* Skip the leading string terminator */
785 NextChar ();
786
787 /* Read the string */
788 while (1) {
789 if (C == StringTerm) {
790 break;
791 }
792 if (C == '\n' || C == EOF) {
793 Error ("Newline in string constant");
794 break;
795 }
796
797 if (C == '\\' && StringEscapes) {
798 NextChar ();
799
800 switch (C) {
801 case EOF:
802 Error ("Unterminated escape sequence in string constant");
803 break;
804 case '\\':
805 case '\'':
806 case '"':
807 break;
808 case 't':
809 C = '\x09';
810 break;
811 case 'r':
812 C = '\x0D';
813 break;
814 case 'n':
815 C = '\x0A';
816 break;
817 case 'x':
818 NextChar ();
819 if (IsXDigit (C)) {
820 char high_nibble = DigitVal (C) << 4;
821 NextChar ();
822 if (IsXDigit (C)) {
823 C = high_nibble | DigitVal (C);
824 break;
825 }
826 }
827 /* FALLTHROUGH */
828 default:
829 Error ("Unsupported escape sequence in string constant");
830 break;
831 }
832 }
833
834 /* Append the char to the string */
835 SB_AppendChar (&CurTok.SVal, C);
836
837 /* Skip the character */
838 NextChar ();
839 }
840
841 /* Skip the trailing terminator */
842 NextChar ();
843
844 /* Terminate the string */
845 SB_Terminate (&CurTok.SVal);
846}
847
848
849
850static int Sweet16Reg (const StrBuf* Id)
851/* Check if the given identifier is a sweet16 register. Return -1 if this is
852** not the case, return the register number otherwise.
853*/
854{
855 unsigned RegNum;
856 char Check;
857
858 if (SB_GetLen (Id) < 2) {
859 return -1;
860 }
861 if (toupper (SB_AtUnchecked (Id, 0)) != 'R') {
862 return -1;
863 }
864 if (!IsDigit (SB_AtUnchecked (Id, 1))) {
865 return -1;
866 }
867
868 if (sscanf (SB_GetConstBuf (Id)+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
869 /* Invalid register */
870 return -1;
871 }
872
873 /* The register number is valid */
874 return (int) RegNum;
875}
876
877
878
879void NextRawTok (void)
880/* Read the next raw token from the input stream */
881{
882 Macro* M;
883
884 /* If we've a forced end of assembly, don't read further */
885 if (ForcedEnd) {
886 CurTok.Tok = TOK_EOF;
887 return;
888 }
889
890Restart:
891 /* Check if we have tokens from another input source */
892 if (InputFromStack ()) {
893 if (CurTok.Tok == TOK_IDENT && (M = FindDefine (&CurTok.SVal)) != 0) {
894 /* This is a define style macro - expand it */
895 MacExpandStart (M);
896 goto Restart;
897 }
898 return;
899 }
900
901Again:
902 /* Skip whitespace, remember if we had some */
903 if ((CurTok.WS = IsBlank (C)) != 0) {
904 do {
905 NextChar ();
906 } while (IsBlank (C));
907 }
908
909 /* Mark the file position of the next token */
910 Source->Func->MarkStart (Source);
911
912 /* Clear the string attribute */
913 SB_Clear (&CurTok.SVal);
914
915 /* Generate line info for the current token */
916 NewAsmLine ();
917
918 /* Hex number or PC symbol? */
919 if (C == '$') {
920 NextChar ();
921
922 /* Hex digit must follow or DollarIsPC must be enabled */
923 if (!IsXDigit (C)) {
924 if (DollarIsPC) {
925 CurTok.Tok = TOK_PC;
926 return;
927 } else {
928 Error ("Hexadecimal digit expected");
929 }
930 }
931
932 /* Read the number */
933 CurTok.IVal = 0;
934 while (1) {
935 if (UnderlineInNumbers && C == '_') {
936 while (C == '_') {
937 NextChar ();
938 }
939 if (!IsXDigit (C)) {
940 Error ("Number may not end with underline");
941 }
942 }
943 if (IsXDigit (C)) {
944 if (CurTok.IVal & 0xF0000000) {
945 Error ("Overflow in hexadecimal number");
946 CurTok.IVal = 0;
947 }
948 CurTok.IVal = (CurTok.IVal << 4) + DigitVal (C);
949 NextChar ();
950 } else {
951 break;
952 }
953 }
954
955 /* This is an integer constant */
956 CurTok.Tok = TOK_INTCON;
957 return;
958 }
959
960 /* Binary number? */
961 if (C == '%') {
962 NextChar ();
963
964 /* 0 or 1 must follow */
965 if (!IsBDigit (C)) {
966 Error ("Binary digit expected");
967 }
968
969 /* Read the number */
970 CurTok.IVal = 0;
971 while (1) {
972 if (UnderlineInNumbers && C == '_') {
973 while (C == '_') {
974 NextChar ();
975 }
976 if (!IsBDigit (C)) {
977 Error ("Number may not end with underline");
978 }
979 }
980 if (IsBDigit (C)) {
981 if (CurTok.IVal & 0x80000000) {
982 Error ("Overflow in binary number");
983 CurTok.IVal = 0;
984 }
985 CurTok.IVal = (CurTok.IVal << 1) + DigitVal (C);
986 NextChar ();
987 } else {
988 break;
989 }
990 }
991
992 /* This is an integer constant */
993 CurTok.Tok = TOK_INTCON;
994 return;
995 }
996
997 /* Number? */
998 if (IsDigit (C)) {
999
1000 char Buf[16];
1001 unsigned Digits;
1002 unsigned Base;
1003 unsigned I;
1004 long Max;
1005 unsigned DVal;
1006
1007 /* Ignore leading zeros */
1008 while (C == '0') {
1009 NextChar ();
1010 }
1011
1012 /* Read the number into Buf counting the digits */
1013 Digits = 0;
1014 while (1) {
1015 if (UnderlineInNumbers && C == '_') {
1016 while (C == '_') {
1017 NextChar ();
1018 }
1019 if (!IsXDigit (C)) {
1020 Error ("Number may not end with underline");
1021 }
1022 }
1023 if (IsXDigit (C)) {
1024 /* Buf is big enough to allow any decimal and hex number to
1025 ** overflow, so ignore excess digits here, they will be detected
1026 ** when we convert the value.
1027 */
1028 if (Digits < sizeof (Buf)) {
1029 Buf[Digits++] = C;
1030 }
1031 NextChar ();
1032 } else {
1033 break;
1034 }
1035 }
1036
1037 /* Allow zilog/intel style hex numbers with a 'h' suffix */
1038 if (C == 'h' || C == 'H') {
1039 NextChar ();
1040 Base = 16;
1041 Max = 0xFFFFFFFFUL / 16;
1042 } else {
1043 Base = 10;
1044 Max = 0xFFFFFFFFUL / 10;
1045 }
1046
1047 /* Convert the number using the given base */
1048 CurTok.IVal = 0;
1049 for (I = 0; I < Digits; ++I) {
1050 if (CurTok.IVal > Max) {
1051 Error ("Number out of range");
1052 CurTok.IVal = 0;
1053 break;
1054 }
1055 DVal = DigitVal (Buf[I]);
1056 if (DVal >= Base) {
1057 Error ("Invalid digits in number");
1058 CurTok.IVal = 0;
1059 break;
1060 }
1061 CurTok.IVal = (CurTok.IVal * Base) + DVal;
1062 }
1063
1064 /* This is an integer constant */
1065 CurTok.Tok = TOK_INTCON;
1066 return;
1067 }
1068
1069 /* Control command? */
1070 if (C == '.') {
1071
1072 /* Remember and skip the dot */
1073 NextChar ();
1074
1075 /* Check if it's just a dot */
1076 if (!IsIdStart (C)) {
1077
1078 /* Just a dot */
1079 CurTok.Tok = TOK_DOT;
1080
1081 } else {
1082
1083 /* Read the remainder of the identifier */
1084 SB_AppendChar (&CurTok.SVal, '.');
1085 ReadIdent ();
1086
1087 /* Dot keyword, search for it */
1088 CurTok.Tok = FindDotKeyword ();
1089 if (CurTok.Tok == TOK_NONE) {
1090
1091 /* Not found */
1092 if (!LeadingDotInIdents) {
1093 /* Invalid pseudo instruction */
1094 Error ("'%m%p' is not a recognized control command", &CurTok.SVal);
1095 goto Again;
1096 }
1097
1098 /* An identifier with a dot. Check if it's a define style
1099 ** macro.
1100 */
1101 if ((M = FindDefine (&CurTok.SVal)) != 0) {
1102 /* This is a define style macro - expand it */
1103 MacExpandStart (M);
1104 goto Restart;
1105 }
1106
1107 /* Just an identifier with a dot */
1108 CurTok.Tok = TOK_IDENT;
1109 }
1110
1111 }
1112 return;
1113 }
1114
1115 /* Indirect op for sweet16 cpu. Must check this before checking for local
1116 ** symbols, because these may also use the '@' symbol.
1117 */
1118 if (CPU == CPU_SWEET16 && C == '@') {
1119 NextChar ();
1120 CurTok.Tok = TOK_AT;
1121 return;
1122 }
1123
1124 /* Local symbol? */
1125 if (C == LocalStart) {
1126
1127 /* Read the identifier. */
1128 ReadIdent ();
1129
1130 /* Start character alone is not enough */
1131 if (SB_GetLen (&CurTok.SVal) == 1) {
1132 Error ("Invalid cheap local symbol");
1133 goto Again;
1134 }
1135
1136 /* A local identifier */
1137 CurTok.Tok = TOK_LOCAL_IDENT;
1138 return;
1139 }
1140
1141
1142 /* Identifier or keyword? */
1143 if (IsIdStart (C)) {
1144
1145 /* Read the identifier */
1146 ReadIdent ();
1147
1148 /* Check for special names. Bail out if we have identified the type of
1149 ** the token. Go on if the token is an identifier.
1150 */
1151 switch (SB_GetLen (&CurTok.SVal)) {
1152 case 1:
1153 switch (toupper (SB_AtUnchecked (&CurTok.SVal, 0))) {
1154
1155 case 'A':
1156 if (C == ':') {
1157 NextChar ();
1158 CurTok.Tok = TOK_OVERRIDE_ABS;
1159 } else {
1160 CurTok.Tok = TOK_A;
1161 }
1162 return;
1163
1164 case 'F':
1165 if (C == ':') {
1166 NextChar ();
1167 CurTok.Tok = TOK_OVERRIDE_FAR;
1168 return;
1169 }
1170 break;
1171
1172 case 'S':
1173 if ((CPU == CPU_4510) || (CPU == CPU_65816)) {
1174 CurTok.Tok = TOK_S;
1175 return;
1176 }
1177 break;
1178
1179 case 'X':
1180 CurTok.Tok = TOK_X;
1181 return;
1182
1183 case 'Y':
1184 CurTok.Tok = TOK_Y;
1185 return;
1186
1187 case 'Z':
1188 if (C == ':') {
1189 NextChar ();
1190 CurTok.Tok = TOK_OVERRIDE_ZP;
1191 return;
1192 } else {
1193 if (CPU == CPU_4510) {
1194 CurTok.Tok = TOK_Z;
1195 return;
1196 }
1197 }
1198 break;
1199
1200 default:
1201 break;
1202 }
1203 break;
1204 case 2:
1205 if ((CPU == CPU_4510) &&
1206 (toupper (SB_AtUnchecked (&CurTok.SVal, 0)) == 'S') &&
1207 (toupper (SB_AtUnchecked (&CurTok.SVal, 1)) == 'P')) {
1208
1209 CurTok.Tok = TOK_S;
1210 return;
1211 }
1212 /* FALL THROUGH */
1213 default:
1214 if (CPU == CPU_SWEET16 &&
1215 (CurTok.IVal = Sweet16Reg (&CurTok.SVal)) >= 0) {
1216
1217 /* A sweet16 register number in sweet16 mode */
1218 CurTok.Tok = TOK_REG;
1219 return;
1220 }
1221 }
1222
1223 /* Check for define style macro */
1224 if ((M = FindDefine (&CurTok.SVal)) != 0) {
1225 /* Macro - expand it */
1226 MacExpandStart (M);
1227 goto Restart;
1228 } else {
1229 /* An identifier */
1230 CurTok.Tok = TOK_IDENT;
1231 }
1232 return;
1233 }
1234
1235 /* Ok, let's do the switch */
1236CharAgain:
1237 switch (C) {
1238
1239 case '+':
1240 NextChar ();
1241 CurTok.Tok = TOK_PLUS;
1242 return;
1243
1244 case '-':
1245 NextChar ();
1246 CurTok.Tok = TOK_MINUS;
1247 return;
1248
1249 case '/':
1250 NextChar ();
1251 if (C != '*') {
1252 CurTok.Tok = TOK_DIV;
1253 } else if (CComments) {
1254 /* Remember the position, then skip the '*' */
1255 Collection LineInfos = STATIC_COLLECTION_INITIALIZER;
1256 GetFullLineInfo (&LineInfos);
1257 NextChar ();
1258 do {
1259 while (C != '*') {
1260 if (C == EOF) {
1261 LIError (&LineInfos, "Unterminated comment");
1262 ReleaseFullLineInfo (&LineInfos);
1263 DoneCollection (&LineInfos);
1264 goto CharAgain;
1265 }
1266 NextChar ();
1267 }
1268 NextChar ();
1269 } while (C != '/');
1270 NextChar ();
1271 ReleaseFullLineInfo (&LineInfos);
1272 DoneCollection (&LineInfos);
1273 goto Again;
1274 }
1275 return;
1276
1277 case '*':
1278 NextChar ();
1279 CurTok.Tok = TOK_MUL;
1280 return;
1281
1282 case '^':
1283 NextChar ();
1284 CurTok.Tok = TOK_XOR;
1285 return;
1286
1287 case '&':
1288 NextChar ();
1289 if (C == '&') {
1290 NextChar ();
1291 CurTok.Tok = TOK_BOOLAND;
1292 } else {
1293 CurTok.Tok = TOK_AND;
1294 }
1295 return;
1296
1297 case '|':
1298 NextChar ();
1299 if (C == '|') {
1300 NextChar ();
1301 CurTok.Tok = TOK_BOOLOR;
1302 } else {
1303 CurTok.Tok = TOK_OR;
1304 }
1305 return;
1306
1307 case ':':
1308 NextChar ();
1309 switch (C) {
1310
1311 case ':':
1312 NextChar ();
1313 CurTok.Tok = TOK_NAMESPACE;
1314 break;
1315
1316 case '-':
1317 CurTok.IVal = 0;
1318 do {
1319 --CurTok.IVal;
1320 NextChar ();
1321 } while (C == '-');
1322 CurTok.Tok = TOK_ULABEL;
1323 break;
1324
1325 case '+':
1326 CurTok.IVal = 0;
1327 do {
1328 ++CurTok.IVal;
1329 NextChar ();
1330 } while (C == '+');
1331 CurTok.Tok = TOK_ULABEL;
1332 break;
1333
1334 case '=':
1335 NextChar ();
1336 CurTok.Tok = TOK_ASSIGN;
1337 break;
1338
1339 default:
1340 CurTok.Tok = TOK_COLON;
1341 break;
1342 }
1343 return;
1344
1345 case ',':
1346 NextChar ();
1347 CurTok.Tok = TOK_COMMA;
1348 return;
1349
1350 case ';':
1351 NextChar ();
1352 while (C != '\n' && C != EOF) {
1353 NextChar ();
1354 }
1355 goto CharAgain;
1356
1357 case '#':
1358 NextChar ();
1359 CurTok.Tok = TOK_HASH;
1360 return;
1361
1362 case '(':
1363 NextChar ();
1364 CurTok.Tok = TOK_LPAREN;
1365 return;
1366
1367 case ')':
1368 NextChar ();
1369 CurTok.Tok = TOK_RPAREN;
1370 return;
1371
1372 case '[':
1373 NextChar ();
1374 CurTok.Tok = TOK_LBRACK;
1375 return;
1376
1377 case ']':
1378 NextChar ();
1379 CurTok.Tok = TOK_RBRACK;
1380 return;
1381
1382 case '{':
1383 NextChar ();
1384 CurTok.Tok = TOK_LCURLY;
1385 return;
1386
1387 case '}':
1388 NextChar ();
1389 CurTok.Tok = TOK_RCURLY;
1390 return;
1391
1392 case '<':
1393 NextChar ();
1394 if (C == '=') {
1395 NextChar ();
1396 CurTok.Tok = TOK_LE;
1397 } else if (C == '<') {
1398 NextChar ();
1399 CurTok.Tok = TOK_SHL;
1400 } else if (C == '>') {
1401 NextChar ();
1402 CurTok.Tok = TOK_NE;
1403 } else {
1404 CurTok.Tok = TOK_LT;
1405 }
1406 return;
1407
1408 case '=':
1409 NextChar ();
1410 CurTok.Tok = TOK_EQ;
1411 return;
1412
1413 case '!':
1414 NextChar ();
1415 CurTok.Tok = TOK_BOOLNOT;
1416 return;
1417
1418 case '>':
1419 NextChar ();
1420 if (C == '=') {
1421 NextChar ();
1422 CurTok.Tok = TOK_GE;
1423 } else if (C == '>') {
1424 NextChar ();
1425 CurTok.Tok = TOK_SHR;
1426 } else {
1427 CurTok.Tok = TOK_GT;
1428 }
1429 return;
1430
1431 case '~':
1432 NextChar ();
1433 CurTok.Tok = TOK_NOT;
1434 return;
1435
1436 case '\'':
1437 /* Hack: If we allow ' as terminating character for strings, read
1438 ** the following stuff as a string, and check for a one character
1439 ** string later.
1440 */
1441 if (LooseStringTerm) {
1442 ReadStringConst ('\'');
1443 if (SB_GetLen (&CurTok.SVal) == 1) {
1444 CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0);
1445 CurTok.Tok = TOK_CHARCON;
1446 } else {
1447 CurTok.Tok = TOK_STRCON;
1448 }
1449 } else {
1450 /* Always a character constant */
1451 NextChar ();
1452 if (C == EOF || IsControl (C)) {
1453 Error ("Illegal character constant");
1454 goto CharAgain;
1455 }
1456 CurTok.IVal = C;
1457 CurTok.Tok = TOK_CHARCON;
1458 NextChar ();
1459 if (C != '\'') {
1460 if (!MissingCharTerm) {
1461 Error ("Illegal character constant");
1462 }
1463 } else {
1464 NextChar ();
1465 }
1466 }
1467 return;
1468
1469 case '\"':
1470 ReadStringConst ('\"');
1471 CurTok.Tok = TOK_STRCON;
1472 return;
1473
1474 case '\\':
1475 /* Line continuation? */
1476 if (LineCont) {
1477 NextChar ();
1478 /* Next char should be a LF, if not, will result in an error later */
1479 if (C == '\n') {
1480 /* Ignore the '\n' */
1481 NextChar ();
1482 goto Again;
1483 } else {
1484 /* Make it clear what the problem is: */
1485 Error ("EOL expected.");
1486 }
1487 }
1488 break;
1489
1490 case '\n':
1491 NextChar ();
1492 CurTok.Tok = TOK_SEP;
1493 return;
1494
1495 case EOF:
1496 CheckInputStack ();
1497 /* In case of the main file, do not close it, but return EOF. */
1498 if (Source && Source->Next) {
1499 DoneCharSource ();
1500 goto Again;
1501 } else {
1502 CurTok.Tok = TOK_EOF;
1503 }
1504 return;
1505 }
1506
1507 /* If we go here, we could not identify the current character. Skip it
1508 ** and try again.
1509 */
1510 Error ("Invalid input character: 0x%02X", C & 0xFF);
1511 NextChar ();
1512 goto Again;
1513}
1514
1515
1516
1517int GetSubKey (const char* const* Keys, unsigned Count)
1518/* Search for a subkey in a table of keywords. The current token must be an
1519** identifier and all keys must be in upper case. The identifier will be
1520** uppercased in the process. The function returns the index of the keyword,
1521** or -1 if the keyword was not found.
1522*/
1523{
1524 unsigned I;
1525
1526 /* Must have an identifier */
1527 PRECONDITION (CurTok.Tok == TOK_IDENT);
1528
1529 /* If we aren't in ignore case mode, we have to uppercase the identifier */
1530 if (!IgnoreCase) {
1531 UpcaseSVal ();
1532 }
1533
1534 /* Do a linear search (a binary search is not worth the effort) */
1535 for (I = 0; I < Count; ++I) {
1536 if (SB_CompareStr (&CurTok.SVal, Keys [I]) == 0) {
1537 /* Found it */
1538 return I;
1539 }
1540 }
1541
1542 /* Not found */
1543 return -1;
1544}
1545
1546
1547
1548unsigned char ParseAddrSize (void)
1549/* Check if the next token is a keyword that denotes an address size specifier.
1550** If so, return the corresponding address size constant, otherwise output an
1551** error message and return ADDR_SIZE_DEFAULT.
1552*/
1553{
1554 unsigned char AddrSize;
1555
1556 /* Check for an identifier */
1557 if (CurTok.Tok != TOK_IDENT) {
1558 Error ("Address size specifier expected");
1559 return ADDR_SIZE_DEFAULT;
1560 }
1561
1562 /* Convert the attribute */
1563 AddrSize = AddrSizeFromStr (SB_GetConstBuf (&CurTok.SVal));
1564 if (AddrSize == ADDR_SIZE_INVALID) {
1565 Error ("Address size specifier expected");
1566 AddrSize = ADDR_SIZE_DEFAULT;
1567 }
1568
1569 /* Done */
1570 return AddrSize;
1571}
1572
1573
1574
void InitScanner (const char* InFile)
/* Set up the scanner: InFile is passed to NewInputFile so that it becomes
** the input file.
*/
{
    /* Opening the input file is all the setup that is done here */
    NewInputFile (InFile);
}
1581
1582
1583
void DoneScanner (void)
/* Shut down the scanner and give back what it holds */
{
    /* Cleanup is delegated to DoneCharSource */
    DoneCharSource ();
}
1589