scanner.c source code [cc65/src/ca65/scanner.c]

1	/***************************************************************************/
2	/ /
3	/ scanner.c /
4	/ /
5	/ The scanner for the ca65 macroassembler /
6	/ /
7	/ /
8	/ /
9	/ (C) 1998-2013, Ullrich von Bassewitz /
10	/ Roemerstrasse 52 /
11	/ D-70794 Filderstadt /
12	/ EMail: uz@cc65.org /
13	/ /
14	/ /
15	/ This software is provided 'as-is', without any expressed or implied /
16	/ warranty. In no event will the authors be held liable for any damages /
17	/ arising from the use of this software. /
18	/ /
19	/ Permission is granted to anyone to use this software for any purpose, /
20	/ including commercial applications, and to alter it and redistribute it /
21	/ freely, subject to the following restrictions: /
22	/ /
23	/ 1. The origin of this software must not be misrepresented; you must not /
24	/ claim that you wrote the original software. If you use this software /
25	/ in a product, an acknowledgment in the product documentation would be /
26	/ appreciated but is not required. /
27	/ 2. Altered source versions must be plainly marked as such, and must not /
28	/ be misrepresented as being the original software. /
29	/ 3. This notice may not be removed or altered from any source /
30	/ distribution. /
31	/ /
32	/***************************************************************************/
33
34
35
36	#include <stdio.h>
37	#include <stdlib.h>
38	#include <string.h>
39	#include <ctype.h>
40	#include <errno.h>
41
/* common */
43	#include "addrsize.h"
44	#include "attrib.h"
45	#include "chartype.h"
46	#include "check.h"
47	#include "filestat.h"
48	#include "fname.h"
49	#include "xmalloc.h"
50
/* ca65 */
52	#include "condasm.h"
53	#include "error.h"
54	#include "filetab.h"
55	#include "global.h"
56	#include "incpath.h"
57	#include "instr.h"
58	#include "istack.h"
59	#include "listing.h"
60	#include "macro.h"
61	#include "toklist.h"
62	#include "scanner.h"
63
64
65
66	/***************************************************************************/
/*                                   Data                                    */
68	/***************************************************************************/
69
70
71
/* The current input token including its attributes. The scanner functions
** in this file store the token type, position, string and integer value of
** the token just read into this variable.
*/
Token CurTok = STATIC_TOKEN_INITIALIZER;
74
75	/ Struct to handle include files. /
76	typedef struct InputFile InputFile;
77	struct InputFile {
78	FILE* F; / Input file descriptor /
79	FilePos Pos; / Position in file /
80	token_t Tok; / Last token /
81	int C; / Last character /
82	StrBuf Line; / The current input line /
83	int IncSearchPath; / True if we've added a search path /
84	int BinSearchPath; / True if we've added a search path /
85	InputFile* Next; / Linked list of input files /
86	};
87
88	/ Struct to handle textual input data /
89	typedef struct InputData InputData;
90	struct InputData {
91	char* Text; / Pointer to the text data /
92	const char* Pos; / Pointer to current position /
93	int Malloced; / Memory was malloced /
94	token_t Tok; / Last token /
95	int C; / Last character /
96	InputData* Next; / Linked list of input data /
97	};
98
/* Input source: Either file or data. Forward declaration, since the function
** table below needs the type.
*/
typedef struct CharSource CharSource;

/* Table of operations each kind of input source has to provide */
typedef struct CharSourceFunctions {
    void (*MarkStart) (CharSource*);    /* Mark the start pos of a token */
    void (*NextChar) (CharSource*);     /* Read next char from input */
    void (*Done) (CharSource*);         /* Close input source */
} CharSourceFunctions;
109
/* Input source: Either file or data. Sources are stacked via Next; Tok and C
** hold the token and lookahead character that were current when this source
** was activated, so they can be restored once the source is closed.
*/
struct CharSource {
    CharSource*                 Next;   /* Linked list of char sources */
    token_t                     Tok;    /* Last token */
    int                         C;      /* Last character */
    const CharSourceFunctions*  Func;   /* Pointer to function table */
    union {
        InputFile               File;   /* File data */
        InputData               Data;   /* Textual data */
    }                           V;
};
121
/* Current input variables */
static CharSource* Source       = 0;    /* Current char source (top of stack) */
static unsigned    FCount       = 0;    /* Count of currently open input files */
static int         C            = 0;    /* Current input character, or EOF */
126
/* Force end of assembly. When set, NextRawTok() returns TOK_EOF without
** reading any further input.
*/
int               ForcedEnd     = 0;
129
130	/ List of dot keywords with the corresponding tokens /
131	struct DotKeyword {
132	const char* Key; / MUST be first field /
133	token_t Tok;
134	} DotKeywords [] = {
135	{ ".A16", TOK_A16 },
136	{ ".A8", TOK_A8 },
137	{ ".ADDR", TOK_ADDR },
138	{ ".ADDRSIZE", TOK_ADDRSIZE },
139	{ ".ALIGN", TOK_ALIGN },
140	{ ".AND", TOK_BOOLAND },
141	{ ".ASCIIZ", TOK_ASCIIZ },
142	{ ".ASIZE", TOK_ASIZE },
143	{ ".ASSERT", TOK_ASSERT },
144	{ ".AUTOIMPORT", TOK_AUTOIMPORT },
145	{ ".BANK", TOK_BANK },
146	{ ".BANKBYTE", TOK_BANKBYTE },
147	{ ".BANKBYTES", TOK_BANKBYTES },
148	{ ".BITAND", TOK_AND },
149	{ ".BITNOT", TOK_NOT },
150	{ ".BITOR", TOK_OR },
151	{ ".BITXOR", TOK_XOR },
152	{ ".BLANK", TOK_BLANK },
153	{ ".BSS", TOK_BSS },
154	{ ".BYT", TOK_BYTE },
155	{ ".BYTE", TOK_BYTE },
156	{ ".CASE", TOK_CASE },
157	{ ".CHARMAP", TOK_CHARMAP },
158	{ ".CODE", TOK_CODE },
159	{ ".CONCAT", TOK_CONCAT },
160	{ ".CONDES", TOK_CONDES },
161	{ ".CONST", TOK_CONST },
162	{ ".CONSTRUCTOR", TOK_CONSTRUCTOR },
163	{ ".CPU", TOK_CPU },
164	{ ".DATA", TOK_DATA },
165	{ ".DBG", TOK_DBG },
166	{ ".DBYT", TOK_DBYT },
167	{ ".DEBUGINFO", TOK_DEBUGINFO },
168	{ ".DEF", TOK_DEFINED },
169	{ ".DEFINE", TOK_DEFINE },
170	{ ".DEFINED", TOK_DEFINED },
171	{ ".DEFINEDMACRO", TOK_DEFINEDMACRO },
172	{ ".DELMAC", TOK_DELMAC },
173	{ ".DELMACRO", TOK_DELMAC },
174	{ ".DESTRUCTOR", TOK_DESTRUCTOR },
175	{ ".DWORD", TOK_DWORD },
176	{ ".ELSE", TOK_ELSE },
177	{ ".ELSEIF", TOK_ELSEIF },
178	{ ".END", TOK_END },
179	{ ".ENDENUM", TOK_ENDENUM },
180	{ ".ENDIF", TOK_ENDIF },
181	{ ".ENDMAC", TOK_ENDMACRO },
182	{ ".ENDMACRO", TOK_ENDMACRO },
183	{ ".ENDPROC", TOK_ENDPROC },
184	{ ".ENDREP", TOK_ENDREP },
185	{ ".ENDREPEAT", TOK_ENDREP },
186	{ ".ENDSCOPE", TOK_ENDSCOPE },
187	{ ".ENDSTRUCT", TOK_ENDSTRUCT },
188	{ ".ENDUNION", TOK_ENDUNION },
189	{ ".ENUM", TOK_ENUM },
190	{ ".ERROR", TOK_ERROR },
191	{ ".EXITMAC", TOK_EXITMACRO },
192	{ ".EXITMACRO", TOK_EXITMACRO },
193	{ ".EXPORT", TOK_EXPORT },
194	{ ".EXPORTZP", TOK_EXPORTZP },
195	{ ".FARADDR", TOK_FARADDR },
196	{ ".FATAL", TOK_FATAL },
197	{ ".FEATURE", TOK_FEATURE },
198	{ ".FILEOPT", TOK_FILEOPT },
199	{ ".FOPT", TOK_FILEOPT },
200	{ ".FORCEIMPORT", TOK_FORCEIMPORT },
201	{ ".FORCEWORD", TOK_FORCEWORD },
202	{ ".GLOBAL", TOK_GLOBAL },
203	{ ".GLOBALZP", TOK_GLOBALZP },
204	{ ".HIBYTE", TOK_HIBYTE },
205	{ ".HIBYTES", TOK_HIBYTES },
206	{ ".HIWORD", TOK_HIWORD },
207	{ ".I16", TOK_I16 },
208	{ ".I8", TOK_I8 },
209	{ ".IDENT", TOK_MAKEIDENT },
210	{ ".IF", TOK_IF },
211	{ ".IFBLANK", TOK_IFBLANK },
212	{ ".IFCONST", TOK_IFCONST },
213	{ ".IFDEF", TOK_IFDEF },
214	{ ".IFNBLANK", TOK_IFNBLANK },
215	{ ".IFNCONST", TOK_IFNCONST },
216	{ ".IFNDEF", TOK_IFNDEF },
217	{ ".IFNREF", TOK_IFNREF },
218	{ ".IFP02", TOK_IFP02 },
219	{ ".IFP4510", TOK_IFP4510 },
220	{ ".IFP816", TOK_IFP816 },
221	{ ".IFPC02", TOK_IFPC02 },
222	{ ".IFPSC02", TOK_IFPSC02 },
223	{ ".IFREF", TOK_IFREF },
224	{ ".IMPORT", TOK_IMPORT },
225	{ ".IMPORTZP", TOK_IMPORTZP },
226	{ ".INCBIN", TOK_INCBIN },
227	{ ".INCLUDE", TOK_INCLUDE },
228	{ ".INTERRUPTOR", TOK_INTERRUPTOR },
229	{ ".ISIZE", TOK_ISIZE },
230	{ ".ISMNEM", TOK_ISMNEMONIC },
231	{ ".ISMNEMONIC", TOK_ISMNEMONIC },
232	{ ".LEFT", TOK_LEFT },
233	{ ".LINECONT", TOK_LINECONT },
234	{ ".LIST", TOK_LIST },
235	{ ".LISTBYTES", TOK_LISTBYTES },
236	{ ".LOBYTE", TOK_LOBYTE },
237	{ ".LOBYTES", TOK_LOBYTES },
238	{ ".LOCAL", TOK_LOCAL },
239	{ ".LOCALCHAR", TOK_LOCALCHAR },
240	{ ".LOWORD", TOK_LOWORD },
241	{ ".MAC", TOK_MACRO },
242	{ ".MACPACK", TOK_MACPACK },
243	{ ".MACRO", TOK_MACRO },
244	{ ".MATCH", TOK_MATCH },
245	{ ".MAX", TOK_MAX },
246	{ ".MID", TOK_MID },
247	{ ".MIN", TOK_MIN },
248	{ ".MOD", TOK_MOD },
249	{ ".NOT", TOK_BOOLNOT },
250	{ ".NULL", TOK_NULL },
251	{ ".OR", TOK_BOOLOR },
252	{ ".ORG", TOK_ORG },
253	{ ".OUT", TOK_OUT },
254	{ ".P02", TOK_P02 },
255	{ ".P4510", TOK_P4510 },
256	{ ".P816", TOK_P816 },
257	{ ".PAGELEN", TOK_PAGELENGTH },
258	{ ".PAGELENGTH", TOK_PAGELENGTH },
259	{ ".PARAMCOUNT", TOK_PARAMCOUNT },
260	{ ".PC02", TOK_PC02 },
261	{ ".POPCPU", TOK_POPCPU },
262	{ ".POPSEG", TOK_POPSEG },
263	{ ".PROC", TOK_PROC },
264	{ ".PSC02", TOK_PSC02 },
265	{ ".PUSHCPU", TOK_PUSHCPU },
266	{ ".PUSHSEG", TOK_PUSHSEG },
267	{ ".REF", TOK_REFERENCED },
268	{ ".REFERENCED", TOK_REFERENCED },
269	{ ".RELOC", TOK_RELOC },
270	{ ".REPEAT", TOK_REPEAT },
271	{ ".RES", TOK_RES },
272	{ ".RIGHT", TOK_RIGHT },
273	{ ".RODATA", TOK_RODATA },
274	{ ".SCOPE", TOK_SCOPE },
275	{ ".SEGMENT", TOK_SEGMENT },
276	{ ".SET", TOK_SET },
277	{ ".SETCPU", TOK_SETCPU },
278	{ ".SHL", TOK_SHL },
279	{ ".SHR", TOK_SHR },
280	{ ".SIZEOF", TOK_SIZEOF },
281	{ ".SMART", TOK_SMART },
282	{ ".SPRINTF", TOK_SPRINTF },
283	{ ".STRAT", TOK_STRAT },
284	{ ".STRING", TOK_STRING },
285	{ ".STRLEN", TOK_STRLEN },
286	{ ".STRUCT", TOK_STRUCT },
287	{ ".TAG", TOK_TAG },
288	{ ".TCOUNT", TOK_TCOUNT },
289	{ ".TIME", TOK_TIME },
290	{ ".UNDEF", TOK_UNDEF },
291	{ ".UNDEFINE", TOK_UNDEF },
292	{ ".UNION", TOK_UNION },
293	{ ".VERSION", TOK_VERSION },
294	{ ".WARNING", TOK_WARNING },
295	{ ".WORD", TOK_WORD },
296	{ ".XMATCH", TOK_XMATCH },
297	{ ".XOR", TOK_BOOLXOR },
298	{ ".ZEROPAGE", TOK_ZEROPAGE },
299	};
300
301
302
303	/***************************************************************************/
/*                            CharSource functions                           */
305	/***************************************************************************/
306
307
308
309	static void UseCharSource (CharSource* S)
310	/ Initialize a new input source and start to use it. /
311	{
312	/ Remember the current input char and token /
313	S->Tok = CurTok.Tok;
314	S->C = C;
315
316	/ Use the new input source /
317	S->Next = Source;
318	Source = S;
319
320	/ Read the first character from the new file /
321	S->Func->NextChar (S);
322
323	/ Setup the next token so it will be skipped on the next call to*
324	** NextRawTok().
325	*/
326	CurTok.Tok = TOK_SEP;
327	}
328
329
330
331	static void DoneCharSource (void)
332	/ Close the top level character source /
333	{
334	CharSource* S;
335
336	/ First, call the type specific function /
337	Source->Func->Done (Source);
338
339	/ Restore the old token /
340	CurTok.Tok = Source->Tok;
341	C = Source->C;
342
343	/ Remember the last stacked input source /
344	S = Source->Next;
345
346	/ Delete the top level one ... /
347	xfree (Source);
348
349	/ ... and use the one before /
350	Source = S;
351	}
352
353
354
355	/***************************************************************************/
/*                            InputFile functions                            */
357	/***************************************************************************/
358
359
360
361	static void IFMarkStart (CharSource* S)
362	/ Mark the start of the next token /
363	{
364	CurTok.Pos = S->V.File.Pos;
365	}
366
367
368
369	static void IFNextChar (CharSource* S)
370	/ Read the next character from the input file /
371	{
372	/ Check for end of line, read the next line if needed /
373	while (SB_GetIndex (&S->V.File.Line) >= SB_GetLen (&S->V.File.Line)) {
374
375	unsigned Len;
376
377	/ End of current line reached, read next line /
378	SB_Clear (&S->V.File.Line);
379	while (`1`) {
380
381	int N = fgetc (S->V.File.F);
382	if (N == EOF) {
383	/ End of file. Accept files without a newline at the end /
384	if (SB_NotEmpty (&S->V.File.Line)) {
385	break;
386	}
387
388	/ No more data - add an empty line to the listing. This*
389	** is a small hack needed to keep the PC output in sync.
390	*/
391	NewListingLine (&EmptyStrBuf, S->V.File.Pos.Name, FCount);
392	C = EOF;
393	return;
394
395	/ Check for end of line /
396	} else if (N == `'\n'`) {
397
398	/ End of line /
399	break;
400
401	/ Collect other stuff /
402	} else {
403
404	/ Append data to line /
405	SB_AppendChar (&S->V.File.Line, N);
406
407	}
408	}
409
410
411	/ If we come here, we have a new input line. To avoid problems*
412	** with strange line terminators, remove all whitespace from the
413	** end of the line, then add a single newline.
414	*/
415	Len = SB_GetLen (&S->V.File.Line);
416	while (Len > `0` && IsSpace (SB_AtUnchecked (&S->V.File.Line, Len-`1`))) {
417	--Len;
418	}
419	SB_Drop (&S->V.File.Line, SB_GetLen (&S->V.File.Line) - Len);
420	SB_AppendChar (&S->V.File.Line, `'\n'`);
421
422	/ Terminate the string buffer /
423	SB_Terminate (&S->V.File.Line);
424
425	/ One more line /
426	S->V.File.Pos.Line++;
427
428	/ Remember the new line for the listing /
429	NewListingLine (&S->V.File.Line, S->V.File.Pos.Name, FCount);
430
431	}
432
433	/ Set the column pointer /
434	S->V.File.Pos.Col = SB_GetIndex (&S->V.File.Line);
435
436	/ Return the next character from the buffer /
437	C = SB_Get (&S->V.File.Line);
438	}
439
440
441
442	void IFDone (CharSource* S)
443	/ Close the current input file /
444	{
445	/ We're at the end of an include file. Check if we have any*
446	** open .IFs, or any open token lists in this file. This
447	** enforcement is artificial, using conditionals that start
448	** in one file and end in another are uncommon, and don't
449	** allowing these things will help finding errors.
450	*/
451	CheckOpenIfs ();
452
453	/ If we've added search paths for this file, remove them /
454	if (S->V.File.IncSearchPath) {
455	PopSearchPath (IncSearchPath);
456	}
457	if (S->V.File.BinSearchPath) {
458	PopSearchPath (BinSearchPath);
459	}
460
461	/ Free the line buffer /
462	SB_Done (&S->V.File.Line);
463
464	/ Close the input file and decrement the file count. We will ignore*
465	** errors here, since we were just reading from the file.
466	*/
467	(void) fclose (S->V.File.F);
468	--FCount;
469	}
470
471
472
/* Set of input file handling functions. Installed as function table of every
** CharSource created by NewInputFile().
*/
static const CharSourceFunctions IFFunc = {
    IFMarkStart,
    IFNextChar,
    IFDone
};
479
480
481
482	int NewInputFile (const char* Name)
483	/ Open a new input file. Returns true if the file could be successfully opened*
484	** and false otherwise.
485	*/
486	{
487	int RetCode = `0`; / Return code. Assume an error. /
488	char* PathName = `0`;
489	FILE* F;
490	struct stat Buf;
491	StrBuf NameBuf; / No need to initialize /
492	StrBuf Path = AUTO_STRBUF_INITIALIZER;
493	unsigned FileIdx;
494	CharSource* S;
495
496
497	/ If this is the main file, just try to open it. If it's an include file,*
498	** search for it using the include path list.
499	*/
500	if (FCount == `0`) {
501	/ Main file /
502	F = fopen (Name, "r");
503	if (F == `0`) {
504	Fatal ("Cannot open input file '%s': %s", Name, strerror (errno));
505	}
506	} else {
507	/ We are on include level. Search for the file in the include*
508	** directories.
509	*/
510	PathName = SearchFile (IncSearchPath, Name);
511	if (PathName == `0` \|\| (F = fopen (PathName, "r")) == `0`) {
512	/ Not found or cannot open, print an error and bail out /
513	Error ("Cannot open include file '%s': %s", Name, strerror (errno));
514	goto ExitPoint;
515	}
516
517	/ Use the path name from now on /
518	Name = PathName;
519	}
520
521	/ Stat the file and remember the values. There's a race condition here,*
522	** since we cannot use fileno() (non-standard identifier in standard
523	** header file), and therefore not fstat. When using stat with the
524	** file name, there's a risk that the file was deleted and recreated
525	** while it was open. Since mtime and size are only used to check
526	** if a file has changed in the debugger, we will ignore this problem
527	** here.
528	*/
529	if (FileStat (Name, &Buf) != `0`) {
530	Fatal ("Cannot stat input file '%s': %s", Name, strerror (errno));
531	}
532
533	/ Add the file to the input file table and remember the index /
534	FileIdx = AddFile (SB_InitFromString (&NameBuf, Name),
535	(FCount == `0`)? FT_MAIN : FT_INCLUDE,
536	Buf.st_size, (unsigned long) Buf.st_mtime);
537
538	/ Create a new input source variable and initialize it /
539	S = xmalloc (sizeof (*S));
540	S->Func = &IFFunc;
541	S->V.File.F = F;
542	S->V.File.Pos.Line = `0`;
543	S->V.File.Pos.Col = `0`;
544	S->V.File.Pos.Name = FileIdx;
545	SB_Init (&S->V.File.Line);
546
547	/ Push the path for this file onto the include search lists /
548	SB_CopyBuf (&Path, Name, FindName (Name) - Name);
549	SB_Terminate (&Path);
550	S->V.File.IncSearchPath = PushSearchPath (IncSearchPath, SB_GetConstBuf (&Path));
551	S->V.File.BinSearchPath = PushSearchPath (BinSearchPath, SB_GetConstBuf (&Path));
552	SB_Done (&Path);
553
554	/ Count active input files /
555	++FCount;
556
557	/ Use this input source /
558	UseCharSource (S);
559
560	/ File successfully opened /
561	RetCode = `1`;
562
563	ExitPoint:
564	/ Free an allocated name buffer /
565	xfree (PathName);
566
567	/ Return the success code /
568	return RetCode;
569	}
570
571
572
573	/***************************************************************************/
/*                            InputData functions                            */
575	/***************************************************************************/
576
577
578
static void IDMarkStart (CharSource* S attribute ((unused)))
/* Mark the start of the next token. Exists only to fill the MarkStart slot
** of the function table for textual input data; CurTok.Pos is left alone.
*/
{
    /* Nothing to do here */
}
584
585
586
587	static void IDNextChar (CharSource* S)
588	/ Read the next character from the input text /
589	{
590	C = *S->V.Data.Pos++;
591	if (C == `'\0'`) {
592	/ End of input data /
593	--S->V.Data.Pos;
594	C = EOF;
595	}
596	}
597
598
599
600	void IDDone (CharSource* S)
601	/ Close the current input data /
602	{
603	/ Cleanup the current stuff /
604	if (S->V.Data.Malloced) {
605	xfree (S->V.Data.Text);
606	}
607	}
608
609
610
/* Set of input data handling functions. Installed as function table of every
** CharSource created by NewInputData().
*/
static const CharSourceFunctions IDFunc = {
    IDMarkStart,
    IDNextChar,
    IDDone
};
617
618
619
620	void NewInputData (char* Text, int Malloced)
621	/ Add a chunk of input data to the input stream /
622	{
623	CharSource* S;
624
625	/ Create a new input source variable and initialize it /
626	S = xmalloc (sizeof (*S));
627	S->Func = &IDFunc;
628	S->V.Data.Text = Text;
629	S->V.Data.Pos = Text;
630	S->V.Data.Malloced = Malloced;
631
632	/ Use this input source /
633	UseCharSource (S);
634	}
635
636
637
638	/***************************************************************************/
/*                    Character classification functions                     */
640	/***************************************************************************/
641
642
643
644	int IsIdChar (int C)
645	/ Return true if the character is a valid character for an identifier /
646	{
647	return IsAlNum (C) \|\|
648	(C == `'_'`) \|\|
649	(C == `'@'` && AtInIdents) \|\|
650	(C == `'$'` && DollarInIdents);
651	}
652
653
654
int IsIdStart (int C)
/* Return true if the character may start an identifier, that is, if it is
** a letter or an underscore.
*/
{
    if (C == '_') {
        return 1;
    }
    return IsAlpha (C) != 0;
}
660
661
662
663	/***************************************************************************/
/*                                   Code                                    */
665	/***************************************************************************/
666
667
668
static unsigned DigitVal (unsigned char C)
/* Convert a digit into its numerical representation. Decimal digits yield
** 0..9; any other character is treated as a letter digit with 'A'/'a'
** counting as 10. The caller has to make sure that C is a valid digit.
*/
{
    return IsDigit (C)? (unsigned) (C - '0')
                      : (unsigned) (toupper (C) - 'A' + 10);
}
678
679
680
681	static void NextChar (void)
682	/ Read the next character from the input file /
683	{
684	Source->Func->NextChar (Source);
685	}
686
687
688
689	void LocaseSVal (void)
690	/ Make SVal lower case /
691	{
692	SB_ToLower (&CurTok.SVal);
693	}
694
695
696
697	void UpcaseSVal (void)
698	/ Make SVal upper case /
699	{
700	SB_ToUpper (&CurTok.SVal);
701	}
702
703
704
705	static int CmpDotKeyword (const void* K1, const void* K2)
706	/ Compare function for the dot keyword search /
707	{
708	return strcmp (((struct DotKeyword)K1)->Key, ((struct* DotKeyword*)K2)->Key);
709	}
710
711
712
713	static token_t FindDotKeyword (void)
714	/ Find the dot keyword in SVal. Return the corresponding token if found,*
715	** return TOK_NONE if not found.
716	*/
717	{
718	struct DotKeyword K;
719	struct DotKeyword* R;
720
721	/ Initialize K /
722	K.Key = SB_GetConstBuf (&CurTok.SVal);
723	K.Tok = `0`;
724
725	/ If we aren't in ignore case mode, we have to uppercase the keyword /
726	if (!IgnoreCase) {
727	UpcaseSVal ();
728	}
729
730	/ Search for the keyword /
731	R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [`0`]),
732	sizeof (DotKeywords [`0`]), CmpDotKeyword);
733	if (R != `0`) {
734
735	/ By default, disable any somewhat experiemental DotKeyword. /
736
737	switch (R->Tok) {
738
739	case TOK_ADDRSIZE:
740	/ Disallow .ADDRSIZE function by default /
741	if (AddrSize == `0`) {
742	return TOK_NONE;
743	}
744	break;
745
746	default:
747	break;
748	}
749
750	return R->Tok;
751
752	} else {
753	return TOK_NONE;
754	}
755	}
756
757
758
759	static void ReadIdent (void)
760	/ Read an identifier from the current input position into Ident. Filling SVal*
761	** starts at the current position with the next character in C. It is assumed
762	** that any characters already filled in are ok, and the character in C is
763	** checked.
764	*/
765	{
766	/ Read the identifier /
767	do {
768	SB_AppendChar (&CurTok.SVal, C);
769	NextChar ();
770	} while (IsIdChar (C));
771	SB_Terminate (&CurTok.SVal);
772
773	/ If we should ignore case, convert the identifier to upper case /
774	if (IgnoreCase) {
775	UpcaseSVal ();
776	}
777	}
778
779
780
781	static void ReadStringConst (int StringTerm)
782	/ Read a string constant into SVal. /
783	{
784	/ Skip the leading string terminator /
785	NextChar ();
786
787	/ Read the string /
788	while (`1`) {
789	if (C == StringTerm) {
790	break;
791	}
792	if (C == `'\n'` \|\| C == EOF) {
793	Error ("Newline in string constant");
794	break;
795	}
796
797	if (C == `'\\'` && StringEscapes) {
798	NextChar ();
799
800	switch (C) {
801	case EOF:
802	Error ("Unterminated escape sequence in string constant");
803	break;
804	case `'\\'`:
805	case `'\''`:
806	case `'"'`:
807	break;
808	case `'t'`:
809	C = `'\x09'`;
810	break;
811	case `'r'`:
812	C = `'\x0D'`;
813	break;
814	case `'n'`:
815	C = `'\x0A'`;
816	break;
817	case `'x'`:
818	NextChar ();
819	if (IsXDigit (C)) {
820	char high_nibble = DigitVal (C) << `4`;
821	NextChar ();
822	if (IsXDigit (C)) {
823	C = high_nibble \| DigitVal (C);
824	break;
825	}
826	}
827	/ FALLTHROUGH /
828	default:
829	Error ("Unsupported escape sequence in string constant");
830	break;
831	}
832	}
833
834	/ Append the char to the string /
835	SB_AppendChar (&CurTok.SVal, C);
836
837	/ Skip the character /
838	NextChar ();
839	}
840
841	/ Skip the trailing terminator /
842	NextChar ();
843
844	/ Terminate the string /
845	SB_Terminate (&CurTok.SVal);
846	}
847
848
849
850	static int Sweet16Reg (const StrBuf* Id)
851	/ Check if the given identifier is a sweet16 register. Return -1 if this is*
852	** not the case, return the register number otherwise.
853	*/
854	{
855	unsigned RegNum;
856	char Check;
857
858	if (SB_GetLen (Id) < `2`) {
859	return -`1`;
860	}
861	if (toupper (SB_AtUnchecked (Id, `0`)) != `'R'`) {
862	return -`1`;
863	}
864	if (!IsDigit (SB_AtUnchecked (Id, `1`))) {
865	return -`1`;
866	}
867
868	if (sscanf (SB_GetConstBuf (Id)+`1`, "%u%c", &RegNum, &Check) != `1` \|\| RegNum > `15`) {
869	/ Invalid register /
870	return -`1`;
871	}
872
873	/ The register number is valid /
874	return (int) RegNum;
875	}
876
877
878
879	void NextRawTok (void)
880	/ Read the next raw token from the input stream /
881	{
882	Macro* M;
883
884	/ If we've a forced end of assembly, don't read further /
885	if (ForcedEnd) {
886	CurTok.Tok = TOK_EOF;
887	return;
888	}
889
890	Restart:
891	/ Check if we have tokens from another input source /
892	if (InputFromStack ()) {
893	if (CurTok.Tok == TOK_IDENT && (M = FindDefine (&CurTok.SVal)) != `0`) {
894	/ This is a define style macro - expand it /
895	MacExpandStart (M);
896	goto Restart;
897	}
898	return;
899	}
900
901	Again:
902	/ Skip whitespace, remember if we had some /
903	if ((CurTok.WS = IsBlank (C)) != `0`) {
904	do {
905	NextChar ();
906	} while (IsBlank (C));
907	}
908
909	/ Mark the file position of the next token /
910	Source->Func->MarkStart (Source);
911
912	/ Clear the string attribute /
913	SB_Clear (&CurTok.SVal);
914
915	/ Generate line info for the current token /
916	NewAsmLine ();
917
918	/ Hex number or PC symbol? /
919	if (C == `'$'`) {
920	NextChar ();
921
922	/ Hex digit must follow or DollarIsPC must be enabled /
923	if (!IsXDigit (C)) {
924	if (DollarIsPC) {
925	CurTok.Tok = TOK_PC;
926	return;
927	} else {
928	Error ("Hexadecimal digit expected");
929	}
930	}
931
932	/ Read the number /
933	CurTok.IVal = `0`;
934	while (`1`) {
935	if (UnderlineInNumbers && C == `'_'`) {
936	while (C == `'_'`) {
937	NextChar ();
938	}
939	if (!IsXDigit (C)) {
940	Error ("Number may not end with underline");
941	}
942	}
943	if (IsXDigit (C)) {
944	if (CurTok.IVal & `0xF0000000`) {
945	Error ("Overflow in hexadecimal number");
946	CurTok.IVal = `0`;
947	}
948	CurTok.IVal = (CurTok.IVal << `4`) + DigitVal (C);
949	NextChar ();
950	} else {
951	break;
952	}
953	}
954
955	/ This is an integer constant /
956	CurTok.Tok = TOK_INTCON;
957	return;
958	}
959
960	/ Binary number? /
961	if (C == `'%'`) {
962	NextChar ();
963
964	/ 0 or 1 must follow /
965	if (!IsBDigit (C)) {
966	Error ("Binary digit expected");
967	}
968
969	/ Read the number /
970	CurTok.IVal = `0`;
971	while (`1`) {
972	if (UnderlineInNumbers && C == `'_'`) {
973	while (C == `'_'`) {
974	NextChar ();
975	}
976	if (!IsBDigit (C)) {
977	Error ("Number may not end with underline");
978	}
979	}
980	if (IsBDigit (C)) {
981	if (CurTok.IVal & `0x80000000`) {
982	Error ("Overflow in binary number");
983	CurTok.IVal = `0`;
984	}
985	CurTok.IVal = (CurTok.IVal << `1`) + DigitVal (C);
986	NextChar ();
987	} else {
988	break;
989	}
990	}
991
992	/ This is an integer constant /
993	CurTok.Tok = TOK_INTCON;
994	return;
995	}
996
997	/ Number? /
998	if (IsDigit (C)) {
999
1000	char Buf[`16`];
1001	unsigned Digits;
1002	unsigned Base;
1003	unsigned I;
1004	long Max;
1005	unsigned DVal;
1006
1007	/ Ignore leading zeros /
1008	while (C == `'0'`) {
1009	NextChar ();
1010	}
1011
1012	/ Read the number into Buf counting the digits /
1013	Digits = `0`;
1014	while (`1`) {
1015	if (UnderlineInNumbers && C == `'_'`) {
1016	while (C == `'_'`) {
1017	NextChar ();
1018	}
1019	if (!IsXDigit (C)) {
1020	Error ("Number may not end with underline");
1021	}
1022	}
1023	if (IsXDigit (C)) {
1024	/ Buf is big enough to allow any decimal and hex number to*
1025	** overflow, so ignore excess digits here, they will be detected
1026	** when we convert the value.
1027	*/
1028	if (Digits < sizeof (Buf)) {
1029	Buf[Digits++] = C;
1030	}
1031	NextChar ();
1032	} else {
1033	break;
1034	}
1035	}
1036
1037	/ Allow zilog/intel style hex numbers with a 'h' suffix /
1038	if (C == `'h'` \|\| C == `'H'`) {
1039	NextChar ();
1040	Base = `16`;
1041	Max = `0xFFFFFFFFUL` / `16`;
1042	} else {
1043	Base = `10`;
1044	Max = `0xFFFFFFFFUL` / `10`;
1045	}
1046
1047	/ Convert the number using the given base /
1048	CurTok.IVal = `0`;
1049	for (I = `0`; I < Digits; ++I) {
1050	if (CurTok.IVal > Max) {
1051	Error ("Number out of range");
1052	CurTok.IVal = `0`;
1053	break;
1054	}
1055	DVal = DigitVal (Buf[I]);
1056	if (DVal >= Base) {
1057	Error ("Invalid digits in number");
1058	CurTok.IVal = `0`;
1059	break;
1060	}
1061	CurTok.IVal = (CurTok.IVal * Base) + DVal;
1062	}
1063
1064	/ This is an integer constant /
1065	CurTok.Tok = TOK_INTCON;
1066	return;
1067	}
1068
1069	/ Control command? /
1070	if (C == `'.'`) {
1071
1072	/ Remember and skip the dot /
1073	NextChar ();
1074
1075	/ Check if it's just a dot /
1076	if (!IsIdStart (C)) {
1077
1078	/ Just a dot /
1079	CurTok.Tok = TOK_DOT;
1080
1081	} else {
1082
1083	/ Read the remainder of the identifier /
1084	SB_AppendChar (&CurTok.SVal, `'.'`);
1085	ReadIdent ();
1086
1087	/ Dot keyword, search for it /
1088	CurTok.Tok = FindDotKeyword ();
1089	if (CurTok.Tok == TOK_NONE) {
1090
1091	/ Not found /
1092	if (!LeadingDotInIdents) {
1093	/ Invalid pseudo instruction /
1094	Error ("'%m%p' is not a recognized control command", &CurTok.SVal);
1095	goto Again;
1096	}
1097
1098	/ An identifier with a dot. Check if it's a define style*
1099	** macro.
1100	*/
1101	if ((M = FindDefine (&CurTok.SVal)) != `0`) {
1102	/ This is a define style macro - expand it /
1103	MacExpandStart (M);
1104	goto Restart;
1105	}
1106
1107	/ Just an identifier with a dot /
1108	CurTok.Tok = TOK_IDENT;
1109	}
1110
1111	}
1112	return;
1113	}
1114
1115	/ Indirect op for sweet16 cpu. Must check this before checking for local*
1116	** symbols, because these may also use the '@' symbol.
1117	*/
1118	if (CPU == CPU_SWEET16 && C == `'@'`) {
1119	NextChar ();
1120	CurTok.Tok = TOK_AT;
1121	return;
1122	}
1123
1124	/ Local symbol? /
1125	if (C == LocalStart) {
1126
1127	/ Read the identifier. /
1128	ReadIdent ();
1129
1130	/ Start character alone is not enough /
1131	if (SB_GetLen (&CurTok.SVal) == `1`) {
1132	Error ("Invalid cheap local symbol");
1133	goto Again;
1134	}
1135
1136	/ A local identifier /
1137	CurTok.Tok = TOK_LOCAL_IDENT;
1138	return;
1139	}
1140
1141
1142	/ Identifier or keyword? /
1143	if (IsIdStart (C)) {
1144
1145	/ Read the identifier /
1146	ReadIdent ();
1147
1148	/ Check for special names. Bail out if we have identified the type of*
1149	** the token. Go on if the token is an identifier.
1150	*/
1151	switch (SB_GetLen (&CurTok.SVal)) {
1152	case `1`:
1153	switch (toupper (SB_AtUnchecked (&CurTok.SVal, `0`))) {
1154
1155	case `'A'`:
1156	if (C == `':'`) {
1157	NextChar ();
1158	CurTok.Tok = TOK_OVERRIDE_ABS;
1159	} else {
1160	CurTok.Tok = TOK_A;
1161	}
1162	return;
1163
1164	case `'F'`:
1165	if (C == `':'`) {
1166	NextChar ();
1167	CurTok.Tok = TOK_OVERRIDE_FAR;
1168	return;
1169	}
1170	break;
1171
1172	case `'S'`:
1173	if ((CPU == CPU_4510) \|\| (CPU == CPU_65816)) {
1174	CurTok.Tok = TOK_S;
1175	return;
1176	}
1177	break;
1178
1179	case `'X'`:
1180	CurTok.Tok = TOK_X;
1181	return;
1182
1183	case `'Y'`:
1184	CurTok.Tok = TOK_Y;
1185	return;
1186
1187	case `'Z'`:
1188	if (C == `':'`) {
1189	NextChar ();
1190	CurTok.Tok = TOK_OVERRIDE_ZP;
1191	return;
1192	} else {
1193	if (CPU == CPU_4510) {
1194	CurTok.Tok = TOK_Z;
1195	return;
1196	}
1197	}
1198	break;
1199
1200	default:
1201	break;
1202	}
1203	break;
1204	case `2`:
1205	if ((CPU == CPU_4510) &&
1206	(toupper (SB_AtUnchecked (&CurTok.SVal, `0`)) == `'S'`) &&
1207	(toupper (SB_AtUnchecked (&CurTok.SVal, `1`)) == `'P'`)) {
1208
1209	CurTok.Tok = TOK_S;
1210	return;
1211	}
1212	/ FALL THROUGH /
1213	default:
1214	if (CPU == CPU_SWEET16 &&
1215	(CurTok.IVal = Sweet16Reg (&CurTok.SVal)) >= `0`) {
1216
1217	/ A sweet16 register number in sweet16 mode /
1218	CurTok.Tok = TOK_REG;
1219	return;
1220	}
1221	}
1222
1223	/ Check for define style macro /
1224	if ((M = FindDefine (&CurTok.SVal)) != `0`) {
1225	/ Macro - expand it /
1226	MacExpandStart (M);
1227	goto Restart;
1228	} else {
1229	/ An identifier /
1230	CurTok.Tok = TOK_IDENT;
1231	}
1232	return;
1233	}
1234
1235	/ Ok, let's do the switch /
1236	CharAgain:
1237	switch (C) {
1238
1239	case `'+'`:
1240	NextChar ();
1241	CurTok.Tok = TOK_PLUS;
1242	return;
1243
1244	case `'-'`:
1245	NextChar ();
1246	CurTok.Tok = TOK_MINUS;
1247	return;
1248
1249	case `'/'`:
1250	NextChar ();
1251	if (C != `'*'`) {
1252	CurTok.Tok = TOK_DIV;
1253	} else if (CComments) {
1254	/ Remember the position, then skip the '' /*
1255	Collection LineInfos = STATIC_COLLECTION_INITIALIZER;
1256	GetFullLineInfo (&LineInfos);
1257	NextChar ();
1258	do {
1259	while (C != `'*'`) {
1260	if (C == EOF) {
1261	LIError (&LineInfos, "Unterminated comment");
1262	ReleaseFullLineInfo (&LineInfos);
1263	DoneCollection (&LineInfos);
1264	goto CharAgain;
1265	}
1266	NextChar ();
1267	}
1268	NextChar ();
1269	} while (C != `'/'`);
1270	NextChar ();
1271	ReleaseFullLineInfo (&LineInfos);
1272	DoneCollection (&LineInfos);
1273	goto Again;
1274	}
1275	return;
1276
1277	case `'*'`:
1278	NextChar ();
1279	CurTok.Tok = TOK_MUL;
1280	return;
1281
1282	case `'^'`:
1283	NextChar ();
1284	CurTok.Tok = TOK_XOR;
1285	return;
1286
1287	case `'&'`:
1288	NextChar ();
1289	if (C == `'&'`) {
1290	NextChar ();
1291	CurTok.Tok = TOK_BOOLAND;
1292	} else {
1293	CurTok.Tok = TOK_AND;
1294	}
1295	return;
1296
1297	case `'\|'`:
1298	NextChar ();
1299	if (C == `'\|'`) {
1300	NextChar ();
1301	CurTok.Tok = TOK_BOOLOR;
1302	} else {
1303	CurTok.Tok = TOK_OR;
1304	}
1305	return;
1306
1307	case `':'`:
1308	NextChar ();
1309	switch (C) {
1310
1311	case `':'`:
1312	NextChar ();
1313	CurTok.Tok = TOK_NAMESPACE;
1314	break;
1315
1316	case `'-'`:
1317	CurTok.IVal = `0`;
1318	do {
1319	--CurTok.IVal;
1320	NextChar ();
1321	} while (C == `'-'`);
1322	CurTok.Tok = TOK_ULABEL;
1323	break;
1324
1325	case `'+'`:
1326	CurTok.IVal = `0`;
1327	do {
1328	++CurTok.IVal;
1329	NextChar ();
1330	} while (C == `'+'`);
1331	CurTok.Tok = TOK_ULABEL;
1332	break;
1333
1334	case `'='`:
1335	NextChar ();
1336	CurTok.Tok = TOK_ASSIGN;
1337	break;
1338
1339	default:
1340	CurTok.Tok = TOK_COLON;
1341	break;
1342	}
1343	return;
1344
1345	case `','`:
1346	NextChar ();
1347	CurTok.Tok = TOK_COMMA;
1348	return;
1349
1350	case `';'`:
1351	NextChar ();
1352	while (C != `'\n'` && C != EOF) {
1353	NextChar ();
1354	}
1355	goto CharAgain;
1356
1357	case `'#'`:
1358	NextChar ();
1359	CurTok.Tok = TOK_HASH;
1360	return;
1361
1362	case `'('`:
1363	NextChar ();
1364	CurTok.Tok = TOK_LPAREN;
1365	return;
1366
1367	case `')'`:
1368	NextChar ();
1369	CurTok.Tok = TOK_RPAREN;
1370	return;
1371
1372	case `'['`:
1373	NextChar ();
1374	CurTok.Tok = TOK_LBRACK;
1375	return;
1376
1377	case `']'`:
1378	NextChar ();
1379	CurTok.Tok = TOK_RBRACK;
1380	return;
1381
1382	case `'{'`:
1383	NextChar ();
1384	CurTok.Tok = TOK_LCURLY;
1385	return;
1386
1387	case `'}'`:
1388	NextChar ();
1389	CurTok.Tok = TOK_RCURLY;
1390	return;
1391
1392	case `'<'`:
1393	NextChar ();
1394	if (C == `'='`) {
1395	NextChar ();
1396	CurTok.Tok = TOK_LE;
1397	} else if (C == `'<'`) {
1398	NextChar ();
1399	CurTok.Tok = TOK_SHL;
1400	} else if (C == `'>'`) {
1401	NextChar ();
1402	CurTok.Tok = TOK_NE;
1403	} else {
1404	CurTok.Tok = TOK_LT;
1405	}
1406	return;
1407
1408	case `'='`:
1409	NextChar ();
1410	CurTok.Tok = TOK_EQ;
1411	return;
1412
1413	case `'!'`:
1414	NextChar ();
1415	CurTok.Tok = TOK_BOOLNOT;
1416	return;
1417
1418	case `'>'`:
1419	NextChar ();
1420	if (C == `'='`) {
1421	NextChar ();
1422	CurTok.Tok = TOK_GE;
1423	} else if (C == `'>'`) {
1424	NextChar ();
1425	CurTok.Tok = TOK_SHR;
1426	} else {
1427	CurTok.Tok = TOK_GT;
1428	}
1429	return;
1430
1431	case `'~'`:
1432	NextChar ();
1433	CurTok.Tok = TOK_NOT;
1434	return;
1435
1436	case `'\''`:
1437	/ Hack: If we allow ' as terminating character for strings, read*
1438	** the following stuff as a string, and check for a one character
1439	** string later.
1440	*/
1441	if (LooseStringTerm) {
1442	ReadStringConst (`'\''`);
1443	if (SB_GetLen (&CurTok.SVal) == `1`) {
1444	CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, `0`);
1445	CurTok.Tok = TOK_CHARCON;
1446	} else {
1447	CurTok.Tok = TOK_STRCON;
1448	}
1449	} else {
1450	/ Always a character constant /
1451	NextChar ();
1452	if (C == EOF \|\| IsControl (C)) {
1453	Error ("Illegal character constant");
1454	goto CharAgain;
1455	}
1456	CurTok.IVal = C;
1457	CurTok.Tok = TOK_CHARCON;
1458	NextChar ();
1459	if (C != `'\''`) {
1460	if (!MissingCharTerm) {
1461	Error ("Illegal character constant");
1462	}
1463	} else {
1464	NextChar ();
1465	}
1466	}
1467	return;
1468
1469	case `'\"'`:
1470	ReadStringConst (`'\"'`);
1471	CurTok.Tok = TOK_STRCON;
1472	return;
1473
1474	case `'\\'`:
1475	/ Line continuation? /
1476	if (LineCont) {
1477	NextChar ();
1478	/ Next char should be a LF, if not, will result in an error later /
1479	if (C == `'\n'`) {
1480	/ Ignore the '\n' /
1481	NextChar ();
1482	goto Again;
1483	} else {
1484	/ Make it clear what the problem is: /
1485	Error ("EOL expected.");
1486	}
1487	}
1488	break;
1489
1490	case `'\n'`:
1491	NextChar ();
1492	CurTok.Tok = TOK_SEP;
1493	return;
1494
1495	case EOF:
1496	CheckInputStack ();
1497	/ In case of the main file, do not close it, but return EOF. /
1498	if (Source && Source->Next) {
1499	DoneCharSource ();
1500	goto Again;
1501	} else {
1502	CurTok.Tok = TOK_EOF;
1503	}
1504	return;
1505	}
1506
1507	/ If we go here, we could not identify the current character. Skip it*
1508	** and try again.
1509	*/
1510	Error ("Invalid input character: 0x%02X", C & `0xFF`);
1511	NextChar ();
1512	goto Again;
1513	}
1514
1515
1516
1517	int GetSubKey (const char* const* Keys, unsigned Count)
1518	/ Search for a subkey in a table of keywords. The current token must be an*
1519	** identifier and all keys must be in upper case. The identifier will be
1520	** uppercased in the process. The function returns the index of the keyword,
1521	** or -1 if the keyword was not found.
1522	*/
1523	{
1524	unsigned I;
1525
1526	/ Must have an identifier /
1527	PRECONDITION (CurTok.Tok == TOK_IDENT);
1528
1529	/ If we aren't in ignore case mode, we have to uppercase the identifier /
1530	if (!IgnoreCase) {
1531	UpcaseSVal ();
1532	}
1533
1534	/ Do a linear search (a binary search is not worth the effort) /
1535	for (I = `0`; I < Count; ++I) {
1536	if (SB_CompareStr (&CurTok.SVal, Keys [I]) == `0`) {
1537	/ Found it /
1538	return I;
1539	}
1540	}
1541
1542	/ Not found /
1543	return -`1`;
1544	}
1545
1546
1547
1548	unsigned char ParseAddrSize (void)
1549	/ Check if the next token is a keyword that denotes an address size specifier.*
1550	** If so, return the corresponding address size constant, otherwise output an
1551	** error message and return ADDR_SIZE_DEFAULT.
1552	*/
1553	{
1554	unsigned char AddrSize;
1555
1556	/ Check for an identifier /
1557	if (CurTok.Tok != TOK_IDENT) {
1558	Error ("Address size specifier expected");
1559	return ADDR_SIZE_DEFAULT;
1560	}
1561
1562	/ Convert the attribute /
1563	AddrSize = AddrSizeFromStr (SB_GetConstBuf (&CurTok.SVal));
1564	if (AddrSize == ADDR_SIZE_INVALID) {
1565	Error ("Address size specifier expected");
1566	AddrSize = ADDR_SIZE_DEFAULT;
1567	}
1568
1569	/ Done /
1570	return AddrSize;
1571	}
1572
1573
1574
void InitScanner (const char* InFile)
/* Set up the scanner: InFile is passed to NewInputFile, which opens it as
** the input file.
*/
{
    NewInputFile (InFile);
}
1581
1582
1583
void DoneScanner (void)
/* Shut down the scanner and release its resources by calling
** DoneCharSource.
*/
{
    DoneCharSource ();
}
1589

Browse the source code of cc65/src/ca65/scanner.c