1 | // Scintilla source code edit control |
2 | /** @file LexA68k.cxx |
 ** Lexer for 68k assembler sources
4 | ** Written by Martial Demolins AKA Folco |
5 | **/ |
6 | // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com> |
7 | // The License.txt file describes the conditions under which this software |
8 | // may be distributed. |
9 | |
10 | |
11 | #include <stdlib.h> |
12 | #include <string.h> |
13 | #include <stdio.h> |
14 | #include <stdarg.h> |
15 | #include <assert.h> |
16 | #include <ctype.h> |
17 | |
18 | #include <string> |
19 | #include <string_view> |
20 | |
21 | #include "ILexer.h" |
22 | #include "Scintilla.h" |
23 | #include "SciLexer.h" |
24 | |
25 | #include "WordList.h" |
26 | #include "LexAccessor.h" |
27 | #include "Accessor.h" |
28 | #include "StyleContext.h" |
29 | #include "CharacterSet.h" |
30 | #include "LexerModule.h" |
31 | |
32 | using namespace Lexilla; |
33 | |
34 | |
// Possible results of GetOperatorType
enum {
    NO_OPERATOR    = 0,     // the character is not an operator
    OPERATOR_1CHAR = 1,     // the operator is one char long
    OPERATOR_2CHAR = 2      // the operator is two chars long
};
39 | |
40 | |
/**
 * IsIdentifierStart
 *
 * Return true if the given char is a valid identifier first char:
 * an ASCII letter, '_' or '\'.
 *
 * ch may be any value (this file passes sc.ch and sc.chNext, the latter
 * without a range check). Values outside the 7-bit range are never letters
 * here: the <ctype.h> functions have undefined behaviour for arguments that
 * are neither EOF nor representable as unsigned char, so isalpha() is only
 * consulted for 0..0x7F.
 */

static inline bool IsIdentifierStart (const int ch)
{
    const bool isAsciiLetter = (ch >= 0) && (ch < 0x80) && isalpha(ch);

    return isAsciiLetter || (ch == '_') || (ch == '\\');
}
51 | |
52 | |
/**
 * IsIdentifierChar
 *
 * Return true if the given char is a valid identifier char:
 * an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 * ch may be any value (this file passes sc.ch without a range check while
 * scanning identifiers and labels). isalnum() is only consulted for
 * 0..0x7F because the <ctype.h> functions have undefined behaviour for
 * arguments that are neither EOF nor representable as unsigned char.
 */

static inline bool IsIdentifierChar (const int ch)
{
    const bool isAsciiAlnum = (ch >= 0) && (ch < 0x80) && isalnum(ch);

    return isAsciiAlnum || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.');
}
63 | |
64 | |
65 | /** |
66 | * GetOperatorType |
67 | * |
68 | * Return: |
69 | * NO_OPERATOR if char is not an operator |
70 | * OPERATOR_1CHAR if the operator is one char long |
71 | * OPERATOR_2CHAR if the operator is two chars long |
72 | */ |
73 | |
74 | static inline int GetOperatorType (const int ch1, const int ch2) |
75 | { |
76 | int OpType = NO_OPERATOR; |
77 | |
78 | if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') || |
79 | (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ',')) |
80 | OpType = OPERATOR_1CHAR; |
81 | |
82 | else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>')) |
83 | OpType = OPERATOR_2CHAR; |
84 | |
85 | return OpType; |
86 | } |
87 | |
88 | |
/**
 * IsBin
 *
 * Tell whether the given char is a binary digit, i.e. '0' or '1'
 */

static inline bool IsBin (const int ch)
{
    // '0' and '1' are adjacent, so a range test covers exactly both
    return (ch >= '0') && (ch <= '1');
}
99 | |
100 | |
/**
 * IsDoxygenChar
 *
 * Return true if the char may be part of a Doxygen keyword:
 * an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 * This file calls it on characters found inside comments, which may be
 * any value. isalpha() is only consulted for 0..0x7F because the <ctype.h>
 * functions have undefined behaviour for arguments that are neither EOF
 * nor representable as unsigned char.
 */

static inline bool IsDoxygenChar (const int ch)
{
    const bool isAsciiLetter = (ch >= 0) && (ch < 0x80) && isalpha(ch);

    return isAsciiLetter || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
111 | |
112 | |
113 | /** |
114 | * ColouriseA68kDoc |
115 | * |
116 | * Main function, which colourises a 68k source |
117 | */ |
118 | |
119 | static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler) |
120 | { |
121 | // Used to buffer a string, to be able to compare it using built-in functions |
122 | char Buffer[100]; |
123 | |
124 | |
125 | // Used to know the length of an operator |
126 | int OpType; |
127 | |
128 | |
129 | // Get references to keywords lists |
130 | WordList &cpuInstruction = *keywordlists[0]; |
131 | WordList ®isters = *keywordlists[1]; |
132 | WordList &directive = *keywordlists[2]; |
133 | WordList &extInstruction = *keywordlists[3]; |
134 | WordList &alert = *keywordlists[4]; |
135 | WordList &doxygenKeyword = *keywordlists[5]; |
136 | |
137 | |
138 | // Instanciate a context for our source |
139 | StyleContext sc(startPos, length, initStyle, styler); |
140 | |
141 | |
142 | /************************************************************ |
143 | * |
144 | * Parse the source |
145 | * |
146 | ************************************************************/ |
147 | |
148 | for ( ; sc.More(); sc.Forward()) |
149 | { |
150 | /************************************************************ |
151 | * |
152 | * A style always terminates at the end of a line, even for |
153 | * comments (no multi-lines comments) |
154 | * |
155 | ************************************************************/ |
156 | if (sc.atLineStart) { |
157 | sc.SetState(SCE_A68K_DEFAULT); |
158 | } |
159 | |
160 | |
161 | /************************************************************ |
162 | * |
163 | * If we are not in "default style", check if the style continues |
164 | * In this case, we just have to loop |
165 | * |
166 | ************************************************************/ |
167 | |
168 | if (sc.state != SCE_A68K_DEFAULT) |
169 | { |
170 | if ( ((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number |
171 | || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number |
172 | || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number |
173 | || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument |
174 | || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted |
175 | || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted |
176 | || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch)) // Macro declaration (or global label, we don't know at this point) |
177 | || ((sc.state == SCE_A68K_IDENTIFIER) && IsIdentifierChar(sc.ch)) // Identifier |
178 | || ((sc.state == SCE_A68K_LABEL) && IsIdentifierChar(sc.ch)) // Label (local) |
179 | || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && IsDoxygenChar(sc.ch)) // Doxygen keyword |
180 | || ((sc.state == SCE_A68K_COMMENT_SPECIAL) && isalpha(sc.ch)) // Alert |
181 | || ((sc.state == SCE_A68K_COMMENT) && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment |
182 | { |
183 | continue; |
184 | } |
185 | |
186 | /************************************************************ |
187 | * |
188 | * Check if current state terminates |
189 | * |
190 | ************************************************************/ |
191 | |
192 | // Strings: include terminal ' or " in the current string by skipping it |
193 | if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) { |
194 | sc.Forward(); |
195 | } |
196 | |
197 | |
198 | // If a macro declaration was terminated with ':', it was a label |
199 | else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) { |
200 | sc.ChangeState(SCE_A68K_LABEL); |
201 | } |
202 | |
203 | |
204 | // If it wasn't a Doxygen keyword, change it to normal comment |
205 | else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) { |
206 | sc.GetCurrent(Buffer, sizeof(Buffer)); |
207 | if (!doxygenKeyword.InList(Buffer)) { |
208 | sc.ChangeState(SCE_A68K_COMMENT); |
209 | } |
210 | sc.SetState(SCE_A68K_COMMENT); |
211 | continue; |
212 | } |
213 | |
214 | |
215 | // If it wasn't an Alert, change it to normal comment |
216 | else if (sc.state == SCE_A68K_COMMENT_SPECIAL) { |
217 | sc.GetCurrent(Buffer, sizeof(Buffer)); |
218 | if (!alert.InList(Buffer)) { |
219 | sc.ChangeState(SCE_A68K_COMMENT); |
220 | } |
221 | // Reset style to normal comment, or to Doxygen keyword if it begins with '\' |
222 | if (sc.ch == '\\') { |
223 | sc.SetState(SCE_A68K_COMMENT_DOXYGEN); |
224 | } |
225 | else { |
226 | sc.SetState(SCE_A68K_COMMENT); |
227 | } |
228 | continue; |
229 | } |
230 | |
231 | |
232 | // If we are in a comment, it's a Doxygen keyword or an Alert |
233 | else if (sc.state == SCE_A68K_COMMENT) { |
234 | if (sc.ch == '\\') { |
235 | sc.SetState(SCE_A68K_COMMENT_DOXYGEN); |
236 | } |
237 | else { |
238 | sc.SetState(SCE_A68K_COMMENT_SPECIAL); |
239 | } |
240 | continue; |
241 | } |
242 | |
243 | |
244 | // Check if we are at the end of an identifier |
245 | // In this case, colourise it if was a keyword. |
246 | else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) { |
247 | sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context |
248 | if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list |
249 | sc.ChangeState(SCE_A68K_CPUINSTRUCTION); |
250 | } |
251 | else if (extInstruction.InList(Buffer)) { |
252 | sc.ChangeState(SCE_A68K_EXTINSTRUCTION); |
253 | } |
254 | else if (registers.InList(Buffer)) { |
255 | sc.ChangeState(SCE_A68K_REGISTER); |
256 | } |
257 | else if (directive.InList(Buffer)) { |
258 | sc.ChangeState(SCE_A68K_DIRECTIVE); |
259 | } |
260 | } |
261 | |
262 | // All special contexts are now handled.Come back to default style |
263 | sc.SetState(SCE_A68K_DEFAULT); |
264 | } |
265 | |
266 | |
267 | /************************************************************ |
268 | * |
269 | * Check if we must enter a new state |
270 | * |
271 | ************************************************************/ |
272 | |
273 | // Something which begins at the beginning of a line, and with |
274 | // - '\' + an identifier start char, or |
275 | // - '\\@' + an identifier start char |
276 | // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration |
277 | if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) { |
278 | sc.SetState(SCE_A68K_LABEL); |
279 | } |
280 | |
281 | if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) { |
282 | sc.Forward(2); |
283 | if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) { |
284 | sc.ChangeState(SCE_A68K_LABEL); |
285 | sc.SetState(SCE_A68K_LABEL); |
286 | } |
287 | } |
288 | |
289 | // Label and macro identifiers start at the beginning of a line |
290 | // We set both as a macro id, but if it wasn't one (':' at the end), |
291 | // it will be changed as a label. |
292 | if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { |
293 | sc.SetState(SCE_A68K_MACRO_DECLARATION); |
294 | } |
295 | else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Default: alert in a comment. If it doesn't match |
296 | sc.SetState(SCE_A68K_COMMENT); // with an alert, it will be toggle to a normal comment |
297 | } |
298 | else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix |
299 | sc.SetState(SCE_A68K_NUMBER_DEC); |
300 | } |
301 | else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%' |
302 | sc.SetState(SCE_A68K_NUMBER_BIN); |
303 | } |
304 | else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$' |
305 | sc.SetState(SCE_A68K_NUMBER_HEX); |
306 | } |
307 | else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted) |
308 | sc.SetState(SCE_A68K_STRING1); |
309 | } |
310 | else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted) |
311 | sc.SetState(SCE_A68K_STRING2); |
312 | } |
313 | else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro are prefixed with '\' |
314 | sc.SetState(SCE_A68K_MACRO_ARG); |
315 | } |
316 | else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc... |
317 | sc.SetState(SCE_A68K_IDENTIFIER); |
318 | } |
319 | else { |
320 | if (sc.ch < 0x80) { |
321 | OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator |
322 | if (OpType != NO_OPERATOR) { |
323 | sc.SetState(SCE_A68K_OPERATOR); |
324 | if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long |
325 | sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<) |
326 | } |
327 | } |
328 | } |
329 | } |
330 | } // End of for() |
331 | sc.Complete(); |
332 | } |
333 | |
334 | |
// Descriptions of the keyword lists, in the same order as the
// keywordlists[] entries read by ColouriseA68kDoc. A null entry ends the table.

static const char * const a68kWordListDesc[] = {
    "CPU instructions",         // [0]
    "Registers",                // [1]
    "Directives",               // [2]
    "Extended instructions",    // [3]
    "Comment special words",    // [4]
    "Doxygen keywords",         // [5]
    NULL
};
347 | |
348 | LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k" , 0, a68kWordListDesc); |
349 | |