// Scintilla source code edit control
/** @file LexA68k.cxx
 ** Lexer for 68k assembler source files
 ** Written by Martial Demolins AKA Folco
 **/
// Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
// The License.txt file describes the conditions under which this software
// may be distributed.
9
10
11#include <stdlib.h>
12#include <string.h>
13#include <stdio.h>
14#include <stdarg.h>
15#include <assert.h>
16#include <ctype.h>
17
18#include <string>
19#include <string_view>
20
21#include "ILexer.h"
22#include "Scintilla.h"
23#include "SciLexer.h"
24
25#include "WordList.h"
26#include "LexAccessor.h"
27#include "Accessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31
32using namespace Lexilla;
33
34
// Return values for GetOperatorType.
// Declared as enum constants rather than object-like macros: the names and
// values are unchanged and still convert implicitly to int, but they are real
// identifiers (scoped, visible to a debugger, immune to macro expansion).
enum {
    NO_OPERATOR = 0,        // The char is not an operator
    OPERATOR_1CHAR = 1,     // The operator is one char long
    OPERATOR_2CHAR = 2      // The operator is two chars long
};
39
40
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, '_' or a backslash.
 *
 *  isalpha() is only defined for arguments representable as unsigned char
 *  (or EOF). The parameter is a plain int, so it is range-checked first:
 *  anything outside 0..0x7F is rejected without calling isalpha().
 */

static inline bool IsIdentifierStart (const int ch)
{
    const bool isAscii = (ch >= 0) && (ch < 0x80);

    return ((isAscii && isalpha(ch)) || (ch == '_') || (ch == '\\'));
}
51
52
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 *  isalnum() is only defined for arguments representable as unsigned char
 *  (or EOF). The parameter is a plain int, so it is range-checked first:
 *  anything outside 0..0x7F is rejected without calling isalnum().
 */

static inline bool IsIdentifierChar (const int ch)
{
    const bool isAscii = (ch >= 0) && (ch < 0x80);

    return ((isAscii && isalnum(ch)) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
}
63
64
65/**
66 * GetOperatorType
67 *
68 * Return:
69 * NO_OPERATOR if char is not an operator
70 * OPERATOR_1CHAR if the operator is one char long
71 * OPERATOR_2CHAR if the operator is two chars long
72 */
73
74static inline int GetOperatorType (const int ch1, const int ch2)
75{
76 int OpType = NO_OPERATOR;
77
78 if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
79 (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
80 OpType = OPERATOR_1CHAR;
81
82 else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
83 OpType = OPERATOR_2CHAR;
84
85 return OpType;
86}
87
88
/**
 *  IsBin
 *
 *  Return true if the given char is a binary digit, i.e. '0' or '1'
 */

static inline bool IsBin (const int ch)
{
    switch (ch) {
    case '0':
    case '1':
        return true;

    default:
        return false;
    }
}
99
100
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 *  isalpha() is only defined for arguments representable as unsigned char
 *  (or EOF). The parameter is a plain int, so it is range-checked first:
 *  anything outside 0..0x7F is rejected without calling isalpha().
 */

static inline bool IsDoxygenChar (const int ch)
{
    const bool isAscii = (ch >= 0) && (ch < 0x80);

    return (isAscii && isalpha(ch)) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
111
112
113/**
114 * ColouriseA68kDoc
115 *
116 * Main function, which colourises a 68k source
117 */
118
119static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler)
120{
121 // Used to buffer a string, to be able to compare it using built-in functions
122 char Buffer[100];
123
124
125 // Used to know the length of an operator
126 int OpType;
127
128
129 // Get references to keywords lists
130 WordList &cpuInstruction = *keywordlists[0];
131 WordList &registers = *keywordlists[1];
132 WordList &directive = *keywordlists[2];
133 WordList &extInstruction = *keywordlists[3];
134 WordList &alert = *keywordlists[4];
135 WordList &doxygenKeyword = *keywordlists[5];
136
137
138 // Instanciate a context for our source
139 StyleContext sc(startPos, length, initStyle, styler);
140
141
142 /************************************************************
143 *
144 * Parse the source
145 *
146 ************************************************************/
147
148 for ( ; sc.More(); sc.Forward())
149 {
150 /************************************************************
151 *
152 * A style always terminates at the end of a line, even for
153 * comments (no multi-lines comments)
154 *
155 ************************************************************/
156 if (sc.atLineStart) {
157 sc.SetState(SCE_A68K_DEFAULT);
158 }
159
160
161 /************************************************************
162 *
163 * If we are not in "default style", check if the style continues
164 * In this case, we just have to loop
165 *
166 ************************************************************/
167
168 if (sc.state != SCE_A68K_DEFAULT)
169 {
170 if ( ((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number
171 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number
172 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number
173 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument
174 || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted
175 || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted
176 || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch)) // Macro declaration (or global label, we don't know at this point)
177 || ((sc.state == SCE_A68K_IDENTIFIER) && IsIdentifierChar(sc.ch)) // Identifier
178 || ((sc.state == SCE_A68K_LABEL) && IsIdentifierChar(sc.ch)) // Label (local)
179 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && IsDoxygenChar(sc.ch)) // Doxygen keyword
180 || ((sc.state == SCE_A68K_COMMENT_SPECIAL) && isalpha(sc.ch)) // Alert
181 || ((sc.state == SCE_A68K_COMMENT) && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment
182 {
183 continue;
184 }
185
186 /************************************************************
187 *
188 * Check if current state terminates
189 *
190 ************************************************************/
191
192 // Strings: include terminal ' or " in the current string by skipping it
193 if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) {
194 sc.Forward();
195 }
196
197
198 // If a macro declaration was terminated with ':', it was a label
199 else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) {
200 sc.ChangeState(SCE_A68K_LABEL);
201 }
202
203
204 // If it wasn't a Doxygen keyword, change it to normal comment
205 else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) {
206 sc.GetCurrent(Buffer, sizeof(Buffer));
207 if (!doxygenKeyword.InList(Buffer)) {
208 sc.ChangeState(SCE_A68K_COMMENT);
209 }
210 sc.SetState(SCE_A68K_COMMENT);
211 continue;
212 }
213
214
215 // If it wasn't an Alert, change it to normal comment
216 else if (sc.state == SCE_A68K_COMMENT_SPECIAL) {
217 sc.GetCurrent(Buffer, sizeof(Buffer));
218 if (!alert.InList(Buffer)) {
219 sc.ChangeState(SCE_A68K_COMMENT);
220 }
221 // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
222 if (sc.ch == '\\') {
223 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
224 }
225 else {
226 sc.SetState(SCE_A68K_COMMENT);
227 }
228 continue;
229 }
230
231
232 // If we are in a comment, it's a Doxygen keyword or an Alert
233 else if (sc.state == SCE_A68K_COMMENT) {
234 if (sc.ch == '\\') {
235 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
236 }
237 else {
238 sc.SetState(SCE_A68K_COMMENT_SPECIAL);
239 }
240 continue;
241 }
242
243
244 // Check if we are at the end of an identifier
245 // In this case, colourise it if was a keyword.
246 else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) {
247 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
248 if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list
249 sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
250 }
251 else if (extInstruction.InList(Buffer)) {
252 sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
253 }
254 else if (registers.InList(Buffer)) {
255 sc.ChangeState(SCE_A68K_REGISTER);
256 }
257 else if (directive.InList(Buffer)) {
258 sc.ChangeState(SCE_A68K_DIRECTIVE);
259 }
260 }
261
262 // All special contexts are now handled.Come back to default style
263 sc.SetState(SCE_A68K_DEFAULT);
264 }
265
266
267 /************************************************************
268 *
269 * Check if we must enter a new state
270 *
271 ************************************************************/
272
273 // Something which begins at the beginning of a line, and with
274 // - '\' + an identifier start char, or
275 // - '\\@' + an identifier start char
276 // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
277 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) {
278 sc.SetState(SCE_A68K_LABEL);
279 }
280
281 if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) {
282 sc.Forward(2);
283 if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) {
284 sc.ChangeState(SCE_A68K_LABEL);
285 sc.SetState(SCE_A68K_LABEL);
286 }
287 }
288
289 // Label and macro identifiers start at the beginning of a line
290 // We set both as a macro id, but if it wasn't one (':' at the end),
291 // it will be changed as a label.
292 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
293 sc.SetState(SCE_A68K_MACRO_DECLARATION);
294 }
295 else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Default: alert in a comment. If it doesn't match
296 sc.SetState(SCE_A68K_COMMENT); // with an alert, it will be toggle to a normal comment
297 }
298 else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix
299 sc.SetState(SCE_A68K_NUMBER_DEC);
300 }
301 else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%'
302 sc.SetState(SCE_A68K_NUMBER_BIN);
303 }
304 else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$'
305 sc.SetState(SCE_A68K_NUMBER_HEX);
306 }
307 else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted)
308 sc.SetState(SCE_A68K_STRING1);
309 }
310 else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted)
311 sc.SetState(SCE_A68K_STRING2);
312 }
313 else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro are prefixed with '\'
314 sc.SetState(SCE_A68K_MACRO_ARG);
315 }
316 else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc...
317 sc.SetState(SCE_A68K_IDENTIFIER);
318 }
319 else {
320 if (sc.ch < 0x80) {
321 OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator
322 if (OpType != NO_OPERATOR) {
323 sc.SetState(SCE_A68K_OPERATOR);
324 if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long
325 sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<)
326 }
327 }
328 }
329 }
330 } // End of for()
331 sc.Complete();
332}
333
334
// Names of the keyword lists.
// The order matches the indices ColouriseA68kDoc uses to read keywordlists[];
// the table is closed by a null entry.

static const char * const a68kWordListDesc[] =
{
    "CPU instructions",         // keywordlists[0]
    "Registers",                // keywordlists[1]
    "Directives",               // keywordlists[2]
    "Extended instructions",    // keywordlists[3]
    "Comment special words",    // keywordlists[4]
    "Doxygen keywords",         // keywordlists[5]
    NULL
};
347
348LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);
349