1// Scintilla source code edit control
2/** @file LexAsm.cxx
3 ** Lexer for Assembler, just for the MASM syntax
4 ** Written by The Black Horus
5 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
8 **/
9// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10// The License.txt file describes the conditions under which this software may be distributed.
11
12#include <stdlib.h>
13#include <string.h>
14#include <stdio.h>
15#include <stdarg.h>
16#include <assert.h>
17#include <ctype.h>
18
19#include <string>
20#include <string_view>
21#include <map>
22#include <set>
23#include <functional>
24
25#include "ILexer.h"
26#include "Scintilla.h"
27#include "SciLexer.h"
28
29#include "WordList.h"
30#include "LexAccessor.h"
31#include "StyleContext.h"
32#include "CharacterSet.h"
33#include "LexerModule.h"
34#include "OptionSet.h"
35#include "DefaultLexer.h"
36
37using namespace Scintilla;
38using namespace Lexilla;
39
/**
 * Tell whether @a ch may appear inside an identifier or number token.
 * Only values below 0x80 qualify: alphanumerics plus '.', '_' and '?'.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch >= 0x80)
		return false;
	switch (ch) {
	case '.':
	case '_':
	case '?':
		return true;
	default:
		return isalnum(ch) != 0;
	}
}
44
/**
 * Tell whether @a ch may begin an identifier.
 * Only values below 0x80 qualify: alphanumerics plus '_', '.', '%', '@', '$' and '?'.
 */
static inline bool IsAWordStart(const int ch) {
	if (ch >= 0x80)
		return false;
	switch (ch) {
	case '_':
	case '.':
	case '%':
	case '@':
	case '$':
	case '?':
		return true;
	default:
		return isalnum(ch) != 0;
	}
}
49
/**
 * Tell whether @a ch is one of the characters styled as an operator.
 * '.' is deliberately not in the set because it is used to make up numbers.
 */
static inline bool IsAsmOperator(const int ch) {
	// Letters and digits are never operators.
	if ((ch < 0x80) && isalnum(ch))
		return false;
	switch (ch) {
	case '*':
	case '/':
	case '-':
	case '+':
	case '(':
	case ')':
	case '=':
	case '^':
	case '[':
	case ']':
	case '<':
	case '&':
	case '>':
	case ',':
	case '|':
	case '~':
	case '%':
	case ':':
		return true;
	default:
		return false;
	}
}
62
63static bool IsStreamCommentStyle(int style) {
64 return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
65}
66
/**
 * Map 'A'..'Z' to 'a'..'z'; any other value is returned unchanged.
 */
static inline int LowerCase(int c) {
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? ('a' + c - 'A') : c;
}
72
73// An individual named option for use in an OptionSet
74
// Options used for LexerAsm
struct OptionsAsm {
	std::string delimiter;          // COMMENT directive delimiter; empty means use '~'
	bool fold;                      // master switch for folding
	bool foldSyntaxBased;           // fold on the fold-start/fold-end directive lists
	bool foldCommentMultiline;      // fold stream comments
	bool foldCommentExplicit;       // fold on explicit fold markers
	std::string foldExplicitStart;  // custom explicit fold start marker
	std::string foldExplicitEnd;    // custom explicit fold end marker
	bool foldExplicitAnywhere;      // explicit markers count outside line comments too
	bool foldCompact;               // flag lines without visible characters as white space
	std::string commentChar;        // overrides the lexer's default comment character when non-empty

	// All strings start empty; only foldSyntaxBased and foldCompact default to true.
	OptionsAsm() :
		delimiter(),
		fold(false),
		foldSyntaxBased(true),
		foldCommentMultiline(false),
		foldCommentExplicit(false),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true),
		commentChar() {
	}
};
100
// Descriptions of the keyword lists, in the index order used by
// LexerAsm::WordListSet. The array is terminated by a null pointer.
static const char * const asmWordListDesc[] = {
	"CPU instructions",       // 0
	"FPU instructions",       // 1
	"Registers",              // 2
	"Directives",             // 3
	"Directive operands",     // 4
	"Extended instructions",  // 5
	"Directives4Foldstart",   // 6
	"Directives4Foldend",     // 7
	nullptr
};
112
113struct OptionSetAsm : public OptionSet<OptionsAsm> {
114 OptionSetAsm() {
115 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
116 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
117
118 DefineProperty("fold", &OptionsAsm::fold);
119
120 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
121 "Set this property to 0 to disable syntax based folding.");
122
123 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
124 "Set this property to 1 to enable folding multi-line comments.");
125
126 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
127 "This option enables folding explicit fold points when using the Asm lexer. "
128 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
129 "at the end of a section that should fold.");
130
131 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
132 "The string to use for explicit fold start points, replacing the standard ;{.");
133
134 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
135 "The string to use for explicit fold end points, replacing the standard ;}.");
136
137 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
138 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
139
140 DefineProperty("fold.compact", &OptionsAsm::foldCompact);
141
142 DefineProperty("lexer.as.comment.character", &OptionsAsm::commentChar,
143 "Overrides the default comment character (which is ';' for asm and '#' for as).");
144
145 DefineWordListSets(asmWordListDesc);
146 }
147};
148
149class LexerAsm : public DefaultLexer {
150 WordList cpuInstruction;
151 WordList mathInstruction;
152 WordList registers;
153 WordList directive;
154 WordList directiveOperand;
155 WordList extInstruction;
156 WordList directives4foldstart;
157 WordList directives4foldend;
158 OptionsAsm options;
159 OptionSetAsm osAsm;
160 int commentChar;
161public:
162 LexerAsm(const char *languageName_, int language_, int commentChar_) : DefaultLexer(languageName_, language_) {
163 commentChar = commentChar_;
164 }
165 virtual ~LexerAsm() {
166 }
167 void SCI_METHOD Release() override {
168 delete this;
169 }
170 int SCI_METHOD Version() const override {
171 return lvRelease5;
172 }
173 const char * SCI_METHOD PropertyNames() override {
174 return osAsm.PropertyNames();
175 }
176 int SCI_METHOD PropertyType(const char *name) override {
177 return osAsm.PropertyType(name);
178 }
179 const char * SCI_METHOD DescribeProperty(const char *name) override {
180 return osAsm.DescribeProperty(name);
181 }
182 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
183 const char * SCI_METHOD PropertyGet(const char *key) override {
184 return osAsm.PropertyGet(key);
185 }
186 const char * SCI_METHOD DescribeWordListSets() override {
187 return osAsm.DescribeWordListSets();
188 }
189 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
190 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
191 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
192
193 void * SCI_METHOD PrivateCall(int, void *) override {
194 return 0;
195 }
196
197 static ILexer5 *LexerFactoryAsm() {
198 return new LexerAsm("asm", SCLEX_ASM, ';');
199 }
200
201 static ILexer5 *LexerFactoryAs() {
202 return new LexerAsm("as", SCLEX_AS, '#');
203 }
204};
205
206Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
207 if (osAsm.PropertySet(&options, key, val)) {
208 return 0;
209 }
210 return -1;
211}
212
213Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
214 WordList *wordListN = 0;
215 switch (n) {
216 case 0:
217 wordListN = &cpuInstruction;
218 break;
219 case 1:
220 wordListN = &mathInstruction;
221 break;
222 case 2:
223 wordListN = &registers;
224 break;
225 case 3:
226 wordListN = &directive;
227 break;
228 case 4:
229 wordListN = &directiveOperand;
230 break;
231 case 5:
232 wordListN = &extInstruction;
233 break;
234 case 6:
235 wordListN = &directives4foldstart;
236 break;
237 case 7:
238 wordListN = &directives4foldend;
239 break;
240 }
241 Sci_Position firstModification = -1;
242 if (wordListN) {
243 WordList wlNew;
244 wlNew.Set(wl);
245 if (*wordListN != wlNew) {
246 wordListN->Set(wl);
247 firstModification = 0;
248 }
249 }
250 return firstModification;
251}
252
253void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
254 LexAccessor styler(pAccess);
255
256 const char commentCharacter = options.commentChar.empty() ?
257 commentChar : options.commentChar.front();
258
259 // Do not leak onto next line
260 if (initStyle == SCE_ASM_STRINGEOL)
261 initStyle = SCE_ASM_DEFAULT;
262
263 StyleContext sc(startPos, length, initStyle, styler);
264
265 for (; sc.More(); sc.Forward())
266 {
267
268 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
269 if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
270 sc.SetState(SCE_ASM_STRING);
271 } else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
272 sc.SetState(SCE_ASM_CHARACTER);
273 }
274
275 // Handle line continuation generically.
276 if (sc.ch == '\\') {
277 if (sc.chNext == '\n' || sc.chNext == '\r') {
278 sc.Forward();
279 if (sc.ch == '\r' && sc.chNext == '\n') {
280 sc.Forward();
281 }
282 continue;
283 }
284 }
285
286 // Determine if the current state should terminate.
287 if (sc.state == SCE_ASM_OPERATOR) {
288 if (!IsAsmOperator(sc.ch)) {
289 sc.SetState(SCE_ASM_DEFAULT);
290 }
291 } else if (sc.state == SCE_ASM_NUMBER) {
292 if (!IsAWordChar(sc.ch)) {
293 sc.SetState(SCE_ASM_DEFAULT);
294 }
295 } else if (sc.state == SCE_ASM_IDENTIFIER) {
296 if (!IsAWordChar(sc.ch) ) {
297 char s[100];
298 sc.GetCurrentLowered(s, sizeof(s));
299 bool IsDirective = false;
300
301 if (cpuInstruction.InList(s)) {
302 sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
303 } else if (mathInstruction.InList(s)) {
304 sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
305 } else if (registers.InList(s)) {
306 sc.ChangeState(SCE_ASM_REGISTER);
307 } else if (directive.InList(s)) {
308 sc.ChangeState(SCE_ASM_DIRECTIVE);
309 IsDirective = true;
310 } else if (directiveOperand.InList(s)) {
311 sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
312 } else if (extInstruction.InList(s)) {
313 sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
314 }
315 sc.SetState(SCE_ASM_DEFAULT);
316 if (IsDirective && !strcmp(s, "comment")) {
317 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
318 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
319 sc.ForwardSetState(SCE_ASM_DEFAULT);
320 }
321 if (sc.ch == delimiter) {
322 sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
323 }
324 }
325 }
326 } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
327 char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
328 if (sc.ch == delimiter) {
329 while (!sc.atLineEnd) {
330 sc.Forward();
331 }
332 sc.SetState(SCE_ASM_DEFAULT);
333 }
334 } else if (sc.state == SCE_ASM_COMMENT ) {
335 if (sc.atLineEnd) {
336 sc.SetState(SCE_ASM_DEFAULT);
337 }
338 } else if (sc.state == SCE_ASM_STRING) {
339 if (sc.ch == '\\') {
340 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
341 sc.Forward();
342 }
343 } else if (sc.ch == '\"') {
344 sc.ForwardSetState(SCE_ASM_DEFAULT);
345 } else if (sc.atLineEnd) {
346 sc.ChangeState(SCE_ASM_STRINGEOL);
347 sc.ForwardSetState(SCE_ASM_DEFAULT);
348 }
349 } else if (sc.state == SCE_ASM_CHARACTER) {
350 if (sc.ch == '\\') {
351 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
352 sc.Forward();
353 }
354 } else if (sc.ch == '\'') {
355 sc.ForwardSetState(SCE_ASM_DEFAULT);
356 } else if (sc.atLineEnd) {
357 sc.ChangeState(SCE_ASM_STRINGEOL);
358 sc.ForwardSetState(SCE_ASM_DEFAULT);
359 }
360 }
361
362 // Determine if a new state should be entered.
363 if (sc.state == SCE_ASM_DEFAULT) {
364 if (sc.ch == commentCharacter) {
365 sc.SetState(SCE_ASM_COMMENT);
366 } else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
367 sc.SetState(SCE_ASM_NUMBER);
368 } else if (IsAWordStart(sc.ch)) {
369 sc.SetState(SCE_ASM_IDENTIFIER);
370 } else if (sc.ch == '\"') {
371 sc.SetState(SCE_ASM_STRING);
372 } else if (sc.ch == '\'') {
373 sc.SetState(SCE_ASM_CHARACTER);
374 } else if (IsAsmOperator(sc.ch)) {
375 sc.SetState(SCE_ASM_OPERATOR);
376 }
377 }
378
379 }
380 sc.Complete();
381}
382
383// Store both the current line's fold level and the next lines in the
384// level store to make it easy to pick up with each increment
385// and to make it possible to fiddle the current level for "else".
386
387void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
388
389 if (!options.fold)
390 return;
391
392 LexAccessor styler(pAccess);
393
394 Sci_PositionU endPos = startPos + length;
395 int visibleChars = 0;
396 Sci_Position lineCurrent = styler.GetLine(startPos);
397 int levelCurrent = SC_FOLDLEVELBASE;
398 if (lineCurrent > 0)
399 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
400 int levelNext = levelCurrent;
401 char chNext = styler[startPos];
402 int styleNext = styler.StyleAt(startPos);
403 int style = initStyle;
404 char word[100];
405 int wordlen = 0;
406 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
407 for (Sci_PositionU i = startPos; i < endPos; i++) {
408 char ch = chNext;
409 chNext = styler.SafeGetCharAt(i + 1);
410 int stylePrev = style;
411 style = styleNext;
412 styleNext = styler.StyleAt(i + 1);
413 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
414 if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
415 if (!IsStreamCommentStyle(stylePrev)) {
416 levelNext++;
417 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
418 // Comments don't end at end of line and the next character may be unstyled.
419 levelNext--;
420 }
421 }
422 if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
423 if (userDefinedFoldMarkers) {
424 if (styler.Match(i, options.foldExplicitStart.c_str())) {
425 levelNext++;
426 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
427 levelNext--;
428 }
429 } else {
430 if (ch == ';') {
431 if (chNext == '{') {
432 levelNext++;
433 } else if (chNext == '}') {
434 levelNext--;
435 }
436 }
437 }
438 }
439 if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
440 word[wordlen++] = static_cast<char>(LowerCase(ch));
441 if (wordlen == 100) { // prevent overflow
442 word[0] = '\0';
443 wordlen = 1;
444 }
445 if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
446 word[wordlen] = '\0';
447 wordlen = 0;
448 if (directives4foldstart.InList(word)) {
449 levelNext++;
450 } else if (directives4foldend.InList(word)){
451 levelNext--;
452 }
453 }
454 }
455 if (!IsASpace(ch))
456 visibleChars++;
457 if (atEOL || (i == endPos-1)) {
458 int levelUse = levelCurrent;
459 int lev = levelUse | levelNext << 16;
460 if (visibleChars == 0 && options.foldCompact)
461 lev |= SC_FOLDLEVELWHITEFLAG;
462 if (levelUse < levelNext)
463 lev |= SC_FOLDLEVELHEADERFLAG;
464 if (lev != styler.LevelAt(lineCurrent)) {
465 styler.SetLevel(lineCurrent, lev);
466 }
467 lineCurrent++;
468 levelCurrent = levelNext;
469 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
470 // There is an empty line at end of file so give it same level and empty
471 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
472 }
473 visibleChars = 0;
474 }
475 }
476}
477
478LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
479LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);
480
481