// Scintilla source code edit control
/** @file LexAsm.cxx
 ** Lexer for Assembler, just for the MASM syntax
 ** Written by The Black Horus
 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
| 11 | |
| 12 | #include <stdlib.h> |
| 13 | #include <string.h> |
| 14 | #include <stdio.h> |
| 15 | #include <stdarg.h> |
| 16 | #include <assert.h> |
| 17 | #include <ctype.h> |
| 18 | |
| 19 | #include <string> |
| 20 | #include <string_view> |
| 21 | #include <map> |
| 22 | #include <set> |
| 23 | #include <functional> |
| 24 | |
| 25 | #include "ILexer.h" |
| 26 | #include "Scintilla.h" |
| 27 | #include "SciLexer.h" |
| 28 | |
| 29 | #include "WordList.h" |
| 30 | #include "LexAccessor.h" |
| 31 | #include "StyleContext.h" |
| 32 | #include "CharacterSet.h" |
| 33 | #include "LexerModule.h" |
| 34 | #include "OptionSet.h" |
| 35 | #include "DefaultLexer.h" |
| 36 | |
| 37 | using namespace Scintilla; |
| 38 | using namespace Lexilla; |
| 39 | |
/**
 * Report whether @a ch may continue an identifier or number token.
 * Accepted characters are the ASCII letters and digits plus '.', '_' and '?'.
 * Anything outside 0..0x7F is rejected before isalnum() is consulted, because
 * isalnum() is only defined for values representable as unsigned char (or EOF).
 */
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
| 44 | |
/**
 * Report whether @a ch may begin an identifier.
 * Accepted characters are the ASCII letters and digits plus
 * '_', '.', '%', '@', '$' and '?'.
 * Anything outside 0..0x7F is rejected before isalnum() is consulted, because
 * isalnum() is only defined for values representable as unsigned char (or EOF).
 */
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
| 49 | |
/**
 * Report whether @a ch is one of the characters styled as an operator.
 * Letters and digits are never operators; every character listed below is
 * non-alphanumeric, so matching on the list alone gives the same answer
 * without passing arbitrary values to isalnum().
 */
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	// '.' left out as it is used to make up numbers
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
| 62 | |
| 63 | static bool (int style) { |
| 64 | return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK; |
| 65 | } |
| 66 | |
/**
 * Map an ASCII upper-case letter to its lower-case counterpart.
 * Every other value is returned unchanged; no locale is consulted.
 */
static inline int LowerCase(int c) {
	if (c >= 'A' && c <= 'Z')
		return 'a' + c - 'A';
	return c;
}
| 72 | |
| 73 | // An individual named option for use in an OptionSet |
| 74 | |
// Options used for LexerAsm
struct OptionsAsm {
	// Delimiter character for the COMMENT directive; when empty '~' is used.
	std::string delimiter;
	// Master switch: no folding is performed while this is false.
	bool fold = false;
	// Fold on directives found in the fold-start / fold-end word lists.
	bool foldSyntaxBased = true;
	// Fold multi-line comments.
	bool foldCommentMultiline = false;
	// Fold on explicit fold point markers.
	bool foldCommentExplicit = false;
	// User-defined explicit fold markers; used only when both are non-empty,
	// otherwise the standard ;{ and ;} apply.
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	// Recognise explicit fold points anywhere, not just in line comments.
	bool foldExplicitAnywhere = false;
	// Flag lines without visible characters as white space when folding.
	bool foldCompact = true;
	// Overrides the lexer's default comment character when non-empty.
	std::string commentChar;
};
| 100 | |
// Descriptions of the keyword lists, in the order in which
// LexerAsm::WordListSet() numbers them (0..7). The array is null-terminated.
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	"Directives4Foldstart",
	"Directives4Foldend",
	nullptr
};
| 112 | |
| 113 | struct OptionSetAsm : public OptionSet<OptionsAsm> { |
| 114 | OptionSetAsm() { |
| 115 | DefineProperty("lexer.asm.comment.delimiter" , &OptionsAsm::delimiter, |
| 116 | "Character used for COMMENT directive's delimiter, replacing the standard \"~\"." ); |
| 117 | |
| 118 | DefineProperty("fold" , &OptionsAsm::fold); |
| 119 | |
| 120 | DefineProperty("fold.asm.syntax.based" , &OptionsAsm::foldSyntaxBased, |
| 121 | "Set this property to 0 to disable syntax based folding." ); |
| 122 | |
| 123 | DefineProperty("fold.asm.comment.multiline" , &OptionsAsm::foldCommentMultiline, |
| 124 | "Set this property to 1 to enable folding multi-line comments." ); |
| 125 | |
| 126 | DefineProperty("fold.asm.comment.explicit" , &OptionsAsm::foldCommentExplicit, |
| 127 | "This option enables folding explicit fold points when using the Asm lexer. " |
| 128 | "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} " |
| 129 | "at the end of a section that should fold." ); |
| 130 | |
| 131 | DefineProperty("fold.asm.explicit.start" , &OptionsAsm::foldExplicitStart, |
| 132 | "The string to use for explicit fold start points, replacing the standard ;{." ); |
| 133 | |
| 134 | DefineProperty("fold.asm.explicit.end" , &OptionsAsm::foldExplicitEnd, |
| 135 | "The string to use for explicit fold end points, replacing the standard ;}." ); |
| 136 | |
| 137 | DefineProperty("fold.asm.explicit.anywhere" , &OptionsAsm::foldExplicitAnywhere, |
| 138 | "Set this property to 1 to enable explicit fold points anywhere, not just in line comments." ); |
| 139 | |
| 140 | DefineProperty("fold.compact" , &OptionsAsm::foldCompact); |
| 141 | |
| 142 | DefineProperty("lexer.as.comment.character" , &OptionsAsm::commentChar, |
| 143 | "Overrides the default comment character (which is ';' for asm and '#' for as)." ); |
| 144 | |
| 145 | DefineWordListSets(asmWordListDesc); |
| 146 | } |
| 147 | }; |
| 148 | |
| 149 | class LexerAsm : public DefaultLexer { |
| 150 | WordList cpuInstruction; |
| 151 | WordList mathInstruction; |
| 152 | WordList registers; |
| 153 | WordList directive; |
| 154 | WordList directiveOperand; |
| 155 | WordList extInstruction; |
| 156 | WordList directives4foldstart; |
| 157 | WordList directives4foldend; |
| 158 | OptionsAsm options; |
| 159 | OptionSetAsm osAsm; |
| 160 | int ; |
| 161 | public: |
| 162 | LexerAsm(const char *languageName_, int language_, int ) : DefaultLexer(languageName_, language_) { |
| 163 | commentChar = commentChar_; |
| 164 | } |
| 165 | virtual ~LexerAsm() { |
| 166 | } |
| 167 | void SCI_METHOD Release() override { |
| 168 | delete this; |
| 169 | } |
| 170 | int SCI_METHOD Version() const override { |
| 171 | return lvRelease5; |
| 172 | } |
| 173 | const char * SCI_METHOD PropertyNames() override { |
| 174 | return osAsm.PropertyNames(); |
| 175 | } |
| 176 | int SCI_METHOD PropertyType(const char *name) override { |
| 177 | return osAsm.PropertyType(name); |
| 178 | } |
| 179 | const char * SCI_METHOD DescribeProperty(const char *name) override { |
| 180 | return osAsm.DescribeProperty(name); |
| 181 | } |
| 182 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
| 183 | const char * SCI_METHOD PropertyGet(const char *key) override { |
| 184 | return osAsm.PropertyGet(key); |
| 185 | } |
| 186 | const char * SCI_METHOD DescribeWordListSets() override { |
| 187 | return osAsm.DescribeWordListSets(); |
| 188 | } |
| 189 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
| 190 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
| 191 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
| 192 | |
| 193 | void * SCI_METHOD PrivateCall(int, void *) override { |
| 194 | return 0; |
| 195 | } |
| 196 | |
| 197 | static ILexer5 *LexerFactoryAsm() { |
| 198 | return new LexerAsm("asm" , SCLEX_ASM, ';'); |
| 199 | } |
| 200 | |
| 201 | static ILexer5 *LexerFactoryAs() { |
| 202 | return new LexerAsm("as" , SCLEX_AS, '#'); |
| 203 | } |
| 204 | }; |
| 205 | |
| 206 | Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) { |
| 207 | if (osAsm.PropertySet(&options, key, val)) { |
| 208 | return 0; |
| 209 | } |
| 210 | return -1; |
| 211 | } |
| 212 | |
| 213 | Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) { |
| 214 | WordList *wordListN = 0; |
| 215 | switch (n) { |
| 216 | case 0: |
| 217 | wordListN = &cpuInstruction; |
| 218 | break; |
| 219 | case 1: |
| 220 | wordListN = &mathInstruction; |
| 221 | break; |
| 222 | case 2: |
| 223 | wordListN = ®isters; |
| 224 | break; |
| 225 | case 3: |
| 226 | wordListN = &directive; |
| 227 | break; |
| 228 | case 4: |
| 229 | wordListN = &directiveOperand; |
| 230 | break; |
| 231 | case 5: |
| 232 | wordListN = &extInstruction; |
| 233 | break; |
| 234 | case 6: |
| 235 | wordListN = &directives4foldstart; |
| 236 | break; |
| 237 | case 7: |
| 238 | wordListN = &directives4foldend; |
| 239 | break; |
| 240 | } |
| 241 | Sci_Position firstModification = -1; |
| 242 | if (wordListN) { |
| 243 | WordList wlNew; |
| 244 | wlNew.Set(wl); |
| 245 | if (*wordListN != wlNew) { |
| 246 | wordListN->Set(wl); |
| 247 | firstModification = 0; |
| 248 | } |
| 249 | } |
| 250 | return firstModification; |
| 251 | } |
| 252 | |
| 253 | void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
| 254 | LexAccessor styler(pAccess); |
| 255 | |
| 256 | const char = options.commentChar.empty() ? |
| 257 | commentChar : options.commentChar.front(); |
| 258 | |
| 259 | // Do not leak onto next line |
| 260 | if (initStyle == SCE_ASM_STRINGEOL) |
| 261 | initStyle = SCE_ASM_DEFAULT; |
| 262 | |
| 263 | StyleContext sc(startPos, length, initStyle, styler); |
| 264 | |
| 265 | for (; sc.More(); sc.Forward()) |
| 266 | { |
| 267 | |
| 268 | // Prevent SCE_ASM_STRINGEOL from leaking back to previous line |
| 269 | if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) { |
| 270 | sc.SetState(SCE_ASM_STRING); |
| 271 | } else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) { |
| 272 | sc.SetState(SCE_ASM_CHARACTER); |
| 273 | } |
| 274 | |
| 275 | // Handle line continuation generically. |
| 276 | if (sc.ch == '\\') { |
| 277 | if (sc.chNext == '\n' || sc.chNext == '\r') { |
| 278 | sc.Forward(); |
| 279 | if (sc.ch == '\r' && sc.chNext == '\n') { |
| 280 | sc.Forward(); |
| 281 | } |
| 282 | continue; |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | // Determine if the current state should terminate. |
| 287 | if (sc.state == SCE_ASM_OPERATOR) { |
| 288 | if (!IsAsmOperator(sc.ch)) { |
| 289 | sc.SetState(SCE_ASM_DEFAULT); |
| 290 | } |
| 291 | } else if (sc.state == SCE_ASM_NUMBER) { |
| 292 | if (!IsAWordChar(sc.ch)) { |
| 293 | sc.SetState(SCE_ASM_DEFAULT); |
| 294 | } |
| 295 | } else if (sc.state == SCE_ASM_IDENTIFIER) { |
| 296 | if (!IsAWordChar(sc.ch) ) { |
| 297 | char s[100]; |
| 298 | sc.GetCurrentLowered(s, sizeof(s)); |
| 299 | bool IsDirective = false; |
| 300 | |
| 301 | if (cpuInstruction.InList(s)) { |
| 302 | sc.ChangeState(SCE_ASM_CPUINSTRUCTION); |
| 303 | } else if (mathInstruction.InList(s)) { |
| 304 | sc.ChangeState(SCE_ASM_MATHINSTRUCTION); |
| 305 | } else if (registers.InList(s)) { |
| 306 | sc.ChangeState(SCE_ASM_REGISTER); |
| 307 | } else if (directive.InList(s)) { |
| 308 | sc.ChangeState(SCE_ASM_DIRECTIVE); |
| 309 | IsDirective = true; |
| 310 | } else if (directiveOperand.InList(s)) { |
| 311 | sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND); |
| 312 | } else if (extInstruction.InList(s)) { |
| 313 | sc.ChangeState(SCE_ASM_EXTINSTRUCTION); |
| 314 | } |
| 315 | sc.SetState(SCE_ASM_DEFAULT); |
| 316 | if (IsDirective && !strcmp(s, "comment" )) { |
| 317 | char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0]; |
| 318 | while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) { |
| 319 | sc.ForwardSetState(SCE_ASM_DEFAULT); |
| 320 | } |
| 321 | if (sc.ch == delimiter) { |
| 322 | sc.SetState(SCE_ASM_COMMENTDIRECTIVE); |
| 323 | } |
| 324 | } |
| 325 | } |
| 326 | } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) { |
| 327 | char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0]; |
| 328 | if (sc.ch == delimiter) { |
| 329 | while (!sc.atLineEnd) { |
| 330 | sc.Forward(); |
| 331 | } |
| 332 | sc.SetState(SCE_ASM_DEFAULT); |
| 333 | } |
| 334 | } else if (sc.state == SCE_ASM_COMMENT ) { |
| 335 | if (sc.atLineEnd) { |
| 336 | sc.SetState(SCE_ASM_DEFAULT); |
| 337 | } |
| 338 | } else if (sc.state == SCE_ASM_STRING) { |
| 339 | if (sc.ch == '\\') { |
| 340 | if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { |
| 341 | sc.Forward(); |
| 342 | } |
| 343 | } else if (sc.ch == '\"') { |
| 344 | sc.ForwardSetState(SCE_ASM_DEFAULT); |
| 345 | } else if (sc.atLineEnd) { |
| 346 | sc.ChangeState(SCE_ASM_STRINGEOL); |
| 347 | sc.ForwardSetState(SCE_ASM_DEFAULT); |
| 348 | } |
| 349 | } else if (sc.state == SCE_ASM_CHARACTER) { |
| 350 | if (sc.ch == '\\') { |
| 351 | if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { |
| 352 | sc.Forward(); |
| 353 | } |
| 354 | } else if (sc.ch == '\'') { |
| 355 | sc.ForwardSetState(SCE_ASM_DEFAULT); |
| 356 | } else if (sc.atLineEnd) { |
| 357 | sc.ChangeState(SCE_ASM_STRINGEOL); |
| 358 | sc.ForwardSetState(SCE_ASM_DEFAULT); |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | // Determine if a new state should be entered. |
| 363 | if (sc.state == SCE_ASM_DEFAULT) { |
| 364 | if (sc.ch == commentCharacter) { |
| 365 | sc.SetState(SCE_ASM_COMMENT); |
| 366 | } else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) { |
| 367 | sc.SetState(SCE_ASM_NUMBER); |
| 368 | } else if (IsAWordStart(sc.ch)) { |
| 369 | sc.SetState(SCE_ASM_IDENTIFIER); |
| 370 | } else if (sc.ch == '\"') { |
| 371 | sc.SetState(SCE_ASM_STRING); |
| 372 | } else if (sc.ch == '\'') { |
| 373 | sc.SetState(SCE_ASM_CHARACTER); |
| 374 | } else if (IsAsmOperator(sc.ch)) { |
| 375 | sc.SetState(SCE_ASM_OPERATOR); |
| 376 | } |
| 377 | } |
| 378 | |
| 379 | } |
| 380 | sc.Complete(); |
| 381 | } |
| 382 | |
| 383 | // Store both the current line's fold level and the next lines in the |
| 384 | // level store to make it easy to pick up with each increment |
| 385 | // and to make it possible to fiddle the current level for "else". |
| 386 | |
| 387 | void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
| 388 | |
| 389 | if (!options.fold) |
| 390 | return; |
| 391 | |
| 392 | LexAccessor styler(pAccess); |
| 393 | |
| 394 | Sci_PositionU endPos = startPos + length; |
| 395 | int visibleChars = 0; |
| 396 | Sci_Position lineCurrent = styler.GetLine(startPos); |
| 397 | int levelCurrent = SC_FOLDLEVELBASE; |
| 398 | if (lineCurrent > 0) |
| 399 | levelCurrent = styler.LevelAt(lineCurrent-1) >> 16; |
| 400 | int levelNext = levelCurrent; |
| 401 | char chNext = styler[startPos]; |
| 402 | int styleNext = styler.StyleAt(startPos); |
| 403 | int style = initStyle; |
| 404 | char word[100]; |
| 405 | int wordlen = 0; |
| 406 | const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty(); |
| 407 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
| 408 | char ch = chNext; |
| 409 | chNext = styler.SafeGetCharAt(i + 1); |
| 410 | int stylePrev = style; |
| 411 | style = styleNext; |
| 412 | styleNext = styler.StyleAt(i + 1); |
| 413 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
| 414 | if (options.foldCommentMultiline && IsStreamCommentStyle(style)) { |
| 415 | if (!IsStreamCommentStyle(stylePrev)) { |
| 416 | levelNext++; |
| 417 | } else if (!IsStreamCommentStyle(styleNext) && !atEOL) { |
| 418 | // Comments don't end at end of line and the next character may be unstyled. |
| 419 | levelNext--; |
| 420 | } |
| 421 | } |
| 422 | if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) { |
| 423 | if (userDefinedFoldMarkers) { |
| 424 | if (styler.Match(i, options.foldExplicitStart.c_str())) { |
| 425 | levelNext++; |
| 426 | } else if (styler.Match(i, options.foldExplicitEnd.c_str())) { |
| 427 | levelNext--; |
| 428 | } |
| 429 | } else { |
| 430 | if (ch == ';') { |
| 431 | if (chNext == '{') { |
| 432 | levelNext++; |
| 433 | } else if (chNext == '}') { |
| 434 | levelNext--; |
| 435 | } |
| 436 | } |
| 437 | } |
| 438 | } |
| 439 | if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) { |
| 440 | word[wordlen++] = static_cast<char>(LowerCase(ch)); |
| 441 | if (wordlen == 100) { // prevent overflow |
| 442 | word[0] = '\0'; |
| 443 | wordlen = 1; |
| 444 | } |
| 445 | if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready |
| 446 | word[wordlen] = '\0'; |
| 447 | wordlen = 0; |
| 448 | if (directives4foldstart.InList(word)) { |
| 449 | levelNext++; |
| 450 | } else if (directives4foldend.InList(word)){ |
| 451 | levelNext--; |
| 452 | } |
| 453 | } |
| 454 | } |
| 455 | if (!IsASpace(ch)) |
| 456 | visibleChars++; |
| 457 | if (atEOL || (i == endPos-1)) { |
| 458 | int levelUse = levelCurrent; |
| 459 | int lev = levelUse | levelNext << 16; |
| 460 | if (visibleChars == 0 && options.foldCompact) |
| 461 | lev |= SC_FOLDLEVELWHITEFLAG; |
| 462 | if (levelUse < levelNext) |
| 463 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 464 | if (lev != styler.LevelAt(lineCurrent)) { |
| 465 | styler.SetLevel(lineCurrent, lev); |
| 466 | } |
| 467 | lineCurrent++; |
| 468 | levelCurrent = levelNext; |
| 469 | if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) { |
| 470 | // There is an empty line at end of file so give it same level and empty |
| 471 | styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG); |
| 472 | } |
| 473 | visibleChars = 0; |
| 474 | } |
| 475 | } |
| 476 | } |
| 477 | |
| 478 | LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm" , asmWordListDesc); |
| 479 | LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as" , asmWordListDesc); |
| 480 | |
| 481 | |