| 1 | // Scintilla source code edit control |
| 2 | /** @file LexA68k.cxx |
** Lexer for 68k assembly source code
| 4 | ** Written by Martial Demolins AKA Folco |
| 5 | **/ |
| 6 | // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com> |
| 7 | // The License.txt file describes the conditions under which this software |
| 8 | // may be distributed. |
| 9 | |
| 10 | |
| 11 | #include <stdlib.h> |
| 12 | #include <string.h> |
| 13 | #include <stdio.h> |
| 14 | #include <stdarg.h> |
| 15 | #include <assert.h> |
| 16 | #include <ctype.h> |
| 17 | |
| 18 | #include <string> |
| 19 | #include <string_view> |
| 20 | |
| 21 | #include "ILexer.h" |
| 22 | #include "Scintilla.h" |
| 23 | #include "SciLexer.h" |
| 24 | |
| 25 | #include "WordList.h" |
| 26 | #include "LexAccessor.h" |
| 27 | #include "Accessor.h" |
| 28 | #include "StyleContext.h" |
| 29 | #include "CharacterSet.h" |
| 30 | #include "LexerModule.h" |
| 31 | |
| 32 | using namespace Lexilla; |
| 33 | |
| 34 | |
// Possible results of GetOperatorType
enum {
    NO_OPERATOR = 0,        // the character does not start an operator
    OPERATOR_1CHAR = 1,     // the operator is one char long
    OPERATOR_2CHAR = 2      // the operator is two chars long
};
| 39 | |
| 40 | |
/**
 * IsIdentifierStart
 *
 * Return true if the given char is a valid identifier first char:
 * an ASCII letter, an underscore or a backslash.
 *
 * Any value outside the ASCII range (negative, or 0x80 and above) is
 * rejected before isalpha() is reached: passing isalpha() a value that is
 * neither EOF nor representable as unsigned char is undefined behaviour,
 * and not every caller in this file range-checks the value first.
 */

static inline bool IsIdentifierStart (const int ch)
{
    if ((ch < 0) || (ch >= 0x80)) {
        return false;
    }
    return (isalpha(ch) || (ch == '_') || (ch == '\\'));
}
| 51 | |
| 52 | |
/**
 * IsIdentifierChar
 *
 * Return true if the given char is a valid identifier char:
 * an ASCII letter or digit, or one of '_', '@', ':' and '.'.
 *
 * Any value outside the ASCII range (negative, or 0x80 and above) is
 * rejected before isalnum() is reached: passing isalnum() a value that is
 * neither EOF nor representable as unsigned char is undefined behaviour,
 * and the callers in this file pass the current character unchecked.
 */

static inline bool IsIdentifierChar (const int ch)
{
    if ((ch < 0) || (ch >= 0x80)) {
        return false;
    }
    return (isalnum(ch) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
}
| 63 | |
| 64 | |
| 65 | /** |
| 66 | * GetOperatorType |
| 67 | * |
| 68 | * Return: |
| 69 | * NO_OPERATOR if char is not an operator |
| 70 | * OPERATOR_1CHAR if the operator is one char long |
| 71 | * OPERATOR_2CHAR if the operator is two chars long |
| 72 | */ |
| 73 | |
| 74 | static inline int GetOperatorType (const int ch1, const int ch2) |
| 75 | { |
| 76 | int OpType = NO_OPERATOR; |
| 77 | |
| 78 | if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') || |
| 79 | (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ',')) |
| 80 | OpType = OPERATOR_1CHAR; |
| 81 | |
| 82 | else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>')) |
| 83 | OpType = OPERATOR_2CHAR; |
| 84 | |
| 85 | return OpType; |
| 86 | } |
| 87 | |
| 88 | |
/**
 * IsBin
 *
 * Return true if the given char is a binary digit ('0' or '1')
 */

static inline bool IsBin (const int ch)
{
    switch (ch) {
    case '0':
    case '1':
        return true;
    default:
        return false;
    }
}
| 99 | |
| 100 | |
/**
 * IsDoxygenChar
 *
 * Return true if the char may be part of a Doxygen keyword:
 * an ASCII letter, or one of '$', '[', ']', '{' and '}'.
 *
 * Any value outside the ASCII range (negative, or 0x80 and above) is
 * rejected before isalpha() is reached: passing isalpha() a value that is
 * neither EOF nor representable as unsigned char is undefined behaviour,
 * and the caller in this file passes the current character unchecked.
 */

static inline bool IsDoxygenChar (const int ch)
{
    if ((ch < 0) || (ch >= 0x80)) {
        return false;
    }
    return isalpha(ch) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
| 111 | |
| 112 | |
| 113 | /** |
| 114 | * ColouriseA68kDoc |
| 115 | * |
| 116 | * Main function, which colourises a 68k source |
| 117 | */ |
| 118 | |
| 119 | static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler) |
| 120 | { |
| 121 | // Used to buffer a string, to be able to compare it using built-in functions |
| 122 | char Buffer[100]; |
| 123 | |
| 124 | |
| 125 | // Used to know the length of an operator |
| 126 | int OpType; |
| 127 | |
| 128 | |
| 129 | // Get references to keywords lists |
| 130 | WordList &cpuInstruction = *keywordlists[0]; |
| 131 | WordList ®isters = *keywordlists[1]; |
| 132 | WordList &directive = *keywordlists[2]; |
| 133 | WordList &extInstruction = *keywordlists[3]; |
| 134 | WordList &alert = *keywordlists[4]; |
| 135 | WordList &doxygenKeyword = *keywordlists[5]; |
| 136 | |
| 137 | |
| 138 | // Instanciate a context for our source |
| 139 | StyleContext sc(startPos, length, initStyle, styler); |
| 140 | |
| 141 | |
| 142 | /************************************************************ |
| 143 | * |
| 144 | * Parse the source |
| 145 | * |
| 146 | ************************************************************/ |
| 147 | |
| 148 | for ( ; sc.More(); sc.Forward()) |
| 149 | { |
| 150 | /************************************************************ |
| 151 | * |
| 152 | * A style always terminates at the end of a line, even for |
| 153 | * comments (no multi-lines comments) |
| 154 | * |
| 155 | ************************************************************/ |
| 156 | if (sc.atLineStart) { |
| 157 | sc.SetState(SCE_A68K_DEFAULT); |
| 158 | } |
| 159 | |
| 160 | |
| 161 | /************************************************************ |
| 162 | * |
| 163 | * If we are not in "default style", check if the style continues |
| 164 | * In this case, we just have to loop |
| 165 | * |
| 166 | ************************************************************/ |
| 167 | |
| 168 | if (sc.state != SCE_A68K_DEFAULT) |
| 169 | { |
| 170 | if ( ((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number |
| 171 | || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number |
| 172 | || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number |
| 173 | || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument |
| 174 | || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted |
| 175 | || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted |
| 176 | || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch)) // Macro declaration (or global label, we don't know at this point) |
| 177 | || ((sc.state == SCE_A68K_IDENTIFIER) && IsIdentifierChar(sc.ch)) // Identifier |
| 178 | || ((sc.state == SCE_A68K_LABEL) && IsIdentifierChar(sc.ch)) // Label (local) |
| 179 | || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && IsDoxygenChar(sc.ch)) // Doxygen keyword |
| 180 | || ((sc.state == SCE_A68K_COMMENT_SPECIAL) && isalpha(sc.ch)) // Alert |
| 181 | || ((sc.state == SCE_A68K_COMMENT) && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment |
| 182 | { |
| 183 | continue; |
| 184 | } |
| 185 | |
| 186 | /************************************************************ |
| 187 | * |
| 188 | * Check if current state terminates |
| 189 | * |
| 190 | ************************************************************/ |
| 191 | |
| 192 | // Strings: include terminal ' or " in the current string by skipping it |
| 193 | if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) { |
| 194 | sc.Forward(); |
| 195 | } |
| 196 | |
| 197 | |
| 198 | // If a macro declaration was terminated with ':', it was a label |
| 199 | else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) { |
| 200 | sc.ChangeState(SCE_A68K_LABEL); |
| 201 | } |
| 202 | |
| 203 | |
| 204 | // If it wasn't a Doxygen keyword, change it to normal comment |
| 205 | else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) { |
| 206 | sc.GetCurrent(Buffer, sizeof(Buffer)); |
| 207 | if (!doxygenKeyword.InList(Buffer)) { |
| 208 | sc.ChangeState(SCE_A68K_COMMENT); |
| 209 | } |
| 210 | sc.SetState(SCE_A68K_COMMENT); |
| 211 | continue; |
| 212 | } |
| 213 | |
| 214 | |
| 215 | // If it wasn't an Alert, change it to normal comment |
| 216 | else if (sc.state == SCE_A68K_COMMENT_SPECIAL) { |
| 217 | sc.GetCurrent(Buffer, sizeof(Buffer)); |
| 218 | if (!alert.InList(Buffer)) { |
| 219 | sc.ChangeState(SCE_A68K_COMMENT); |
| 220 | } |
| 221 | // Reset style to normal comment, or to Doxygen keyword if it begins with '\' |
| 222 | if (sc.ch == '\\') { |
| 223 | sc.SetState(SCE_A68K_COMMENT_DOXYGEN); |
| 224 | } |
| 225 | else { |
| 226 | sc.SetState(SCE_A68K_COMMENT); |
| 227 | } |
| 228 | continue; |
| 229 | } |
| 230 | |
| 231 | |
| 232 | // If we are in a comment, it's a Doxygen keyword or an Alert |
| 233 | else if (sc.state == SCE_A68K_COMMENT) { |
| 234 | if (sc.ch == '\\') { |
| 235 | sc.SetState(SCE_A68K_COMMENT_DOXYGEN); |
| 236 | } |
| 237 | else { |
| 238 | sc.SetState(SCE_A68K_COMMENT_SPECIAL); |
| 239 | } |
| 240 | continue; |
| 241 | } |
| 242 | |
| 243 | |
| 244 | // Check if we are at the end of an identifier |
| 245 | // In this case, colourise it if was a keyword. |
| 246 | else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) { |
| 247 | sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context |
| 248 | if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list |
| 249 | sc.ChangeState(SCE_A68K_CPUINSTRUCTION); |
| 250 | } |
| 251 | else if (extInstruction.InList(Buffer)) { |
| 252 | sc.ChangeState(SCE_A68K_EXTINSTRUCTION); |
| 253 | } |
| 254 | else if (registers.InList(Buffer)) { |
| 255 | sc.ChangeState(SCE_A68K_REGISTER); |
| 256 | } |
| 257 | else if (directive.InList(Buffer)) { |
| 258 | sc.ChangeState(SCE_A68K_DIRECTIVE); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | // All special contexts are now handled.Come back to default style |
| 263 | sc.SetState(SCE_A68K_DEFAULT); |
| 264 | } |
| 265 | |
| 266 | |
| 267 | /************************************************************ |
| 268 | * |
| 269 | * Check if we must enter a new state |
| 270 | * |
| 271 | ************************************************************/ |
| 272 | |
| 273 | // Something which begins at the beginning of a line, and with |
| 274 | // - '\' + an identifier start char, or |
| 275 | // - '\\@' + an identifier start char |
| 276 | // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration |
| 277 | if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) { |
| 278 | sc.SetState(SCE_A68K_LABEL); |
| 279 | } |
| 280 | |
| 281 | if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) { |
| 282 | sc.Forward(2); |
| 283 | if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) { |
| 284 | sc.ChangeState(SCE_A68K_LABEL); |
| 285 | sc.SetState(SCE_A68K_LABEL); |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | // Label and macro identifiers start at the beginning of a line |
| 290 | // We set both as a macro id, but if it wasn't one (':' at the end), |
| 291 | // it will be changed as a label. |
| 292 | if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { |
| 293 | sc.SetState(SCE_A68K_MACRO_DECLARATION); |
| 294 | } |
| 295 | else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Default: alert in a comment. If it doesn't match |
| 296 | sc.SetState(SCE_A68K_COMMENT); // with an alert, it will be toggle to a normal comment |
| 297 | } |
| 298 | else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix |
| 299 | sc.SetState(SCE_A68K_NUMBER_DEC); |
| 300 | } |
| 301 | else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%' |
| 302 | sc.SetState(SCE_A68K_NUMBER_BIN); |
| 303 | } |
| 304 | else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$' |
| 305 | sc.SetState(SCE_A68K_NUMBER_HEX); |
| 306 | } |
| 307 | else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted) |
| 308 | sc.SetState(SCE_A68K_STRING1); |
| 309 | } |
| 310 | else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted) |
| 311 | sc.SetState(SCE_A68K_STRING2); |
| 312 | } |
| 313 | else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro are prefixed with '\' |
| 314 | sc.SetState(SCE_A68K_MACRO_ARG); |
| 315 | } |
| 316 | else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc... |
| 317 | sc.SetState(SCE_A68K_IDENTIFIER); |
| 318 | } |
| 319 | else { |
| 320 | if (sc.ch < 0x80) { |
| 321 | OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator |
| 322 | if (OpType != NO_OPERATOR) { |
| 323 | sc.SetState(SCE_A68K_OPERATOR); |
| 324 | if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long |
| 325 | sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<) |
| 326 | } |
| 327 | } |
| 328 | } |
| 329 | } |
| 330 | } // End of for() |
| 331 | sc.Complete(); |
| 332 | } |
| 333 | |
| 334 | |
// Descriptions of the keyword lists, in the same order as the indexes
// ColouriseA68kDoc reads from its keywordlists argument.
// The table ends with a null entry.

static const char * const a68kWordListDesc[] =
{
    "CPU instructions",
    "Registers",
    "Directives",
    "Extended instructions",
    "Comment special words",
    "Doxygen keywords",
    0
};
| 347 | |
| 348 | LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k" , 0, a68kWordListDesc); |
| 349 | |