| 1 | // Scintilla source code edit control |
| 2 | /** @file LexVB.cxx |
| 3 | ** Lexer for Visual Basic and VBScript. |
| 4 | **/ |
| 5 | // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org> |
| 6 | // The License.txt file describes the conditions under which this software may be distributed. |
| 7 | |
| 8 | #include <stdlib.h> |
| 9 | #include <string.h> |
| 10 | #include <stdio.h> |
| 11 | #include <stdarg.h> |
| 12 | #include <assert.h> |
| 13 | #include <ctype.h> |
| 14 | |
| 15 | #include <string> |
| 16 | #include <string_view> |
| 17 | |
| 18 | #include "ILexer.h" |
| 19 | #include "Scintilla.h" |
| 20 | #include "SciLexer.h" |
| 21 | |
| 22 | #include "WordList.h" |
| 23 | #include "LexAccessor.h" |
| 24 | #include "Accessor.h" |
| 25 | #include "StyleContext.h" |
| 26 | #include "CharacterSet.h" |
| 27 | #include "LexerModule.h" |
| 28 | |
| 29 | using namespace Lexilla; |
| 30 | |
// Internal state, highlighted as number.
// The expansion is parenthesised so it behaves as a single operand wherever it is used.
#define SCE_B_FILENUMBER (SCE_B_DEFAULT + 100)
| 33 | |
| 34 | |
| 35 | static bool (Accessor &styler, Sci_Position pos, Sci_Position len) { |
| 36 | return len > 0 && styler[pos] == '\''; |
| 37 | } |
| 38 | |
// True for the suffix characters that may follow a Basic identifier to
// indicate the type of the value it holds or returns: % & @ ! # $
static inline bool IsTypeCharacter(int ch) {
	switch (ch) {
	case '%':
	case '&':
	case '@':
	case '!':
	case '#':
	case '$':
		return true;
	default:
		return false;
	}
}
| 42 | |
// Characters allowed inside an identifier: ASCII letters and digits, '.', '_',
// and every value from 0x80 upwards so that accented characters are accepted.
static inline bool IsAWordChar(int ch) {
	if (ch >= 0x80) {
		return true;
	}
	if (ch == '_' || ch == '.') {
		return true;
	}
	return isalnum(ch) != 0;
}
| 48 | |
// Characters that may begin an identifier: ASCII letters, '_',
// and every value from 0x80 upwards.
static inline bool IsAWordStart(int ch) {
	if (ch >= 0x80) {
		return true;
	}
	if (ch == '_') {
		return true;
	}
	return isalpha(ch) != 0;
}
| 53 | |
// Characters that keep a numeric literal going: digits, an exponent marker
// (e or E), '.', '-', '+' and '_'. Values from 0x80 upwards never qualify.
// This is looser than the real number grammar (several dots are accepted, etc.)
// but is probably enough in most cases.
static inline bool IsANumberChar(int ch) {
	if (ch >= 0x80) {
		return false;
	}
	if (isdigit(ch)) {
		return true;
	}
	switch (ch) {
	case '.':
	case '-':
	case '+':
	case '_':
		return true;
	default:
		return toupper(ch) == 'E';
	}
}
| 61 | |
| 62 | static void ColouriseVBDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, |
| 63 | WordList *keywordlists[], Accessor &styler, bool vbScriptSyntax) { |
| 64 | |
| 65 | WordList &keywords = *keywordlists[0]; |
| 66 | WordList &keywords2 = *keywordlists[1]; |
| 67 | WordList &keywords3 = *keywordlists[2]; |
| 68 | WordList &keywords4 = *keywordlists[3]; |
| 69 | |
| 70 | styler.StartAt(startPos); |
| 71 | |
| 72 | int visibleChars = 0; |
| 73 | int fileNbDigits = 0; |
| 74 | |
| 75 | // Do not leak onto next line |
| 76 | if (initStyle == SCE_B_STRINGEOL || initStyle == SCE_B_COMMENT || initStyle == SCE_B_PREPROCESSOR) { |
| 77 | initStyle = SCE_B_DEFAULT; |
| 78 | } |
| 79 | |
| 80 | StyleContext sc(startPos, length, initStyle, styler); |
| 81 | |
| 82 | for (; sc.More(); sc.Forward()) { |
| 83 | |
| 84 | if (sc.state == SCE_B_OPERATOR) { |
| 85 | sc.SetState(SCE_B_DEFAULT); |
| 86 | } else if (sc.state == SCE_B_IDENTIFIER) { |
| 87 | if (!IsAWordChar(sc.ch)) { |
| 88 | // In Basic (except VBScript), a variable name or a function name |
| 89 | // can end with a special character indicating the type of the value |
| 90 | // held or returned. |
| 91 | bool skipType = false; |
| 92 | if (!vbScriptSyntax && IsTypeCharacter(sc.ch)) { |
| 93 | sc.Forward(); // Skip it |
| 94 | skipType = true; |
| 95 | } |
| 96 | if (sc.ch == ']') { |
| 97 | sc.Forward(); |
| 98 | } |
| 99 | char s[100]; |
| 100 | sc.GetCurrentLowered(s, sizeof(s)); |
| 101 | if (skipType) { |
| 102 | s[strlen(s) - 1] = '\0'; |
| 103 | } |
| 104 | if (strcmp(s, "rem" ) == 0) { |
| 105 | sc.ChangeState(SCE_B_COMMENT); |
| 106 | } else { |
| 107 | if (keywords.InList(s)) { |
| 108 | sc.ChangeState(SCE_B_KEYWORD); |
| 109 | } else if (keywords2.InList(s)) { |
| 110 | sc.ChangeState(SCE_B_KEYWORD2); |
| 111 | } else if (keywords3.InList(s)) { |
| 112 | sc.ChangeState(SCE_B_KEYWORD3); |
| 113 | } else if (keywords4.InList(s)) { |
| 114 | sc.ChangeState(SCE_B_KEYWORD4); |
| 115 | } // Else, it is really an identifier... |
| 116 | sc.SetState(SCE_B_DEFAULT); |
| 117 | } |
| 118 | } |
| 119 | } else if (sc.state == SCE_B_NUMBER) { |
| 120 | // We stop the number definition on non-numerical non-dot non-eE non-sign char |
| 121 | // Also accepts A-F for hex. numbers |
| 122 | if (!IsANumberChar(sc.ch) && !(tolower(sc.ch) >= 'a' && tolower(sc.ch) <= 'f')) { |
| 123 | sc.SetState(SCE_B_DEFAULT); |
| 124 | } |
| 125 | } else if (sc.state == SCE_B_STRING) { |
| 126 | // VB doubles quotes to preserve them, so just end this string |
| 127 | // state now as a following quote will start again |
| 128 | if (sc.ch == '\"') { |
| 129 | if (sc.chNext == '\"') { |
| 130 | sc.Forward(); |
| 131 | } else { |
| 132 | if (tolower(sc.chNext) == 'c') { |
| 133 | sc.Forward(); |
| 134 | } |
| 135 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 136 | } |
| 137 | } else if (sc.atLineEnd) { |
| 138 | visibleChars = 0; |
| 139 | sc.ChangeState(SCE_B_STRINGEOL); |
| 140 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 141 | } |
| 142 | } else if (sc.state == SCE_B_COMMENT) { |
| 143 | if (sc.atLineEnd) { |
| 144 | visibleChars = 0; |
| 145 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 146 | } |
| 147 | } else if (sc.state == SCE_B_PREPROCESSOR) { |
| 148 | if (sc.atLineEnd) { |
| 149 | visibleChars = 0; |
| 150 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 151 | } |
| 152 | } else if (sc.state == SCE_B_FILENUMBER) { |
| 153 | if (IsADigit(sc.ch)) { |
| 154 | fileNbDigits++; |
| 155 | if (fileNbDigits > 3) { |
| 156 | sc.ChangeState(SCE_B_DATE); |
| 157 | } |
| 158 | } else if (sc.ch == '\r' || sc.ch == '\n' || sc.ch == ',') { |
| 159 | // Regular uses: Close #1; Put #1, ...; Get #1, ... etc. |
| 160 | // Too bad if date is format #27, Oct, 2003# or something like that... |
| 161 | // Use regular number state |
| 162 | sc.ChangeState(SCE_B_NUMBER); |
| 163 | sc.SetState(SCE_B_DEFAULT); |
| 164 | } else if (sc.ch == '#') { |
| 165 | sc.ChangeState(SCE_B_DATE); |
| 166 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 167 | } else { |
| 168 | sc.ChangeState(SCE_B_DATE); |
| 169 | } |
| 170 | if (sc.state != SCE_B_FILENUMBER) { |
| 171 | fileNbDigits = 0; |
| 172 | } |
| 173 | } else if (sc.state == SCE_B_DATE) { |
| 174 | if (sc.atLineEnd) { |
| 175 | visibleChars = 0; |
| 176 | sc.ChangeState(SCE_B_STRINGEOL); |
| 177 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 178 | } else if (sc.ch == '#') { |
| 179 | sc.ForwardSetState(SCE_B_DEFAULT); |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | if (sc.state == SCE_B_DEFAULT) { |
| 184 | if (sc.ch == '\'') { |
| 185 | sc.SetState(SCE_B_COMMENT); |
| 186 | } else if (sc.ch == '\"') { |
| 187 | sc.SetState(SCE_B_STRING); |
| 188 | } else if (sc.ch == '#' && visibleChars == 0) { |
| 189 | // Preprocessor commands are alone on their line |
| 190 | sc.SetState(SCE_B_PREPROCESSOR); |
| 191 | } else if (sc.ch == '#') { |
| 192 | // It can be a date literal, ending with #, or a file number, from 1 to 511 |
| 193 | // The date literal depends on the locale, so anything can go between #'s. |
| 194 | // Can be #January 1, 1993# or #1 Jan 93# or #05/11/2003#, etc. |
| 195 | // So we set the FILENUMBER state, and switch to DATE if it isn't a file number |
| 196 | sc.SetState(SCE_B_FILENUMBER); |
| 197 | } else if (sc.ch == '&' && tolower(sc.chNext) == 'h') { |
| 198 | // Hexadecimal number |
| 199 | sc.SetState(SCE_B_NUMBER); |
| 200 | sc.Forward(); |
| 201 | } else if (sc.ch == '&' && tolower(sc.chNext) == 'o') { |
| 202 | // Octal number |
| 203 | sc.SetState(SCE_B_NUMBER); |
| 204 | sc.Forward(); |
| 205 | } else if (sc.ch == '&' && tolower(sc.chNext) == 'b') { |
| 206 | // Binary number |
| 207 | sc.SetState(SCE_B_NUMBER); |
| 208 | sc.Forward(); |
| 209 | } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) { |
| 210 | sc.SetState(SCE_B_NUMBER); |
| 211 | } else if (IsAWordStart(sc.ch) || (sc.ch == '[')) { |
| 212 | sc.SetState(SCE_B_IDENTIFIER); |
| 213 | } else if (isoperator(static_cast<char>(sc.ch)) || (sc.ch == '\\')) { // Integer division |
| 214 | sc.SetState(SCE_B_OPERATOR); |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | if (sc.atLineEnd) { |
| 219 | visibleChars = 0; |
| 220 | } |
| 221 | if (!IsASpace(sc.ch)) { |
| 222 | visibleChars++; |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | if (sc.state == SCE_B_IDENTIFIER && !IsAWordChar(sc.ch)) { |
| 227 | // In Basic (except VBScript), a variable name or a function name |
| 228 | // can end with a special character indicating the type of the value |
| 229 | // held or returned. |
| 230 | bool skipType = false; |
| 231 | if (!vbScriptSyntax && IsTypeCharacter(sc.ch)) { |
| 232 | sc.Forward(); // Skip it |
| 233 | skipType = true; |
| 234 | } |
| 235 | if (sc.ch == ']') { |
| 236 | sc.Forward(); |
| 237 | } |
| 238 | char s[100]; |
| 239 | sc.GetCurrentLowered(s, sizeof(s)); |
| 240 | if (skipType) { |
| 241 | s[strlen(s) - 1] = '\0'; |
| 242 | } |
| 243 | if (strcmp(s, "rem" ) == 0) { |
| 244 | sc.ChangeState(SCE_B_COMMENT); |
| 245 | } else { |
| 246 | if (keywords.InList(s)) { |
| 247 | sc.ChangeState(SCE_B_KEYWORD); |
| 248 | } else if (keywords2.InList(s)) { |
| 249 | sc.ChangeState(SCE_B_KEYWORD2); |
| 250 | } else if (keywords3.InList(s)) { |
| 251 | sc.ChangeState(SCE_B_KEYWORD3); |
| 252 | } else if (keywords4.InList(s)) { |
| 253 | sc.ChangeState(SCE_B_KEYWORD4); |
| 254 | } // Else, it is really an identifier... |
| 255 | sc.SetState(SCE_B_DEFAULT); |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | sc.Complete(); |
| 260 | } |
| 261 | |
| 262 | static void FoldVBDoc(Sci_PositionU startPos, Sci_Position length, int, |
| 263 | WordList *[], Accessor &styler) { |
| 264 | Sci_Position endPos = startPos + length; |
| 265 | |
| 266 | // Backtrack to previous line in case need to fix its fold status |
| 267 | Sci_Position lineCurrent = styler.GetLine(startPos); |
| 268 | if (startPos > 0) { |
| 269 | if (lineCurrent > 0) { |
| 270 | lineCurrent--; |
| 271 | startPos = styler.LineStart(lineCurrent); |
| 272 | } |
| 273 | } |
| 274 | int spaceFlags = 0; |
| 275 | int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, IsVBComment); |
| 276 | char chNext = styler[startPos]; |
| 277 | for (Sci_Position i = startPos; i < endPos; i++) { |
| 278 | char ch = chNext; |
| 279 | chNext = styler.SafeGetCharAt(i + 1); |
| 280 | |
| 281 | if ((ch == '\r' && chNext != '\n') || (ch == '\n') || (i == endPos)) { |
| 282 | int lev = indentCurrent; |
| 283 | int indentNext = styler.IndentAmount(lineCurrent + 1, &spaceFlags, IsVBComment); |
| 284 | if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) { |
| 285 | // Only non whitespace lines can be headers |
| 286 | if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK)) { |
| 287 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 288 | } else if (indentNext & SC_FOLDLEVELWHITEFLAG) { |
| 289 | // Line after is blank so check the next - maybe should continue further? |
| 290 | int spaceFlags2 = 0; |
| 291 | int indentNext2 = styler.IndentAmount(lineCurrent + 2, &spaceFlags2, IsVBComment); |
| 292 | if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext2 & SC_FOLDLEVELNUMBERMASK)) { |
| 293 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 294 | } |
| 295 | } |
| 296 | } |
| 297 | indentCurrent = indentNext; |
| 298 | styler.SetLevel(lineCurrent, lev); |
| 299 | lineCurrent++; |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | static void ColouriseVBNetDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, |
| 305 | WordList *keywordlists[], Accessor &styler) { |
| 306 | ColouriseVBDoc(startPos, length, initStyle, keywordlists, styler, false); |
| 307 | } |
| 308 | |
| 309 | static void ColouriseVBScriptDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, |
| 310 | WordList *keywordlists[], Accessor &styler) { |
| 311 | ColouriseVBDoc(startPos, length, initStyle, keywordlists, styler, true); |
| 312 | } |
| 313 | |
// Descriptions of the four keyword lists, in list order; null-terminated.
static const char * const vbWordListDesc[] = {
	"Keywords",
	"user1",
	"user2",
	"user3",
	nullptr
};
| 321 | |
| 322 | LexerModule lmVB(SCLEX_VB, ColouriseVBNetDoc, "vb" , FoldVBDoc, vbWordListDesc); |
| 323 | LexerModule lmVBScript(SCLEX_VBSCRIPT, ColouriseVBScriptDoc, "vbscript" , FoldVBDoc, vbWordListDesc); |
| 324 | |
| 325 | |