| 1 | // Scintilla source code edit control |
| 2 | /** @file LexRebol.cxx |
| 3 | ** Lexer for REBOL. |
| 4 | ** Written by Pascal Hurni, inspired from LexLua by Paul Winwood & Marcos E. Wurzius & Philippe Lhoste |
| 5 | ** |
| 6 | ** History: |
| 7 | ** 2005-04-07 First release. |
| 8 | ** 2005-04-10 Closing parens and brackets go now in default style |
| 9 | ** String and comment nesting should be more safe |
| 10 | **/ |
| 11 | // Copyright 2005 by Pascal Hurni <pascal_hurni@fastmail.fm> |
| 12 | // The License.txt file describes the conditions under which this software may be distributed. |
| 13 | |
| 14 | #include <stdlib.h> |
| 15 | #include <string.h> |
| 16 | #include <stdio.h> |
| 17 | #include <stdarg.h> |
| 18 | #include <assert.h> |
| 19 | #include <ctype.h> |
| 20 | |
| 21 | #include <string> |
| 22 | #include <string_view> |
| 23 | |
| 24 | #include "ILexer.h" |
| 25 | #include "Scintilla.h" |
| 26 | #include "SciLexer.h" |
| 27 | |
| 28 | #include "WordList.h" |
| 29 | #include "LexAccessor.h" |
| 30 | #include "Accessor.h" |
| 31 | #include "StyleContext.h" |
| 32 | #include "CharacterSet.h" |
| 33 | #include "LexerModule.h" |
| 34 | |
| 35 | using namespace Lexilla; |
| 36 | |
// Tell whether ch may appear inside a REBOL word.
// A word character is an alphanumeric character (as classified by isalnum())
// or one of:  ? ! . ' + - * & | = _ ~
// ch may be any int value; values outside 0..0xFF are never alphanumeric here.
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for EOF and values representable as unsigned char.
	// ch is a plain int, so the range is checked before the call to avoid
	// undefined behaviour on larger (or negative) values.
	if (ch >= 0 && ch <= 0xFF && isalnum(ch)) {
		return true;
	}
	switch (ch) {
	case '?':
	case '!':
	case '.':
	case '\'':
	case '+':
	case '-':
	case '*':
	case '&':
	case '|':
	case '=':
	case '_':
	case '~':
		return true;
	default:
		return false;
	}
}
| 40 | |
// Tell whether a REBOL word can start at ch, given the following character ch2.
// '+', '-' and '.' start a word only when they are not followed by a digit;
// otherwise a word starts with a letter (isalpha()) or one of:  ? ! ' * & | = _ ~
// Both arguments may be any int value.
static inline bool IsAWordStart(const int ch, const int ch2) {
	// The <ctype.h> classifiers are only defined for EOF and values representable
	// as unsigned char, so the range is checked before calling them.
	const bool startsWithLetter = ch >= 0 && ch <= 0xFF && isalpha(ch);
	const bool followedByDigit = ch2 >= 0 && ch2 <= 0xFF && isdigit(ch2);

	if ((ch == '+' || ch == '-' || ch == '.') && !followedByDigit) {
		return true;
	}
	return startsWithLetter || ch == '?' || ch == '!' || ch == '\'' || ch == '*' ||
		ch == '&' || ch == '|' || ch == '=' || ch == '_' || ch == '~';
}
| 45 | |
| 46 | static inline bool IsAnOperator(const int ch, const int ch2, const int ch3) { |
| 47 | // One char operators |
| 48 | if (IsASpaceOrTab(ch2)) { |
| 49 | return ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '<' || ch == '>' || ch == '=' || ch == '?'; |
| 50 | } |
| 51 | |
| 52 | // Two char operators |
| 53 | if (IsASpaceOrTab(ch3)) { |
| 54 | return (ch == '*' && ch2 == '*') || |
| 55 | (ch == '/' && ch2 == '/') || |
| 56 | (ch == '<' && (ch2 == '=' || ch2 == '>')) || |
| 57 | (ch == '>' && ch2 == '=') || |
| 58 | (ch == '=' && (ch2 == '=' || ch2 == '?')) || |
| 59 | (ch == '?' && ch2 == '?'); |
| 60 | } |
| 61 | |
| 62 | return false; |
| 63 | } |
| 64 | |
| 65 | static inline bool IsBinaryStart(const int ch, const int ch2, const int ch3, const int ch4) { |
| 66 | return (ch == '#' && ch2 == '{') || |
| 67 | (IsADigit(ch) && ch2 == '#' && ch3 == '{' ) || |
| 68 | (IsADigit(ch) && IsADigit(ch2) && ch3 == '#' && ch4 == '{' ); |
| 69 | } |
| 70 | |
| 71 | |
| 72 | static void ColouriseRebolDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler) { |
| 73 | |
| 74 | WordList &keywords = *keywordlists[0]; |
| 75 | WordList &keywords2 = *keywordlists[1]; |
| 76 | WordList &keywords3 = *keywordlists[2]; |
| 77 | WordList &keywords4 = *keywordlists[3]; |
| 78 | WordList &keywords5 = *keywordlists[4]; |
| 79 | WordList &keywords6 = *keywordlists[5]; |
| 80 | WordList &keywords7 = *keywordlists[6]; |
| 81 | WordList &keywords8 = *keywordlists[7]; |
| 82 | |
| 83 | Sci_Position currentLine = styler.GetLine(startPos); |
| 84 | // Initialize the braced string {.. { ... } ..} nesting level, if we are inside such a string. |
| 85 | int stringLevel = 0; |
| 86 | if (initStyle == SCE_REBOL_BRACEDSTRING || initStyle == SCE_REBOL_COMMENTBLOCK) { |
| 87 | stringLevel = styler.GetLineState(currentLine - 1); |
| 88 | } |
| 89 | |
| 90 | bool = initStyle == SCE_REBOL_COMMENTBLOCK; |
| 91 | int dotCount = 0; |
| 92 | |
| 93 | // Do not leak onto next line |
| 94 | if (initStyle == SCE_REBOL_COMMENTLINE) { |
| 95 | initStyle = SCE_REBOL_DEFAULT; |
| 96 | } |
| 97 | |
| 98 | StyleContext sc(startPos, length, initStyle, styler); |
| 99 | if (startPos == 0) { |
| 100 | sc.SetState(SCE_REBOL_PREFACE); |
| 101 | } |
| 102 | for (; sc.More(); sc.Forward()) { |
| 103 | |
| 104 | //--- What to do at line end ? |
| 105 | if (sc.atLineEnd) { |
| 106 | // Can be either inside a {} string or simply at eol |
| 107 | if (sc.state != SCE_REBOL_BRACEDSTRING && sc.state != SCE_REBOL_COMMENTBLOCK && |
| 108 | sc.state != SCE_REBOL_BINARY && sc.state != SCE_REBOL_PREFACE) |
| 109 | sc.SetState(SCE_REBOL_DEFAULT); |
| 110 | |
| 111 | // Update the line state, so it can be seen by next line |
| 112 | currentLine = styler.GetLine(sc.currentPos); |
| 113 | switch (sc.state) { |
| 114 | case SCE_REBOL_BRACEDSTRING: |
| 115 | case SCE_REBOL_COMMENTBLOCK: |
| 116 | // Inside a braced string, we set the line state |
| 117 | styler.SetLineState(currentLine, stringLevel); |
| 118 | break; |
| 119 | default: |
| 120 | // Reset the line state |
| 121 | styler.SetLineState(currentLine, 0); |
| 122 | break; |
| 123 | } |
| 124 | |
| 125 | // continue with next char |
| 126 | continue; |
| 127 | } |
| 128 | |
| 129 | //--- What to do on white-space ? |
| 130 | if (IsASpaceOrTab(sc.ch)) |
| 131 | { |
| 132 | // Return to default if any of these states |
| 133 | if (sc.state == SCE_REBOL_OPERATOR || sc.state == SCE_REBOL_CHARACTER || |
| 134 | sc.state == SCE_REBOL_NUMBER || sc.state == SCE_REBOL_PAIR || |
| 135 | sc.state == SCE_REBOL_TUPLE || sc.state == SCE_REBOL_FILE || |
| 136 | sc.state == SCE_REBOL_DATE || sc.state == SCE_REBOL_TIME || |
| 137 | sc.state == SCE_REBOL_MONEY || sc.state == SCE_REBOL_ISSUE || |
| 138 | sc.state == SCE_REBOL_URL || sc.state == SCE_REBOL_EMAIL) { |
| 139 | sc.SetState(SCE_REBOL_DEFAULT); |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | //--- Specialize state ? |
| 144 | // URL, Email look like identifier |
| 145 | if (sc.state == SCE_REBOL_IDENTIFIER) |
| 146 | { |
| 147 | if (sc.ch == ':' && !IsASpace(sc.chNext)) { |
| 148 | sc.ChangeState(SCE_REBOL_URL); |
| 149 | } else if (sc.ch == '@') { |
| 150 | sc.ChangeState(SCE_REBOL_EMAIL); |
| 151 | } else if (sc.ch == '$') { |
| 152 | sc.ChangeState(SCE_REBOL_MONEY); |
| 153 | } |
| 154 | } |
| 155 | // Words look like identifiers |
| 156 | if (sc.state == SCE_REBOL_IDENTIFIER || (sc.state >= SCE_REBOL_WORD && sc.state <= SCE_REBOL_WORD8)) { |
| 157 | // Keywords ? |
| 158 | if (!IsAWordChar(sc.ch) || sc.Match('/')) { |
| 159 | char s[100]; |
| 160 | sc.GetCurrentLowered(s, sizeof(s)); |
| 161 | blockComment = strcmp(s, "comment" ) == 0; |
| 162 | if (keywords8.InList(s)) { |
| 163 | sc.ChangeState(SCE_REBOL_WORD8); |
| 164 | } else if (keywords7.InList(s)) { |
| 165 | sc.ChangeState(SCE_REBOL_WORD7); |
| 166 | } else if (keywords6.InList(s)) { |
| 167 | sc.ChangeState(SCE_REBOL_WORD6); |
| 168 | } else if (keywords5.InList(s)) { |
| 169 | sc.ChangeState(SCE_REBOL_WORD5); |
| 170 | } else if (keywords4.InList(s)) { |
| 171 | sc.ChangeState(SCE_REBOL_WORD4); |
| 172 | } else if (keywords3.InList(s)) { |
| 173 | sc.ChangeState(SCE_REBOL_WORD3); |
| 174 | } else if (keywords2.InList(s)) { |
| 175 | sc.ChangeState(SCE_REBOL_WORD2); |
| 176 | } else if (keywords.InList(s)) { |
| 177 | sc.ChangeState(SCE_REBOL_WORD); |
| 178 | } |
| 179 | // Keep same style if there are refinements |
| 180 | if (!sc.Match('/')) { |
| 181 | sc.SetState(SCE_REBOL_DEFAULT); |
| 182 | } |
| 183 | } |
| 184 | // special numbers |
| 185 | } else if (sc.state == SCE_REBOL_NUMBER) { |
| 186 | switch (sc.ch) { |
| 187 | case 'x': sc.ChangeState(SCE_REBOL_PAIR); |
| 188 | break; |
| 189 | case ':': sc.ChangeState(SCE_REBOL_TIME); |
| 190 | break; |
| 191 | case '-': |
| 192 | case '/': sc.ChangeState(SCE_REBOL_DATE); |
| 193 | break; |
| 194 | case '.': if (++dotCount >= 2) sc.ChangeState(SCE_REBOL_TUPLE); |
| 195 | break; |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | //--- Determine if the current state should terminate |
| 200 | if (sc.state == SCE_REBOL_QUOTEDSTRING || sc.state == SCE_REBOL_CHARACTER) { |
| 201 | if (sc.ch == '^' && sc.chNext == '\"') { |
| 202 | sc.Forward(); |
| 203 | } else if (sc.ch == '\"') { |
| 204 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
| 205 | } |
| 206 | } else if (sc.state == SCE_REBOL_BRACEDSTRING || sc.state == SCE_REBOL_COMMENTBLOCK) { |
| 207 | if (sc.ch == '}') { |
| 208 | if (--stringLevel == 0) { |
| 209 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
| 210 | } |
| 211 | } else if (sc.ch == '{') { |
| 212 | stringLevel++; |
| 213 | } |
| 214 | } else if (sc.state == SCE_REBOL_BINARY) { |
| 215 | if (sc.ch == '}') { |
| 216 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
| 217 | } |
| 218 | } else if (sc.state == SCE_REBOL_TAG) { |
| 219 | if (sc.ch == '>') { |
| 220 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
| 221 | } |
| 222 | } else if (sc.state == SCE_REBOL_PREFACE) { |
| 223 | if (sc.MatchIgnoreCase("rebol" )) |
| 224 | { |
| 225 | int i; |
| 226 | for (i=5; IsASpaceOrTab(styler.SafeGetCharAt(sc.currentPos+i, 0)); i++); |
| 227 | if (sc.GetRelative(i) == '[') |
| 228 | sc.SetState(SCE_REBOL_DEFAULT); |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | //--- Parens and bracket changes to default style when the current is a number |
| 233 | if (sc.state == SCE_REBOL_NUMBER || sc.state == SCE_REBOL_PAIR || sc.state == SCE_REBOL_TUPLE || |
| 234 | sc.state == SCE_REBOL_MONEY || sc.state == SCE_REBOL_ISSUE || sc.state == SCE_REBOL_EMAIL || |
| 235 | sc.state == SCE_REBOL_URL || sc.state == SCE_REBOL_DATE || sc.state == SCE_REBOL_TIME) { |
| 236 | if (sc.ch == '(' || sc.ch == '[' || sc.ch == ')' || sc.ch == ']') { |
| 237 | sc.SetState(SCE_REBOL_DEFAULT); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | //--- Determine if a new state should be entered. |
| 242 | if (sc.state == SCE_REBOL_DEFAULT) { |
| 243 | if (IsAnOperator(sc.ch, sc.chNext, sc.GetRelative(2))) { |
| 244 | sc.SetState(SCE_REBOL_OPERATOR); |
| 245 | } else if (IsBinaryStart(sc.ch, sc.chNext, sc.GetRelative(2), sc.GetRelative(3))) { |
| 246 | sc.SetState(SCE_REBOL_BINARY); |
| 247 | } else if (IsAWordStart(sc.ch, sc.chNext)) { |
| 248 | sc.SetState(SCE_REBOL_IDENTIFIER); |
| 249 | } else if (IsADigit(sc.ch) || sc.ch == '+' || sc.ch == '-' || /*Decimal*/ sc.ch == '.' || sc.ch == ',') { |
| 250 | dotCount = 0; |
| 251 | sc.SetState(SCE_REBOL_NUMBER); |
| 252 | } else if (sc.ch == '\"') { |
| 253 | sc.SetState(SCE_REBOL_QUOTEDSTRING); |
| 254 | } else if (sc.ch == '{') { |
| 255 | sc.SetState(blockComment ? SCE_REBOL_COMMENTBLOCK : SCE_REBOL_BRACEDSTRING); |
| 256 | ++stringLevel; |
| 257 | } else if (sc.ch == ';') { |
| 258 | sc.SetState(SCE_REBOL_COMMENTLINE); |
| 259 | } else if (sc.ch == '$') { |
| 260 | sc.SetState(SCE_REBOL_MONEY); |
| 261 | } else if (sc.ch == '%') { |
| 262 | sc.SetState(SCE_REBOL_FILE); |
| 263 | } else if (sc.ch == '<') { |
| 264 | sc.SetState(SCE_REBOL_TAG); |
| 265 | } else if (sc.ch == '#' && sc.chNext == '"') { |
| 266 | sc.SetState(SCE_REBOL_CHARACTER); |
| 267 | sc.Forward(); |
| 268 | } else if (sc.ch == '#' && sc.chNext != '"' && sc.chNext != '{' ) { |
| 269 | sc.SetState(SCE_REBOL_ISSUE); |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | sc.Complete(); |
| 274 | } |
| 275 | |
| 276 | |
| 277 | static void FoldRebolDoc(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, WordList *[], |
| 278 | Accessor &styler) { |
| 279 | Sci_PositionU lengthDoc = startPos + length; |
| 280 | int visibleChars = 0; |
| 281 | Sci_Position lineCurrent = styler.GetLine(startPos); |
| 282 | int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; |
| 283 | int levelCurrent = levelPrev; |
| 284 | char chNext = styler[startPos]; |
| 285 | int styleNext = styler.StyleAt(startPos); |
| 286 | for (Sci_PositionU i = startPos; i < lengthDoc; i++) { |
| 287 | char ch = chNext; |
| 288 | chNext = styler.SafeGetCharAt(i + 1); |
| 289 | int style = styleNext; |
| 290 | styleNext = styler.StyleAt(i + 1); |
| 291 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
| 292 | if (style == SCE_REBOL_DEFAULT) { |
| 293 | if (ch == '[') { |
| 294 | levelCurrent++; |
| 295 | } else if (ch == ']') { |
| 296 | levelCurrent--; |
| 297 | } |
| 298 | } |
| 299 | if (atEOL) { |
| 300 | int lev = levelPrev; |
| 301 | if (visibleChars == 0) |
| 302 | lev |= SC_FOLDLEVELWHITEFLAG; |
| 303 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
| 304 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 305 | if (lev != styler.LevelAt(lineCurrent)) { |
| 306 | styler.SetLevel(lineCurrent, lev); |
| 307 | } |
| 308 | lineCurrent++; |
| 309 | levelPrev = levelCurrent; |
| 310 | visibleChars = 0; |
| 311 | } |
| 312 | if (!isspacechar(ch)) |
| 313 | visibleChars++; |
| 314 | } |
| 315 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
| 316 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
| 317 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
| 318 | } |
| 319 | |
// Descriptions of the word lists, terminated by a null pointer.
static const char * const rebolWordListDesc[] = {
	"Keywords",
	nullptr
};
| 324 | |
// Lexer module object for REBOL: built from the SCLEX_REBOL identifier, the colourising
// function, the name "rebol", the folding function and the word list descriptions.
LexerModule lmREBOL(SCLEX_REBOL, ColouriseRebolDoc, "rebol" , FoldRebolDoc, rebolWordListDesc);
| 326 | |
| 327 | |