1 | // Scintilla source code edit control |
2 | /** @file LexRebol.cxx |
3 | ** Lexer for REBOL. |
4 | ** Written by Pascal Hurni, inspired from LexLua by Paul Winwood & Marcos E. Wurzius & Philippe Lhoste |
5 | ** |
6 | ** History: |
7 | ** 2005-04-07 First release. |
8 | ** 2005-04-10 Closing parens and brackets go now in default style |
9 | ** String and comment nesting should be more safe |
10 | **/ |
11 | // Copyright 2005 by Pascal Hurni <pascal_hurni@fastmail.fm> |
12 | // The License.txt file describes the conditions under which this software may be distributed. |
13 | |
14 | #include <stdlib.h> |
15 | #include <string.h> |
16 | #include <stdio.h> |
17 | #include <stdarg.h> |
18 | #include <assert.h> |
19 | #include <ctype.h> |
20 | |
21 | #include <string> |
22 | #include <string_view> |
23 | |
24 | #include "ILexer.h" |
25 | #include "Scintilla.h" |
26 | #include "SciLexer.h" |
27 | |
28 | #include "WordList.h" |
29 | #include "LexAccessor.h" |
30 | #include "Accessor.h" |
31 | #include "StyleContext.h" |
32 | #include "CharacterSet.h" |
33 | #include "LexerModule.h" |
34 | |
35 | using namespace Lexilla; |
36 | |
/**
 * Tell whether @a ch may appear inside a REBOL word.
 *
 * Accepted characters are alphanumerics plus the punctuation
 * ? ! . ' + - * & | = _ ~
 *
 * @param ch character value to classify; may be any int, including values
 *           that do not fit in an unsigned char.
 * @return true when @a ch is a word character.
 */
static inline bool IsAWordChar(const int ch) {
	// The <ctype.h> classifiers have undefined behaviour for arguments that are
	// neither EOF nor representable as unsigned char, so only consult isalnum()
	// for values inside that range.
	const bool alphaNumeric = (ch >= 0 && ch <= 0xFF) && isalnum(ch);
	return (alphaNumeric || ch == '?' || ch == '!' || ch == '.' || ch == '\'' || ch == '+' || ch == '-' || ch == '*' || ch == '&' || ch == '|' || ch == '=' || ch == '_' || ch == '~');
}
40 | |
/**
 * Tell whether a REBOL word can begin at @a ch.
 *
 * '+', '-' and '.' begin a word only when the following character @a ch2 is
 * not a decimal digit. Letters and the punctuation ? ! ' * & | = _ ~ always
 * begin a word.
 *
 * @param ch  candidate first character; may be any int value.
 * @param ch2 the character that follows @a ch; may be any int value.
 * @return true when a word starts at @a ch.
 */
static inline bool IsAWordStart(const int ch, const int ch2) {
	// The <ctype.h> classifiers have undefined behaviour for arguments that are
	// neither EOF nor representable as unsigned char, so guard both calls.
	const bool nextIsDigit = (ch2 >= 0 && ch2 <= 0xFF) && isdigit(ch2);
	const bool isLetter = (ch >= 0 && ch <= 0xFF) && isalpha(ch);
	return ((ch == '+' || ch == '-' || ch == '.') && !nextIsDigit) ||
		(isLetter || ch == '?' || ch == '!' || ch == '\'' || ch == '*' || ch == '&' || ch == '|' || ch == '=' || ch == '_' || ch == '~');
}
45 | |
46 | static inline bool IsAnOperator(const int ch, const int ch2, const int ch3) { |
47 | // One char operators |
48 | if (IsASpaceOrTab(ch2)) { |
49 | return ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '<' || ch == '>' || ch == '=' || ch == '?'; |
50 | } |
51 | |
52 | // Two char operators |
53 | if (IsASpaceOrTab(ch3)) { |
54 | return (ch == '*' && ch2 == '*') || |
55 | (ch == '/' && ch2 == '/') || |
56 | (ch == '<' && (ch2 == '=' || ch2 == '>')) || |
57 | (ch == '>' && ch2 == '=') || |
58 | (ch == '=' && (ch2 == '=' || ch2 == '?')) || |
59 | (ch == '?' && ch2 == '?'); |
60 | } |
61 | |
62 | return false; |
63 | } |
64 | |
65 | static inline bool IsBinaryStart(const int ch, const int ch2, const int ch3, const int ch4) { |
66 | return (ch == '#' && ch2 == '{') || |
67 | (IsADigit(ch) && ch2 == '#' && ch3 == '{' ) || |
68 | (IsADigit(ch) && IsADigit(ch2) && ch3 == '#' && ch4 == '{' ); |
69 | } |
70 | |
71 | |
72 | static void ColouriseRebolDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler) { |
73 | |
74 | WordList &keywords = *keywordlists[0]; |
75 | WordList &keywords2 = *keywordlists[1]; |
76 | WordList &keywords3 = *keywordlists[2]; |
77 | WordList &keywords4 = *keywordlists[3]; |
78 | WordList &keywords5 = *keywordlists[4]; |
79 | WordList &keywords6 = *keywordlists[5]; |
80 | WordList &keywords7 = *keywordlists[6]; |
81 | WordList &keywords8 = *keywordlists[7]; |
82 | |
83 | Sci_Position currentLine = styler.GetLine(startPos); |
84 | // Initialize the braced string {.. { ... } ..} nesting level, if we are inside such a string. |
85 | int stringLevel = 0; |
86 | if (initStyle == SCE_REBOL_BRACEDSTRING || initStyle == SCE_REBOL_COMMENTBLOCK) { |
87 | stringLevel = styler.GetLineState(currentLine - 1); |
88 | } |
89 | |
90 | bool = initStyle == SCE_REBOL_COMMENTBLOCK; |
91 | int dotCount = 0; |
92 | |
93 | // Do not leak onto next line |
94 | if (initStyle == SCE_REBOL_COMMENTLINE) { |
95 | initStyle = SCE_REBOL_DEFAULT; |
96 | } |
97 | |
98 | StyleContext sc(startPos, length, initStyle, styler); |
99 | if (startPos == 0) { |
100 | sc.SetState(SCE_REBOL_PREFACE); |
101 | } |
102 | for (; sc.More(); sc.Forward()) { |
103 | |
104 | //--- What to do at line end ? |
105 | if (sc.atLineEnd) { |
106 | // Can be either inside a {} string or simply at eol |
107 | if (sc.state != SCE_REBOL_BRACEDSTRING && sc.state != SCE_REBOL_COMMENTBLOCK && |
108 | sc.state != SCE_REBOL_BINARY && sc.state != SCE_REBOL_PREFACE) |
109 | sc.SetState(SCE_REBOL_DEFAULT); |
110 | |
111 | // Update the line state, so it can be seen by next line |
112 | currentLine = styler.GetLine(sc.currentPos); |
113 | switch (sc.state) { |
114 | case SCE_REBOL_BRACEDSTRING: |
115 | case SCE_REBOL_COMMENTBLOCK: |
116 | // Inside a braced string, we set the line state |
117 | styler.SetLineState(currentLine, stringLevel); |
118 | break; |
119 | default: |
120 | // Reset the line state |
121 | styler.SetLineState(currentLine, 0); |
122 | break; |
123 | } |
124 | |
125 | // continue with next char |
126 | continue; |
127 | } |
128 | |
129 | //--- What to do on white-space ? |
130 | if (IsASpaceOrTab(sc.ch)) |
131 | { |
132 | // Return to default if any of these states |
133 | if (sc.state == SCE_REBOL_OPERATOR || sc.state == SCE_REBOL_CHARACTER || |
134 | sc.state == SCE_REBOL_NUMBER || sc.state == SCE_REBOL_PAIR || |
135 | sc.state == SCE_REBOL_TUPLE || sc.state == SCE_REBOL_FILE || |
136 | sc.state == SCE_REBOL_DATE || sc.state == SCE_REBOL_TIME || |
137 | sc.state == SCE_REBOL_MONEY || sc.state == SCE_REBOL_ISSUE || |
138 | sc.state == SCE_REBOL_URL || sc.state == SCE_REBOL_EMAIL) { |
139 | sc.SetState(SCE_REBOL_DEFAULT); |
140 | } |
141 | } |
142 | |
143 | //--- Specialize state ? |
144 | // URL, Email look like identifier |
145 | if (sc.state == SCE_REBOL_IDENTIFIER) |
146 | { |
147 | if (sc.ch == ':' && !IsASpace(sc.chNext)) { |
148 | sc.ChangeState(SCE_REBOL_URL); |
149 | } else if (sc.ch == '@') { |
150 | sc.ChangeState(SCE_REBOL_EMAIL); |
151 | } else if (sc.ch == '$') { |
152 | sc.ChangeState(SCE_REBOL_MONEY); |
153 | } |
154 | } |
155 | // Words look like identifiers |
156 | if (sc.state == SCE_REBOL_IDENTIFIER || (sc.state >= SCE_REBOL_WORD && sc.state <= SCE_REBOL_WORD8)) { |
157 | // Keywords ? |
158 | if (!IsAWordChar(sc.ch) || sc.Match('/')) { |
159 | char s[100]; |
160 | sc.GetCurrentLowered(s, sizeof(s)); |
161 | blockComment = strcmp(s, "comment" ) == 0; |
162 | if (keywords8.InList(s)) { |
163 | sc.ChangeState(SCE_REBOL_WORD8); |
164 | } else if (keywords7.InList(s)) { |
165 | sc.ChangeState(SCE_REBOL_WORD7); |
166 | } else if (keywords6.InList(s)) { |
167 | sc.ChangeState(SCE_REBOL_WORD6); |
168 | } else if (keywords5.InList(s)) { |
169 | sc.ChangeState(SCE_REBOL_WORD5); |
170 | } else if (keywords4.InList(s)) { |
171 | sc.ChangeState(SCE_REBOL_WORD4); |
172 | } else if (keywords3.InList(s)) { |
173 | sc.ChangeState(SCE_REBOL_WORD3); |
174 | } else if (keywords2.InList(s)) { |
175 | sc.ChangeState(SCE_REBOL_WORD2); |
176 | } else if (keywords.InList(s)) { |
177 | sc.ChangeState(SCE_REBOL_WORD); |
178 | } |
179 | // Keep same style if there are refinements |
180 | if (!sc.Match('/')) { |
181 | sc.SetState(SCE_REBOL_DEFAULT); |
182 | } |
183 | } |
184 | // special numbers |
185 | } else if (sc.state == SCE_REBOL_NUMBER) { |
186 | switch (sc.ch) { |
187 | case 'x': sc.ChangeState(SCE_REBOL_PAIR); |
188 | break; |
189 | case ':': sc.ChangeState(SCE_REBOL_TIME); |
190 | break; |
191 | case '-': |
192 | case '/': sc.ChangeState(SCE_REBOL_DATE); |
193 | break; |
194 | case '.': if (++dotCount >= 2) sc.ChangeState(SCE_REBOL_TUPLE); |
195 | break; |
196 | } |
197 | } |
198 | |
199 | //--- Determine if the current state should terminate |
200 | if (sc.state == SCE_REBOL_QUOTEDSTRING || sc.state == SCE_REBOL_CHARACTER) { |
201 | if (sc.ch == '^' && sc.chNext == '\"') { |
202 | sc.Forward(); |
203 | } else if (sc.ch == '\"') { |
204 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
205 | } |
206 | } else if (sc.state == SCE_REBOL_BRACEDSTRING || sc.state == SCE_REBOL_COMMENTBLOCK) { |
207 | if (sc.ch == '}') { |
208 | if (--stringLevel == 0) { |
209 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
210 | } |
211 | } else if (sc.ch == '{') { |
212 | stringLevel++; |
213 | } |
214 | } else if (sc.state == SCE_REBOL_BINARY) { |
215 | if (sc.ch == '}') { |
216 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
217 | } |
218 | } else if (sc.state == SCE_REBOL_TAG) { |
219 | if (sc.ch == '>') { |
220 | sc.ForwardSetState(SCE_REBOL_DEFAULT); |
221 | } |
222 | } else if (sc.state == SCE_REBOL_PREFACE) { |
223 | if (sc.MatchIgnoreCase("rebol" )) |
224 | { |
225 | int i; |
226 | for (i=5; IsASpaceOrTab(styler.SafeGetCharAt(sc.currentPos+i, 0)); i++); |
227 | if (sc.GetRelative(i) == '[') |
228 | sc.SetState(SCE_REBOL_DEFAULT); |
229 | } |
230 | } |
231 | |
232 | //--- Parens and bracket changes to default style when the current is a number |
233 | if (sc.state == SCE_REBOL_NUMBER || sc.state == SCE_REBOL_PAIR || sc.state == SCE_REBOL_TUPLE || |
234 | sc.state == SCE_REBOL_MONEY || sc.state == SCE_REBOL_ISSUE || sc.state == SCE_REBOL_EMAIL || |
235 | sc.state == SCE_REBOL_URL || sc.state == SCE_REBOL_DATE || sc.state == SCE_REBOL_TIME) { |
236 | if (sc.ch == '(' || sc.ch == '[' || sc.ch == ')' || sc.ch == ']') { |
237 | sc.SetState(SCE_REBOL_DEFAULT); |
238 | } |
239 | } |
240 | |
241 | //--- Determine if a new state should be entered. |
242 | if (sc.state == SCE_REBOL_DEFAULT) { |
243 | if (IsAnOperator(sc.ch, sc.chNext, sc.GetRelative(2))) { |
244 | sc.SetState(SCE_REBOL_OPERATOR); |
245 | } else if (IsBinaryStart(sc.ch, sc.chNext, sc.GetRelative(2), sc.GetRelative(3))) { |
246 | sc.SetState(SCE_REBOL_BINARY); |
247 | } else if (IsAWordStart(sc.ch, sc.chNext)) { |
248 | sc.SetState(SCE_REBOL_IDENTIFIER); |
249 | } else if (IsADigit(sc.ch) || sc.ch == '+' || sc.ch == '-' || /*Decimal*/ sc.ch == '.' || sc.ch == ',') { |
250 | dotCount = 0; |
251 | sc.SetState(SCE_REBOL_NUMBER); |
252 | } else if (sc.ch == '\"') { |
253 | sc.SetState(SCE_REBOL_QUOTEDSTRING); |
254 | } else if (sc.ch == '{') { |
255 | sc.SetState(blockComment ? SCE_REBOL_COMMENTBLOCK : SCE_REBOL_BRACEDSTRING); |
256 | ++stringLevel; |
257 | } else if (sc.ch == ';') { |
258 | sc.SetState(SCE_REBOL_COMMENTLINE); |
259 | } else if (sc.ch == '$') { |
260 | sc.SetState(SCE_REBOL_MONEY); |
261 | } else if (sc.ch == '%') { |
262 | sc.SetState(SCE_REBOL_FILE); |
263 | } else if (sc.ch == '<') { |
264 | sc.SetState(SCE_REBOL_TAG); |
265 | } else if (sc.ch == '#' && sc.chNext == '"') { |
266 | sc.SetState(SCE_REBOL_CHARACTER); |
267 | sc.Forward(); |
268 | } else if (sc.ch == '#' && sc.chNext != '"' && sc.chNext != '{' ) { |
269 | sc.SetState(SCE_REBOL_ISSUE); |
270 | } |
271 | } |
272 | } |
273 | sc.Complete(); |
274 | } |
275 | |
276 | |
277 | static void FoldRebolDoc(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, WordList *[], |
278 | Accessor &styler) { |
279 | Sci_PositionU lengthDoc = startPos + length; |
280 | int visibleChars = 0; |
281 | Sci_Position lineCurrent = styler.GetLine(startPos); |
282 | int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; |
283 | int levelCurrent = levelPrev; |
284 | char chNext = styler[startPos]; |
285 | int styleNext = styler.StyleAt(startPos); |
286 | for (Sci_PositionU i = startPos; i < lengthDoc; i++) { |
287 | char ch = chNext; |
288 | chNext = styler.SafeGetCharAt(i + 1); |
289 | int style = styleNext; |
290 | styleNext = styler.StyleAt(i + 1); |
291 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
292 | if (style == SCE_REBOL_DEFAULT) { |
293 | if (ch == '[') { |
294 | levelCurrent++; |
295 | } else if (ch == ']') { |
296 | levelCurrent--; |
297 | } |
298 | } |
299 | if (atEOL) { |
300 | int lev = levelPrev; |
301 | if (visibleChars == 0) |
302 | lev |= SC_FOLDLEVELWHITEFLAG; |
303 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
304 | lev |= SC_FOLDLEVELHEADERFLAG; |
305 | if (lev != styler.LevelAt(lineCurrent)) { |
306 | styler.SetLevel(lineCurrent, lev); |
307 | } |
308 | lineCurrent++; |
309 | levelPrev = levelCurrent; |
310 | visibleChars = 0; |
311 | } |
312 | if (!isspacechar(ch)) |
313 | visibleChars++; |
314 | } |
315 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
316 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
317 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
318 | } |
319 | |
// Zero-terminated list of word list descriptions passed to the lexer module.
// NOTE(review): only one set is described here, while ColouriseRebolDoc reads
// keywordlists[0] through keywordlists[7] - confirm whether the remaining
// seven sets should be described as well.
static const char * const rebolWordListDesc[] = {
"Keywords" ,
0
};
324 | |
// Lexer module object for REBOL: constructed from the SCLEX_REBOL id, the
// colouriser ColouriseRebolDoc, the name "rebol", the folder FoldRebolDoc and
// the word list descriptions above.
LexerModule lmREBOL(SCLEX_REBOL, ColouriseRebolDoc, "rebol" , FoldRebolDoc, rebolWordListDesc);
326 | |
327 | |