1 | // Scintilla source code edit control |
2 | /** @file LexECL.cxx |
3 | ** Lexer for ECL. |
4 | **/ |
5 | // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | |
8 | #include <stdlib.h> |
9 | #include <string.h> |
10 | #include <stdio.h> |
11 | #include <stdarg.h> |
12 | #include <assert.h> |
13 | #include <ctype.h> |
14 | |
15 | #ifdef _MSC_VER |
16 | #pragma warning(disable: 4786) |
17 | #endif |
18 | #ifdef __BORLANDC__ |
19 | // Borland C++ displays warnings in vector header without this |
20 | #pragma option -w-ccc -w-rch |
21 | #endif |
22 | |
23 | #include <string> |
24 | #include <string_view> |
25 | #include <vector> |
26 | #include <map> |
27 | #include <algorithm> |
28 | #include <functional> |
29 | |
30 | #include "ILexer.h" |
31 | #include "Scintilla.h" |
32 | #include "SciLexer.h" |
33 | |
34 | #include "PropSetSimple.h" |
35 | #include "WordList.h" |
36 | #include "LexAccessor.h" |
37 | #include "Accessor.h" |
38 | #include "StyleContext.h" |
39 | #include "CharacterSet.h" |
40 | #include "LexerModule.h" |
41 | #include "OptionSet.h" |
42 | |
43 | #define SET_LOWER "abcdefghijklmnopqrstuvwxyz" |
44 | #define SET_UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
45 | #define SET_DIGITS "0123456789" |
46 | |
47 | using namespace Lexilla; |
48 | |
49 | static bool IsSpaceEquiv(int state) { |
50 | switch (state) { |
51 | case SCE_ECL_DEFAULT: |
52 | case SCE_ECL_COMMENT: |
53 | case SCE_ECL_COMMENTLINE: |
54 | case SCE_ECL_COMMENTLINEDOC: |
55 | case SCE_ECL_COMMENTDOCKEYWORD: |
56 | case SCE_ECL_COMMENTDOCKEYWORDERROR: |
57 | case SCE_ECL_COMMENTDOC: |
58 | return true; |
59 | |
60 | default: |
61 | return false; |
62 | } |
63 | } |
64 | |
65 | static void ColouriseEclDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], |
66 | Accessor &styler) { |
67 | WordList &keywords0 = *keywordlists[0]; |
68 | WordList &keywords1 = *keywordlists[1]; |
69 | WordList &keywords2 = *keywordlists[2]; |
70 | WordList &keywords3 = *keywordlists[3]; //Value Types |
71 | WordList &keywords4 = *keywordlists[4]; |
72 | WordList &keywords5 = *keywordlists[5]; |
73 | WordList &keywords6 = *keywordlists[6]; //Javadoc Tags |
74 | WordList cplusplus; |
75 | cplusplus.Set("beginc endc" ); |
76 | |
77 | bool stylingWithinPreprocessor = false; |
78 | |
79 | CharacterSet setOKBeforeRE(CharacterSet::setNone, "(=," ); |
80 | CharacterSet setDoxygen(CharacterSet::setLower, "$@\\&<>#{}[]" ); |
81 | CharacterSet setWordStart(CharacterSet::setAlpha, "_" , 0x80, true); |
82 | CharacterSet setWord(CharacterSet::setAlphaNum, "._" , 0x80, true); |
83 | CharacterSet setQualified(CharacterSet::setNone, "uUxX" ); |
84 | |
85 | int chPrevNonWhite = ' '; |
86 | int visibleChars = 0; |
87 | bool lastWordWasUUID = false; |
88 | int styleBeforeDCKeyword = SCE_ECL_DEFAULT; |
89 | bool continuationLine = false; |
90 | |
91 | if (initStyle == SCE_ECL_PREPROCESSOR) { |
92 | // Set continuationLine if last character of previous line is '\' |
93 | Sci_Position lineCurrent = styler.GetLine(startPos); |
94 | if (lineCurrent > 0) { |
95 | int chBack = styler.SafeGetCharAt(startPos-1, 0); |
96 | int chBack2 = styler.SafeGetCharAt(startPos-2, 0); |
97 | int lineEndChar = '!'; |
98 | if (chBack2 == '\r' && chBack == '\n') { |
99 | lineEndChar = styler.SafeGetCharAt(startPos-3, 0); |
100 | } else if (chBack == '\n' || chBack == '\r') { |
101 | lineEndChar = chBack2; |
102 | } |
103 | continuationLine = lineEndChar == '\\'; |
104 | } |
105 | } |
106 | |
107 | // look back to set chPrevNonWhite properly for better regex colouring |
108 | if (startPos > 0) { |
109 | Sci_Position back = startPos; |
110 | while (--back && IsSpaceEquiv(styler.StyleAt(back))) |
111 | ; |
112 | if (styler.StyleAt(back) == SCE_ECL_OPERATOR) { |
113 | chPrevNonWhite = styler.SafeGetCharAt(back); |
114 | } |
115 | } |
116 | |
117 | StyleContext sc(startPos, length, initStyle, styler); |
118 | |
119 | for (; sc.More(); sc.Forward()) { |
120 | if (sc.atLineStart) { |
121 | if (sc.state == SCE_ECL_STRING) { |
122 | // Prevent SCE_ECL_STRINGEOL from leaking back to previous line which |
123 | // ends with a line continuation by locking in the state upto this position. |
124 | sc.SetState(SCE_ECL_STRING); |
125 | } |
126 | // Reset states to begining of colourise so no surprises |
127 | // if different sets of lines lexed. |
128 | visibleChars = 0; |
129 | lastWordWasUUID = false; |
130 | } |
131 | |
132 | // Handle line continuation generically. |
133 | if (sc.ch == '\\') { |
134 | if (sc.chNext == '\n' || sc.chNext == '\r') { |
135 | sc.Forward(); |
136 | if (sc.ch == '\r' && sc.chNext == '\n') { |
137 | sc.Forward(); |
138 | } |
139 | continuationLine = true; |
140 | continue; |
141 | } |
142 | } |
143 | |
144 | // Determine if the current state should terminate. |
145 | switch (sc.state) { |
146 | case SCE_ECL_ADDED: |
147 | case SCE_ECL_DELETED: |
148 | case SCE_ECL_CHANGED: |
149 | case SCE_ECL_MOVED: |
150 | if (sc.atLineStart) |
151 | sc.SetState(SCE_ECL_DEFAULT); |
152 | break; |
153 | case SCE_ECL_OPERATOR: |
154 | sc.SetState(SCE_ECL_DEFAULT); |
155 | break; |
156 | case SCE_ECL_NUMBER: |
157 | // We accept almost anything because of hex. and number suffixes |
158 | if (!setWord.Contains(sc.ch)) { |
159 | sc.SetState(SCE_ECL_DEFAULT); |
160 | } |
161 | break; |
162 | case SCE_ECL_IDENTIFIER: |
163 | if (!setWord.Contains(sc.ch) || (sc.ch == '.')) { |
164 | char s[1000]; |
165 | sc.GetCurrentLowered(s, sizeof(s)); |
166 | if (keywords0.InList(s)) { |
167 | lastWordWasUUID = strcmp(s, "uuid" ) == 0; |
168 | sc.ChangeState(SCE_ECL_WORD0); |
169 | } else if (keywords1.InList(s)) { |
170 | sc.ChangeState(SCE_ECL_WORD1); |
171 | } else if (keywords2.InList(s)) { |
172 | sc.ChangeState(SCE_ECL_WORD2); |
173 | } else if (keywords4.InList(s)) { |
174 | sc.ChangeState(SCE_ECL_WORD4); |
175 | } else if (keywords5.InList(s)) { |
176 | sc.ChangeState(SCE_ECL_WORD5); |
177 | } |
178 | else //Data types are of from KEYWORD## |
179 | { |
180 | int i = static_cast<int>(strlen(s)) - 1; |
181 | while(i >= 0 && (isdigit(s[i]) || s[i] == '_')) |
182 | --i; |
183 | |
184 | char s2[1000]; |
185 | strncpy(s2, s, i + 1); |
186 | s2[i + 1] = 0; |
187 | if (keywords3.InList(s2)) { |
188 | sc.ChangeState(SCE_ECL_WORD3); |
189 | } |
190 | } |
191 | sc.SetState(SCE_ECL_DEFAULT); |
192 | } |
193 | break; |
194 | case SCE_ECL_PREPROCESSOR: |
195 | if (sc.atLineStart && !continuationLine) { |
196 | sc.SetState(SCE_ECL_DEFAULT); |
197 | } else if (stylingWithinPreprocessor) { |
198 | if (IsASpace(sc.ch)) { |
199 | sc.SetState(SCE_ECL_DEFAULT); |
200 | } |
201 | } else { |
202 | if (sc.Match('/', '*') || sc.Match('/', '/')) { |
203 | sc.SetState(SCE_ECL_DEFAULT); |
204 | } |
205 | } |
206 | break; |
207 | case SCE_ECL_COMMENT: |
208 | if (sc.Match('*', '/')) { |
209 | sc.Forward(); |
210 | sc.ForwardSetState(SCE_ECL_DEFAULT); |
211 | } |
212 | break; |
213 | case SCE_ECL_COMMENTDOC: |
214 | if (sc.Match('*', '/')) { |
215 | sc.Forward(); |
216 | sc.ForwardSetState(SCE_ECL_DEFAULT); |
217 | } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support |
218 | // Verify that we have the conditions to mark a comment-doc-keyword |
219 | if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) { |
220 | styleBeforeDCKeyword = SCE_ECL_COMMENTDOC; |
221 | sc.SetState(SCE_ECL_COMMENTDOCKEYWORD); |
222 | } |
223 | } |
224 | break; |
225 | case SCE_ECL_COMMENTLINE: |
226 | if (sc.atLineStart) { |
227 | sc.SetState(SCE_ECL_DEFAULT); |
228 | } |
229 | break; |
230 | case SCE_ECL_COMMENTLINEDOC: |
231 | if (sc.atLineStart) { |
232 | sc.SetState(SCE_ECL_DEFAULT); |
233 | } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support |
234 | // Verify that we have the conditions to mark a comment-doc-keyword |
235 | if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) { |
236 | styleBeforeDCKeyword = SCE_ECL_COMMENTLINEDOC; |
237 | sc.SetState(SCE_ECL_COMMENTDOCKEYWORD); |
238 | } |
239 | } |
240 | break; |
241 | case SCE_ECL_COMMENTDOCKEYWORD: |
242 | if ((styleBeforeDCKeyword == SCE_ECL_COMMENTDOC) && sc.Match('*', '/')) { |
243 | sc.ChangeState(SCE_ECL_COMMENTDOCKEYWORDERROR); |
244 | sc.Forward(); |
245 | sc.ForwardSetState(SCE_ECL_DEFAULT); |
246 | } else if (!setDoxygen.Contains(sc.ch)) { |
247 | char s[1000]; |
248 | sc.GetCurrentLowered(s, sizeof(s)); |
249 | if (!IsASpace(sc.ch) || !keywords6.InList(s+1)) { |
250 | sc.ChangeState(SCE_ECL_COMMENTDOCKEYWORDERROR); |
251 | } |
252 | sc.SetState(styleBeforeDCKeyword); |
253 | } |
254 | break; |
255 | case SCE_ECL_STRING: |
256 | if (sc.atLineEnd) { |
257 | sc.ChangeState(SCE_ECL_STRINGEOL); |
258 | } else if (sc.ch == '\\') { |
259 | if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { |
260 | sc.Forward(); |
261 | } |
262 | } else if (sc.ch == '\"') { |
263 | sc.ForwardSetState(SCE_ECL_DEFAULT); |
264 | } |
265 | break; |
266 | case SCE_ECL_CHARACTER: |
267 | if (sc.atLineEnd) { |
268 | sc.ChangeState(SCE_ECL_STRINGEOL); |
269 | } else if (sc.ch == '\\') { |
270 | if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { |
271 | sc.Forward(); |
272 | } |
273 | } else if (sc.ch == '\'') { |
274 | sc.ForwardSetState(SCE_ECL_DEFAULT); |
275 | } |
276 | break; |
277 | case SCE_ECL_REGEX: |
278 | if (sc.atLineStart) { |
279 | sc.SetState(SCE_ECL_DEFAULT); |
280 | } else if (sc.ch == '/') { |
281 | sc.Forward(); |
282 | while ((sc.ch < 0x80) && islower(sc.ch)) |
283 | sc.Forward(); // gobble regex flags |
284 | sc.SetState(SCE_ECL_DEFAULT); |
285 | } else if (sc.ch == '\\') { |
286 | // Gobble up the quoted character |
287 | if (sc.chNext == '\\' || sc.chNext == '/') { |
288 | sc.Forward(); |
289 | } |
290 | } |
291 | break; |
292 | case SCE_ECL_STRINGEOL: |
293 | if (sc.atLineStart) { |
294 | sc.SetState(SCE_ECL_DEFAULT); |
295 | } |
296 | break; |
297 | case SCE_ECL_VERBATIM: |
298 | if (sc.ch == '\"') { |
299 | if (sc.chNext == '\"') { |
300 | sc.Forward(); |
301 | } else { |
302 | sc.ForwardSetState(SCE_ECL_DEFAULT); |
303 | } |
304 | } |
305 | break; |
306 | case SCE_ECL_UUID: |
307 | if (sc.ch == '\r' || sc.ch == '\n' || sc.ch == ')') { |
308 | sc.SetState(SCE_ECL_DEFAULT); |
309 | } |
310 | break; |
311 | } |
312 | |
313 | // Determine if a new state should be entered. |
314 | Sci_Position lineCurrent = styler.GetLine(sc.currentPos); |
315 | int lineState = styler.GetLineState(lineCurrent); |
316 | if (sc.state == SCE_ECL_DEFAULT) { |
317 | if (lineState) { |
318 | sc.SetState(lineState); |
319 | } |
320 | else if (sc.Match('@', '\"')) { |
321 | sc.SetState(SCE_ECL_VERBATIM); |
322 | sc.Forward(); |
323 | } else if (setQualified.Contains(sc.ch) && sc.chNext == '\'') { |
324 | sc.SetState(SCE_ECL_CHARACTER); |
325 | sc.Forward(); |
326 | } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) { |
327 | if (lastWordWasUUID) { |
328 | sc.SetState(SCE_ECL_UUID); |
329 | lastWordWasUUID = false; |
330 | } else { |
331 | sc.SetState(SCE_ECL_NUMBER); |
332 | } |
333 | } else if (setWordStart.Contains(sc.ch) || (sc.ch == '@')) { |
334 | if (lastWordWasUUID) { |
335 | sc.SetState(SCE_ECL_UUID); |
336 | lastWordWasUUID = false; |
337 | } else { |
338 | sc.SetState(SCE_ECL_IDENTIFIER); |
339 | } |
340 | } else if (sc.Match('/', '*')) { |
341 | if (sc.Match("/**" ) || sc.Match("/*!" )) { // Support of Qt/Doxygen doc. style |
342 | sc.SetState(SCE_ECL_COMMENTDOC); |
343 | } else { |
344 | sc.SetState(SCE_ECL_COMMENT); |
345 | } |
346 | sc.Forward(); // Eat the * so it isn't used for the end of the comment |
347 | } else if (sc.Match('/', '/')) { |
348 | if ((sc.Match("///" ) && !sc.Match("////" )) || sc.Match("//!" )) |
349 | // Support of Qt/Doxygen doc. style |
350 | sc.SetState(SCE_ECL_COMMENTLINEDOC); |
351 | else |
352 | sc.SetState(SCE_ECL_COMMENTLINE); |
353 | } else if (sc.ch == '/' && setOKBeforeRE.Contains(chPrevNonWhite)) { |
354 | sc.SetState(SCE_ECL_REGEX); // JavaScript's RegEx |
355 | // } else if (sc.ch == '\"') { |
356 | // sc.SetState(SCE_ECL_STRING); |
357 | } else if (sc.ch == '\'') { |
358 | sc.SetState(SCE_ECL_CHARACTER); |
359 | } else if (sc.ch == '#' && visibleChars == 0) { |
360 | // Preprocessor commands are alone on their line |
361 | sc.SetState(SCE_ECL_PREPROCESSOR); |
362 | // Skip whitespace between # and preprocessor word |
363 | do { |
364 | sc.Forward(); |
365 | } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More()); |
366 | if (sc.atLineEnd) { |
367 | sc.SetState(SCE_ECL_DEFAULT); |
368 | } |
369 | } else if (isoperator(static_cast<char>(sc.ch))) { |
370 | sc.SetState(SCE_ECL_OPERATOR); |
371 | } |
372 | } |
373 | |
374 | if (!IsASpace(sc.ch) && !IsSpaceEquiv(sc.state)) { |
375 | chPrevNonWhite = sc.ch; |
376 | visibleChars++; |
377 | } |
378 | continuationLine = false; |
379 | } |
380 | sc.Complete(); |
381 | |
382 | } |
383 | |
384 | static bool (int style) { |
385 | return style == SCE_ECL_COMMENT || |
386 | style == SCE_ECL_COMMENTDOC || |
387 | style == SCE_ECL_COMMENTDOCKEYWORD || |
388 | style == SCE_ECL_COMMENTDOCKEYWORDERROR; |
389 | } |
390 | |
391 | static bool MatchNoCase(Accessor & styler, Sci_PositionU & pos, const char *s) { |
392 | Sci_Position i=0; |
393 | for (; *s; i++) { |
394 | char compare_char = tolower(*s); |
395 | char styler_char = tolower(styler.SafeGetCharAt(pos+i)); |
396 | if (compare_char != styler_char) |
397 | return false; |
398 | s++; |
399 | } |
400 | pos+=i-1; |
401 | return true; |
402 | } |
403 | |
404 | |
405 | // Store both the current line's fold level and the next lines in the |
406 | // level store to make it easy to pick up with each increment |
407 | // and to make it possible to fiddle the current level for "} else {". |
408 | static void FoldEclDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, |
409 | WordList *[], Accessor &styler) { |
410 | bool = true; |
411 | bool foldPreprocessor = true; |
412 | bool foldCompact = true; |
413 | bool foldAtElse = true; |
414 | Sci_PositionU endPos = startPos + length; |
415 | int visibleChars = 0; |
416 | Sci_Position lineCurrent = styler.GetLine(startPos); |
417 | int levelCurrent = SC_FOLDLEVELBASE; |
418 | if (lineCurrent > 0) |
419 | levelCurrent = styler.LevelAt(lineCurrent-1) >> 16; |
420 | int levelMinCurrent = levelCurrent; |
421 | int levelNext = levelCurrent; |
422 | char chNext = styler[startPos]; |
423 | int styleNext = styler.StyleAt(startPos); |
424 | int style = initStyle; |
425 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
426 | char ch = chNext; |
427 | chNext = styler.SafeGetCharAt(i + 1); |
428 | int stylePrev = style; |
429 | style = styleNext; |
430 | styleNext = styler.StyleAt(i + 1); |
431 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
432 | if (foldComment && IsStreamCommentStyle(style)) { |
433 | if (!IsStreamCommentStyle(stylePrev) && (stylePrev != SCE_ECL_COMMENTLINEDOC)) { |
434 | levelNext++; |
435 | } else if (!IsStreamCommentStyle(styleNext) && (styleNext != SCE_ECL_COMMENTLINEDOC) && !atEOL) { |
436 | // Comments don't end at end of line and the next character may be unstyled. |
437 | levelNext--; |
438 | } |
439 | } |
440 | if (foldComment && (style == SCE_ECL_COMMENTLINE)) { |
441 | if ((ch == '/') && (chNext == '/')) { |
442 | char chNext2 = styler.SafeGetCharAt(i + 2); |
443 | if (chNext2 == '{') { |
444 | levelNext++; |
445 | } else if (chNext2 == '}') { |
446 | levelNext--; |
447 | } |
448 | } |
449 | } |
450 | if (foldPreprocessor && (style == SCE_ECL_PREPROCESSOR)) { |
451 | if (ch == '#') { |
452 | Sci_PositionU j = i + 1; |
453 | while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) { |
454 | j++; |
455 | } |
456 | if (MatchNoCase(styler, j, "region" ) || MatchNoCase(styler, j, "if" )) { |
457 | levelNext++; |
458 | } else if (MatchNoCase(styler, j, "endregion" ) || MatchNoCase(styler, j, "end" )) { |
459 | levelNext--; |
460 | } |
461 | } |
462 | } |
463 | if (style == SCE_ECL_OPERATOR) { |
464 | if (ch == '{') { |
465 | // Measure the minimum before a '{' to allow |
466 | // folding on "} else {" |
467 | if (levelMinCurrent > levelNext) { |
468 | levelMinCurrent = levelNext; |
469 | } |
470 | levelNext++; |
471 | } else if (ch == '}') { |
472 | levelNext--; |
473 | } |
474 | } |
475 | if (style == SCE_ECL_WORD2) { |
476 | if (MatchNoCase(styler, i, "record" ) || MatchNoCase(styler, i, "transform" ) || MatchNoCase(styler, i, "type" ) || MatchNoCase(styler, i, "function" ) || |
477 | MatchNoCase(styler, i, "module" ) || MatchNoCase(styler, i, "service" ) || MatchNoCase(styler, i, "interface" ) || MatchNoCase(styler, i, "ifblock" ) || |
478 | MatchNoCase(styler, i, "macro" ) || MatchNoCase(styler, i, "beginc++" )) { |
479 | levelNext++; |
480 | } else if (MatchNoCase(styler, i, "endmacro" ) || MatchNoCase(styler, i, "endc++" ) || MatchNoCase(styler, i, "end" )) { |
481 | levelNext--; |
482 | } |
483 | } |
484 | if (atEOL || (i == endPos-1)) { |
485 | int levelUse = levelCurrent; |
486 | if (foldAtElse) { |
487 | levelUse = levelMinCurrent; |
488 | } |
489 | int lev = levelUse | levelNext << 16; |
490 | if (visibleChars == 0 && foldCompact) |
491 | lev |= SC_FOLDLEVELWHITEFLAG; |
492 | if (levelUse < levelNext) |
493 | lev |= SC_FOLDLEVELHEADERFLAG; |
494 | if (lev != styler.LevelAt(lineCurrent)) { |
495 | styler.SetLevel(lineCurrent, lev); |
496 | } |
497 | lineCurrent++; |
498 | levelCurrent = levelNext; |
499 | levelMinCurrent = levelCurrent; |
500 | if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) { |
501 | // There is an empty line at end of file so give it same level and empty |
502 | styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG); |
503 | } |
504 | visibleChars = 0; |
505 | } |
506 | if (!IsASpace(ch)) |
507 | visibleChars++; |
508 | } |
509 | } |
510 | |
// Descriptions of the keyword lists, terminated by a null pointer.
static const char * const EclWordListDesc[] = {
	"Keywords",
	nullptr
};
515 | |
// Lexer module object for ECL: ties the SCLEX_ECL identifier and the name
// "ecl" to the colourising and folding functions and the word list
// descriptions defined above.
LexerModule lmECL(
	SCLEX_ECL,
	ColouriseEclDoc,
	"ecl",
	FoldEclDoc,
	EclWordListDesc);
522 | |