1 | // Scintilla source code edit control |
2 | /** @file LexVisualProlog.cxx |
3 | ** Lexer for Visual Prolog. |
4 | **/ |
5 | // Author Thomas Linder Puls, Prolog Development Center A/S, http://www.visual-prolog.com |
6 | // Based on Lexer for C++, C, Java, and JavaScript. |
7 | // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org> |
8 | // The License.txt file describes the conditions under which this software may be distributed. |
9 | |
10 | // The line state contains: |
11 | // In SCE_VISUALPROLOG_STRING_VERBATIM_EOL (i.e. multiline string literal): The closingQuote. |
12 | // else (for SCE_VISUALPROLOG_COMMENT_BLOCK): The comment nesting level |
13 | |
14 | #include <stdlib.h> |
15 | #include <string.h> |
16 | #include <stdio.h> |
17 | #include <stdarg.h> |
18 | #include <assert.h> |
19 | #include <ctype.h> |
20 | |
21 | #ifdef _MSC_VER |
22 | #pragma warning(disable: 4786) |
23 | #endif |
24 | |
25 | #include <string> |
26 | #include <string_view> |
27 | #include <vector> |
28 | #include <map> |
29 | #include <algorithm> |
30 | #include <functional> |
31 | |
32 | #include "ILexer.h" |
33 | #include "Scintilla.h" |
34 | #include "SciLexer.h" |
35 | |
36 | #include "WordList.h" |
37 | #include "LexAccessor.h" |
38 | #include "Accessor.h" |
39 | #include "StyleContext.h" |
40 | #include "CharacterSet.h" |
41 | #include "CharacterCategory.h" |
42 | #include "LexerModule.h" |
43 | #include "OptionSet.h" |
44 | #include "DefaultLexer.h" |
45 | |
46 | using namespace Scintilla; |
47 | using namespace Lexilla; |
48 | |
// Option storage for LexerVisualProlog.
// The lexer currently defines no configurable properties, so this holds no fields.
struct OptionsVisualProlog {
    OptionsVisualProlog() {}
};
54 | |
// Human-readable descriptions of the four keyword lists, in WordListSet() index order.
// The array is terminated by a null pointer.
static const char *const visualPrologWordLists[] = {
    "Major keywords (class, predicates, ...)",
    "Minor keywords (if, then, try, ...)",
    "Directive keywords without the '#' (include, requires, ...)",
    "Documentation keywords without the '@' (short, detail, ...)",
    nullptr,
};
62 | |
63 | struct OptionSetVisualProlog : public OptionSet<OptionsVisualProlog> { |
64 | OptionSetVisualProlog() { |
65 | DefineWordListSets(visualPrologWordLists); |
66 | } |
67 | }; |
68 | |
69 | class LexerVisualProlog : public DefaultLexer { |
70 | WordList majorKeywords; |
71 | WordList minorKeywords; |
72 | WordList directiveKeywords; |
73 | WordList docKeywords; |
74 | OptionsVisualProlog options; |
75 | OptionSetVisualProlog osVisualProlog; |
76 | public: |
77 | LexerVisualProlog() : DefaultLexer("visualprolog" , SCLEX_VISUALPROLOG) { |
78 | } |
79 | virtual ~LexerVisualProlog() { |
80 | } |
81 | void SCI_METHOD Release() override { |
82 | delete this; |
83 | } |
84 | int SCI_METHOD Version() const override { |
85 | return lvRelease5; |
86 | } |
87 | const char * SCI_METHOD PropertyNames() override { |
88 | return osVisualProlog.PropertyNames(); |
89 | } |
90 | int SCI_METHOD PropertyType(const char *name) override { |
91 | return osVisualProlog.PropertyType(name); |
92 | } |
93 | const char * SCI_METHOD DescribeProperty(const char *name) override { |
94 | return osVisualProlog.DescribeProperty(name); |
95 | } |
96 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
97 | const char * SCI_METHOD PropertyGet(const char *key) override { |
98 | return osVisualProlog.PropertyGet(key); |
99 | } |
100 | const char * SCI_METHOD DescribeWordListSets() override { |
101 | return osVisualProlog.DescribeWordListSets(); |
102 | } |
103 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
104 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
105 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
106 | |
107 | void * SCI_METHOD PrivateCall(int, void *) override { |
108 | return 0; |
109 | } |
110 | |
111 | static ILexer5 *LexerFactoryVisualProlog() { |
112 | return new LexerVisualProlog(); |
113 | } |
114 | }; |
115 | |
116 | Sci_Position SCI_METHOD LexerVisualProlog::PropertySet(const char *key, const char *val) { |
117 | if (osVisualProlog.PropertySet(&options, key, val)) { |
118 | return 0; |
119 | } |
120 | return -1; |
121 | } |
122 | |
123 | Sci_Position SCI_METHOD LexerVisualProlog::WordListSet(int n, const char *wl) { |
124 | WordList *wordListN = 0; |
125 | switch (n) { |
126 | case 0: |
127 | wordListN = &majorKeywords; |
128 | break; |
129 | case 1: |
130 | wordListN = &minorKeywords; |
131 | break; |
132 | case 2: |
133 | wordListN = &directiveKeywords; |
134 | break; |
135 | case 3: |
136 | wordListN = &docKeywords; |
137 | break; |
138 | } |
139 | Sci_Position firstModification = -1; |
140 | if (wordListN) { |
141 | WordList wlNew; |
142 | wlNew.Set(wl); |
143 | if (*wordListN != wlNew) { |
144 | wordListN->Set(wl); |
145 | firstModification = 0; |
146 | } |
147 | } |
148 | return firstModification; |
149 | } |
150 | |
151 | // Functor used to truncate history |
152 | struct After { |
153 | Sci_Position line; |
154 | After(Sci_Position line_) : line(line_) {} |
155 | }; |
156 | |
157 | static bool isLowerLetter(int ch){ |
158 | return ccLl == CategoriseCharacter(ch); |
159 | } |
160 | |
161 | static bool isUpperLetter(int ch){ |
162 | return ccLu == CategoriseCharacter(ch); |
163 | } |
164 | |
165 | static bool isAlphaNum(int ch){ |
166 | CharacterCategory cc = CategoriseCharacter(ch); |
167 | return (ccLu == cc || ccLl == cc || ccLt == cc || ccLm == cc || ccLo == cc || ccNd == cc || ccNl == cc || ccNo == cc); |
168 | } |
169 | |
170 | static bool isStringVerbatimOpenClose(int ch){ |
171 | CharacterCategory cc = CategoriseCharacter(ch); |
172 | return (ccPc <= cc && cc <= ccSo); |
173 | } |
174 | |
175 | static bool isIdChar(int ch){ |
176 | return ('_') == ch || isAlphaNum(ch); |
177 | } |
178 | |
179 | static bool isOpenStringVerbatim(int next, int &closingQuote){ |
180 | switch (next) { |
181 | case L'<': |
182 | closingQuote = L'>'; |
183 | return true; |
184 | case L'>': |
185 | closingQuote = L'<'; |
186 | return true; |
187 | case L'(': |
188 | closingQuote = L')'; |
189 | return true; |
190 | case L')': |
191 | closingQuote = L'('; |
192 | return true; |
193 | case L'[': |
194 | closingQuote = L']'; |
195 | return true; |
196 | case L']': |
197 | closingQuote = L'['; |
198 | return true; |
199 | case L'{': |
200 | closingQuote = L'}'; |
201 | return true; |
202 | case L'}': |
203 | closingQuote = L'{'; |
204 | return true; |
205 | case L'_': |
206 | case L'.': |
207 | case L',': |
208 | case L';': |
209 | return false; |
210 | default: |
211 | if (isStringVerbatimOpenClose(next)) { |
212 | closingQuote = next; |
213 | return true; |
214 | } else { |
215 | return false; |
216 | } |
217 | } |
218 | } |
219 | |
220 | // Look ahead to see which colour "end" should have (takes colour after the following keyword) |
221 | static void endLookAhead(char s[], LexAccessor &styler, Sci_Position start) { |
222 | char ch = styler.SafeGetCharAt(start, '\n'); |
223 | while (' ' == ch) { |
224 | start++; |
225 | ch = styler.SafeGetCharAt(start, '\n'); |
226 | } |
227 | Sci_Position i = 0; |
228 | while (i < 100 && isLowerLetter(ch)){ |
229 | s[i] = ch; |
230 | i++; |
231 | ch = styler.SafeGetCharAt(start + i, '\n'); |
232 | } |
233 | s[i] = '\0'; |
234 | } |
235 | |
236 | static void forwardEscapeLiteral(StyleContext &sc, int EscapeState) { |
237 | sc.Forward(); |
238 | if (sc.Match('"') || sc.Match('\'') || sc.Match('\\') || sc.Match('n') || sc.Match('l') || sc.Match('r') || sc.Match('t')) { |
239 | sc.ChangeState(EscapeState); |
240 | } else if (sc.Match('u')) { |
241 | if (IsADigit(sc.chNext, 16)) { |
242 | sc.Forward(); |
243 | if (IsADigit(sc.chNext, 16)) { |
244 | sc.Forward(); |
245 | if (IsADigit(sc.chNext, 16)) { |
246 | sc.Forward(); |
247 | if (IsADigit(sc.chNext, 16)) { |
248 | sc.Forward(); |
249 | sc.ChangeState(EscapeState); |
250 | } |
251 | } |
252 | } |
253 | } |
254 | } |
255 | } |
256 | |
257 | void SCI_METHOD LexerVisualProlog::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
258 | LexAccessor styler(pAccess); |
259 | CharacterSet setDoxygen(CharacterSet::setAlpha, "" ); |
260 | CharacterSet setNumber(CharacterSet::setNone, "0123456789abcdefABCDEFxoXO" ); |
261 | |
262 | StyleContext sc(startPos, length, initStyle, styler, 0x7f); |
263 | |
264 | int styleBeforeDocKeyword = SCE_VISUALPROLOG_DEFAULT; |
265 | Sci_Position currentLine = styler.GetLine(startPos); |
266 | |
267 | int closingQuote = '"'; |
268 | int nestLevel = 0; |
269 | if (currentLine >= 1) |
270 | { |
271 | nestLevel = styler.GetLineState(currentLine - 1); |
272 | closingQuote = nestLevel; |
273 | } |
274 | |
275 | // Truncate ppDefineHistory before current line |
276 | |
277 | for (; sc.More(); sc.Forward()) { |
278 | |
279 | // Determine if the current state should terminate. |
280 | switch (sc.state) { |
281 | case SCE_VISUALPROLOG_OPERATOR: |
282 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
283 | break; |
284 | case SCE_VISUALPROLOG_NUMBER: |
285 | // We accept almost anything because of hex. and number suffixes |
286 | if (!(setNumber.Contains(sc.ch)) || (sc.Match('.') && IsADigit(sc.chNext))) { |
287 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
288 | } |
289 | break; |
290 | case SCE_VISUALPROLOG_IDENTIFIER: |
291 | if (!isIdChar(sc.ch)) { |
292 | char s[1000]; |
293 | sc.GetCurrent(s, sizeof(s)); |
294 | if (0 == strcmp(s, "end" )) { |
295 | endLookAhead(s, styler, sc.currentPos); |
296 | } |
297 | if (majorKeywords.InList(s)) { |
298 | sc.ChangeState(SCE_VISUALPROLOG_KEY_MAJOR); |
299 | } else if (minorKeywords.InList(s)) { |
300 | sc.ChangeState(SCE_VISUALPROLOG_KEY_MINOR); |
301 | } |
302 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
303 | } |
304 | break; |
305 | case SCE_VISUALPROLOG_VARIABLE: |
306 | case SCE_VISUALPROLOG_ANONYMOUS: |
307 | if (!isIdChar(sc.ch)) { |
308 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
309 | } |
310 | break; |
311 | case SCE_VISUALPROLOG_KEY_DIRECTIVE: |
312 | if (!isLowerLetter(sc.ch)) { |
313 | char s[1000]; |
314 | sc.GetCurrent(s, sizeof(s)); |
315 | if (!directiveKeywords.InList(s+1)) { |
316 | sc.ChangeState(SCE_VISUALPROLOG_IDENTIFIER); |
317 | } |
318 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
319 | } |
320 | break; |
321 | case SCE_VISUALPROLOG_COMMENT_BLOCK: |
322 | if (sc.Match('*', '/')) { |
323 | sc.Forward(); |
324 | nestLevel--; |
325 | int nextState = (nestLevel == 0) ? SCE_VISUALPROLOG_DEFAULT : SCE_VISUALPROLOG_COMMENT_BLOCK; |
326 | sc.ForwardSetState(nextState); |
327 | } else if (sc.Match('/', '*')) { |
328 | sc.Forward(); |
329 | nestLevel++; |
330 | } else if (sc.Match('@')) { |
331 | styleBeforeDocKeyword = sc.state; |
332 | sc.SetState(SCE_VISUALPROLOG_COMMENT_KEY_ERROR); |
333 | } |
334 | break; |
335 | case SCE_VISUALPROLOG_COMMENT_LINE: |
336 | if (sc.atLineEnd) { |
337 | int nextState = (nestLevel == 0) ? SCE_VISUALPROLOG_DEFAULT : SCE_VISUALPROLOG_COMMENT_BLOCK; |
338 | sc.SetState(nextState); |
339 | } else if (sc.Match('@')) { |
340 | styleBeforeDocKeyword = sc.state; |
341 | sc.SetState(SCE_VISUALPROLOG_COMMENT_KEY_ERROR); |
342 | } |
343 | break; |
344 | case SCE_VISUALPROLOG_COMMENT_KEY_ERROR: |
345 | if (!setDoxygen.Contains(sc.ch) || sc.atLineEnd) { |
346 | char s[1000]; |
347 | sc.GetCurrent(s, sizeof(s)); |
348 | if (docKeywords.InList(s+1)) { |
349 | sc.ChangeState(SCE_VISUALPROLOG_COMMENT_KEY); |
350 | } |
351 | if (SCE_VISUALPROLOG_COMMENT_LINE == styleBeforeDocKeyword && sc.atLineEnd) { |
352 | // end line comment |
353 | int nextState = (nestLevel == 0) ? SCE_VISUALPROLOG_DEFAULT : SCE_VISUALPROLOG_COMMENT_BLOCK; |
354 | sc.SetState(nextState); |
355 | } else { |
356 | sc.SetState(styleBeforeDocKeyword); |
357 | if (SCE_VISUALPROLOG_COMMENT_BLOCK == styleBeforeDocKeyword && sc.Match('*', '/')) { |
358 | // we have consumed the '*' if it comes immediately after the docKeyword |
359 | sc.Forward(); |
360 | sc.Forward(); |
361 | nestLevel--; |
362 | if (0 == nestLevel) { |
363 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
364 | } |
365 | } |
366 | } |
367 | } |
368 | break; |
369 | case SCE_VISUALPROLOG_STRING_ESCAPE: |
370 | case SCE_VISUALPROLOG_STRING_ESCAPE_ERROR: |
371 | // return to SCE_VISUALPROLOG_STRING and treat as such (fall-through) |
372 | sc.SetState(SCE_VISUALPROLOG_STRING); |
373 | // Falls through. |
374 | case SCE_VISUALPROLOG_STRING: |
375 | if (sc.atLineEnd) { |
376 | sc.SetState(SCE_VISUALPROLOG_STRING_EOL_OPEN); |
377 | } else if (sc.Match(closingQuote)) { |
378 | sc.ForwardSetState(SCE_VISUALPROLOG_DEFAULT); |
379 | } else if (sc.Match('\\')) { |
380 | sc.SetState(SCE_VISUALPROLOG_STRING_ESCAPE_ERROR); |
381 | forwardEscapeLiteral(sc, SCE_VISUALPROLOG_STRING_ESCAPE); |
382 | } |
383 | break; |
384 | case SCE_VISUALPROLOG_STRING_EOL_OPEN: |
385 | if (sc.atLineStart) { |
386 | sc.SetState(SCE_VISUALPROLOG_DEFAULT); |
387 | } |
388 | break; |
389 | case SCE_VISUALPROLOG_STRING_VERBATIM_SPECIAL: |
390 | case SCE_VISUALPROLOG_STRING_VERBATIM_EOL: |
391 | // return to SCE_VISUALPROLOG_STRING_VERBATIM and treat as such (fall-through) |
392 | sc.SetState(SCE_VISUALPROLOG_STRING_VERBATIM); |
393 | // Falls through. |
394 | case SCE_VISUALPROLOG_STRING_VERBATIM: |
395 | if (sc.atLineEnd) { |
396 | sc.SetState(SCE_VISUALPROLOG_STRING_VERBATIM_EOL); |
397 | } else if (sc.Match(closingQuote)) { |
398 | if (closingQuote == sc.chNext) { |
399 | sc.SetState(SCE_VISUALPROLOG_STRING_VERBATIM_SPECIAL); |
400 | sc.Forward(); |
401 | } else { |
402 | sc.ForwardSetState(SCE_VISUALPROLOG_DEFAULT); |
403 | } |
404 | } |
405 | break; |
406 | } |
407 | |
408 | if (sc.atLineEnd) { |
409 | // Update the line state, so it can be seen by next line |
410 | int lineState = 0; |
411 | if (SCE_VISUALPROLOG_STRING_VERBATIM_EOL == sc.state) { |
412 | lineState = closingQuote; |
413 | } else if (SCE_VISUALPROLOG_COMMENT_BLOCK == sc.state) { |
414 | lineState = nestLevel; |
415 | } |
416 | styler.SetLineState(currentLine, lineState); |
417 | currentLine++; |
418 | } |
419 | |
420 | // Determine if a new state should be entered. |
421 | if (sc.state == SCE_VISUALPROLOG_DEFAULT) { |
422 | if (sc.Match('@') && isOpenStringVerbatim(sc.chNext, closingQuote)) { |
423 | sc.SetState(SCE_VISUALPROLOG_STRING_VERBATIM); |
424 | sc.Forward(); |
425 | } else if (IsADigit(sc.ch) || (sc.Match('.') && IsADigit(sc.chNext))) { |
426 | sc.SetState(SCE_VISUALPROLOG_NUMBER); |
427 | } else if (isLowerLetter(sc.ch)) { |
428 | sc.SetState(SCE_VISUALPROLOG_IDENTIFIER); |
429 | } else if (isUpperLetter(sc.ch)) { |
430 | sc.SetState(SCE_VISUALPROLOG_VARIABLE); |
431 | } else if (sc.Match('_')) { |
432 | sc.SetState(SCE_VISUALPROLOG_ANONYMOUS); |
433 | } else if (sc.Match('/', '*')) { |
434 | sc.SetState(SCE_VISUALPROLOG_COMMENT_BLOCK); |
435 | nestLevel = 1; |
436 | sc.Forward(); // Eat the * so it isn't used for the end of the comment |
437 | } else if (sc.Match('%')) { |
438 | sc.SetState(SCE_VISUALPROLOG_COMMENT_LINE); |
439 | } else if (sc.Match('\'')) { |
440 | closingQuote = '\''; |
441 | sc.SetState(SCE_VISUALPROLOG_STRING); |
442 | } else if (sc.Match('"')) { |
443 | closingQuote = '"'; |
444 | sc.SetState(SCE_VISUALPROLOG_STRING); |
445 | } else if (sc.Match('#')) { |
446 | sc.SetState(SCE_VISUALPROLOG_KEY_DIRECTIVE); |
447 | } else if (isoperator(static_cast<char>(sc.ch)) || sc.Match('\\')) { |
448 | sc.SetState(SCE_VISUALPROLOG_OPERATOR); |
449 | } |
450 | } |
451 | |
452 | } |
453 | sc.Complete(); |
454 | styler.Flush(); |
455 | } |
456 | |
457 | // Store both the current line's fold level and the next lines in the |
458 | // level store to make it easy to pick up with each increment |
459 | // and to make it possible to fiddle the current level for "} else {". |
460 | |
461 | void SCI_METHOD LexerVisualProlog::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
462 | |
463 | LexAccessor styler(pAccess); |
464 | |
465 | Sci_PositionU endPos = startPos + length; |
466 | int visibleChars = 0; |
467 | Sci_Position currentLine = styler.GetLine(startPos); |
468 | int levelCurrent = SC_FOLDLEVELBASE; |
469 | if (currentLine > 0) |
470 | levelCurrent = styler.LevelAt(currentLine-1) >> 16; |
471 | int levelMinCurrent = levelCurrent; |
472 | int levelNext = levelCurrent; |
473 | char chNext = styler[startPos]; |
474 | int styleNext = styler.StyleAt(startPos); |
475 | int style = initStyle; |
476 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
477 | char ch = chNext; |
478 | chNext = styler.SafeGetCharAt(i + 1); |
479 | style = styleNext; |
480 | styleNext = styler.StyleAt(i + 1); |
481 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
482 | if (style == SCE_VISUALPROLOG_OPERATOR) { |
483 | if (ch == '{') { |
484 | // Measure the minimum before a '{' to allow |
485 | // folding on "} else {" |
486 | if (levelMinCurrent > levelNext) { |
487 | levelMinCurrent = levelNext; |
488 | } |
489 | levelNext++; |
490 | } else if (ch == '}') { |
491 | levelNext--; |
492 | } |
493 | } |
494 | if (!IsASpace(ch)) |
495 | visibleChars++; |
496 | if (atEOL || (i == endPos-1)) { |
497 | int levelUse = levelCurrent; |
498 | int lev = levelUse | levelNext << 16; |
499 | if (levelUse < levelNext) |
500 | lev |= SC_FOLDLEVELHEADERFLAG; |
501 | if (lev != styler.LevelAt(currentLine)) { |
502 | styler.SetLevel(currentLine, lev); |
503 | } |
504 | currentLine++; |
505 | levelCurrent = levelNext; |
506 | levelMinCurrent = levelCurrent; |
507 | if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) { |
508 | // There is an empty line at end of file so give it same level and empty |
509 | styler.SetLevel(currentLine, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG); |
510 | } |
511 | visibleChars = 0; |
512 | } |
513 | } |
514 | } |
515 | |
516 | LexerModule lmVisualProlog(SCLEX_VISUALPROLOG, LexerVisualProlog::LexerFactoryVisualProlog, "visualprolog" , visualPrologWordLists); |
517 | |