1// Scintilla source code edit control
2/** @file LexProgress.cxx
3 ** Lexer for Progress 4GL.
4 ** Based on LexCPP.cxx of Neil Hodgson <neilh@scintilla.org>
5 **/
6// Copyright 2006-2016 by Yuval Papish <Yuval@YuvCom.com>
7// The License.txt file describes the conditions under which this software may be distributed.
8
/** TODO:

SpeedScript support in html lexer
Differentiate between labels and variables
	Option 1: By symbols table
	Option 2: As a single unidentified symbol in a syntactical line

**/
17
18#include <stdlib.h>
19#include <string.h>
20#include <stdio.h>
21#include <stdarg.h>
22#include <assert.h>
23#include <ctype.h>
24
25#include <string>
26#include <string_view>
27#include <vector>
28#include <map>
29#include <algorithm>
30#include <functional>
31
32#include "ILexer.h"
33#include "Scintilla.h"
34#include "SciLexer.h"
35
36#include "WordList.h"
37#include "LexAccessor.h"
38#include "StyleContext.h"
39#include "CharacterSet.h"
40#include "LexerModule.h"
41#include "OptionSet.h"
42#include "SparseState.h"
43#include "DefaultLexer.h"
44
45using namespace Scintilla;
46using namespace Lexilla;
47
48namespace {
49 // Use an unnamed namespace to protect the functions and classes from name conflicts
50
51 bool IsSpaceEquiv(int state) {
52 return (state == SCE_ABL_COMMENT ||
53 state == SCE_ABL_LINECOMMENT ||
54 state == SCE_ABL_DEFAULT);
55 }
56
57 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler, WordList &markerList){
58 if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
59 const int lengthMarker = 50;
60 char marker[lengthMarker+1];
61 Sci_Position currPos = (Sci_Position) sc.currentPos;
62 Sci_Position i = 0;
63 while (i < lengthMarker) {
64 char ch = styler.SafeGetCharAt(currPos + i);
65 if (IsASpace(ch) || isoperator(ch)) {
66 break;
67 }
68 marker[i] = ch;
69 i++;
70 }
71 marker[i] = '\0';
72 if (markerList.InListAbbreviated (marker,'(')) {
73 sc.SetState(SCE_ABL_TASKMARKER);
74 }
75 }
76 }
77
78 bool IsStreamCommentStyle(int style) {
79 return style == SCE_ABL_COMMENT;
80 // style == SCE_ABL_LINECOMMENT; Only block comments are used for folding
81 }
82
// Settings of LexerABL that can be changed through lexer properties.
// The initial values below are the defaults of a freshly created lexer.
struct OptionsABL {
	bool fold = false;                  // master switch for folding
	bool foldSyntaxBased = true;        // fold on block / end keywords
	bool foldComment = true;            // allow comment folding
	bool foldCommentMultiline = true;   // fold multi-line comments
	bool foldCompact = false;           // flag blank lines as white space
};
98
// Descriptions of the keyword lists, in the index order used by
// LexerABL::WordListSet(). The table ends with a null pointer.
const char *const ablWordLists[] = {
	"Primary keywords and identifiers",
	"Keywords that opens a block, only when used to begin a syntactic line",
	"Keywords that opens a block anywhere in a syntactic line",
	"Task Marker", /* "END MODIFY START TODO" */
	nullptr,
};
106
107 struct OptionSetABL : public OptionSet<OptionsABL> {
108 OptionSetABL() {
109 DefineProperty("fold", &OptionsABL::fold);
110
111 DefineProperty("fold.abl.syntax.based", &OptionsABL::foldSyntaxBased,
112 "Set this property to 0 to disable syntax based folding.");
113
114 DefineProperty("fold.comment", &OptionsABL::foldComment,
115 "This option enables folding multi-line comments and explicit fold points when using the ABL lexer. ");
116
117 DefineProperty("fold.abl.comment.multiline", &OptionsABL::foldCommentMultiline,
118 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
119
120 DefineProperty("fold.compact", &OptionsABL::foldCompact);
121
122 DefineWordListSets(ablWordLists);
123 }
124 };
125}
126
127class LexerABL : public DefaultLexer {
128 CharacterSet setWord;
129 CharacterSet setNegationOp;
130 CharacterSet setArithmethicOp;
131 CharacterSet setRelOp;
132 CharacterSet setLogicalOp;
133 CharacterSet setWordStart;
134 WordList keywords1; // regular keywords
135 WordList keywords2; // block opening keywords, only when isSentenceStart
136 WordList keywords3; // block opening keywords
137 WordList keywords4; // Task Marker
138 OptionsABL options;
139 OptionSetABL osABL;
140public:
141 LexerABL() :
142 DefaultLexer("abl", SCLEX_PROGRESS),
143 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
144 setNegationOp(CharacterSet::setNone, "!"),
145 setArithmethicOp(CharacterSet::setNone, "+-/*%"),
146 setRelOp(CharacterSet::setNone, "=!<>"),
147 setLogicalOp(CharacterSet::setNone, "|&"){
148 }
149 virtual ~LexerABL() {
150 }
151 void SCI_METHOD Release() override {
152 delete this;
153 }
154 int SCI_METHOD Version() const override {
155 return lvRelease5;
156 }
157 const char * SCI_METHOD PropertyNames() override {
158 return osABL.PropertyNames();
159 }
160 int SCI_METHOD PropertyType(const char *name) override {
161 return osABL.PropertyType(name);
162 }
163 const char * SCI_METHOD DescribeProperty(const char *name) override {
164 return osABL.DescribeProperty(name);
165 }
166 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override ;
167 const char * SCI_METHOD PropertyGet(const char *key) override {
168 return osABL.PropertyGet(key);
169 }
170
171 const char * SCI_METHOD DescribeWordListSets() override {
172 return osABL.DescribeWordListSets();
173 }
174 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
175 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
176 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
177
178 void * SCI_METHOD PrivateCall(int, void *) override {
179 return 0;
180 }
181 int SCI_METHOD LineEndTypesSupported() override {
182 return SC_LINE_END_TYPE_DEFAULT;
183 }
184 static ILexer5 *LexerFactoryABL() {
185 return new LexerABL();
186 }
187};
188
189Sci_Position SCI_METHOD LexerABL::PropertySet(const char *key, const char *val) {
190 if (osABL.PropertySet(&options, key, val)) {
191 return 0;
192 }
193 return -1;
194}
195
196Sci_Position SCI_METHOD LexerABL::WordListSet(int n, const char *wl) {
197 WordList *wordListN = 0;
198 switch (n) {
199 case 0:
200 wordListN = &keywords1;
201 break;
202 case 1:
203 wordListN = &keywords2;
204 break;
205 case 2:
206 wordListN = &keywords3;
207 break;
208 case 3:
209 wordListN = &keywords4;
210 break;
211 }
212 Sci_Position firstModification = -1;
213 if (wordListN) {
214 WordList wlNew;
215 wlNew.Set(wl);
216 if (*wordListN != wlNew) {
217 wordListN->Set(wl);
218 firstModification = 0;
219 }
220 }
221 return firstModification;
222}
223
224void SCI_METHOD LexerABL::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
225 LexAccessor styler(pAccess);
226
227 setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
228
229 int visibleChars = 0;
230 int visibleChars1 = 0;
231 int styleBeforeTaskMarker = SCE_ABL_DEFAULT;
232 bool continuationLine = false;
233 int commentNestingLevel = 0;
234 bool isSentenceStart = true;
235 bool possibleOOLChange = false;
236
237 Sci_Position lineCurrent = styler.GetLine(startPos);
238 if (initStyle == SCE_ABL_PREPROCESSOR) {
239 // Set continuationLine if last character of previous line is '~'
240 if (lineCurrent > 0) {
241 Sci_Position endLinePrevious = styler.LineEnd(lineCurrent-1);
242 if (endLinePrevious > 0) {
243 continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '~';
244 }
245 }
246 }
247
248 // Look back to set variables that are actually invisible secondary states. The reason to avoid formal states is to cut down on state's bits
249 if (startPos > 0) {
250 Sci_Position back = startPos;
251 bool checkCommentNestingLevel = (initStyle == SCE_ABL_COMMENT);
252 bool checkIsSentenceStart = (initStyle == SCE_ABL_DEFAULT || initStyle == SCE_ABL_IDENTIFIER);
253 char ch;
254 char st;
255 char chPrev;
256 char chPrev_1;
257 char chPrev_2;
258 char chPrev_3;
259
260 while (back >= 0 && (checkCommentNestingLevel || checkIsSentenceStart)) {
261 ch = styler.SafeGetCharAt(back);
262 styler.Flush(); // looking at styles so need to flush
263 st = styler.StyleAt(back);
264
265 chPrev = styler.SafeGetCharAt(back-1);
266 // isSentenceStart is a non-visible state, used to identify where statements and preprocessor declerations can start
267 if (checkIsSentenceStart && st != SCE_ABL_COMMENT && st != SCE_ABL_LINECOMMENT && st != SCE_ABL_CHARACTER && st != SCE_ABL_STRING ) {
268 chPrev_1 = styler.SafeGetCharAt(back-2);
269 chPrev_2 = styler.SafeGetCharAt(back-3);
270 chPrev_3 = styler.SafeGetCharAt(back-4);
271 if ((chPrev == '.' || chPrev == ':' || chPrev == '}' ||
272 (chPrev_3 == 'e' && chPrev_2 == 'l' && chPrev_1 == 's' && chPrev == 'e') ||
273 (chPrev_3 == 't' && chPrev_2 == 'h' && chPrev_1 == 'e' && chPrev == 'n')) &&
274 (IsASpace(ch) || (ch == '/' && styler.SafeGetCharAt(back+1) == '*'))
275 ) {
276 checkIsSentenceStart = false;
277 isSentenceStart = true;
278 }
279 else if (IsASpace(chPrev) && ch == '{') {
280 checkIsSentenceStart = false;
281 isSentenceStart = false;
282 }
283 }
284
285 // commentNestingLevel is a non-visible state, used to identify the nesting level of a comment
286 if (checkCommentNestingLevel) {
287 if (chPrev == '/' && ch == '*') {
288 commentNestingLevel++;
289 // eat the '/' so we don't miscount a */ if we see /*/*
290 --back;
291 }
292 if (chPrev == '*' && ch == '/') {
293 commentNestingLevel--;
294 // eat the '*' so we don't miscount a /* if we see */*/
295 --back;
296 }
297 }
298 --back;
299 }
300 }
301
302 StyleContext sc(startPos, length, initStyle, styler, static_cast<unsigned char>(0xff));
303 Sci_Position lineEndNext = styler.LineEnd(lineCurrent);
304
305 for (; sc.More();) {
306 if (sc.atLineStart) {
307 visibleChars = 0;
308 visibleChars1 = 0;
309 }
310 if (sc.atLineEnd) {
311 lineCurrent++;
312 lineEndNext = styler.LineEnd(lineCurrent);
313 }
314 // Handle line continuation generically.
315 if (sc.ch == '~') {
316 if (static_cast<Sci_Position>((sc.currentPos+1)) >= lineEndNext) {
317 lineCurrent++;
318 lineEndNext = styler.LineEnd(lineCurrent);
319 sc.Forward();
320 if (sc.ch == '\r' && sc.chNext == '\n') {
321 sc.Forward();
322 }
323 continuationLine = true;
324 sc.Forward();
325 continue;
326 }
327 }
328
329 const bool atLineEndBeforeSwitch = sc.atLineEnd;
330 // Determine if the current state should terminate.
331 switch (sc.state) {
332 case SCE_ABL_OPERATOR:
333 sc.SetState(SCE_ABL_DEFAULT);
334 break;
335 case SCE_ABL_NUMBER:
336 // We accept almost anything because of hex. and maybe number suffixes and scientific notations in the future
337 if (!(setWord.Contains(sc.ch)
338 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
339 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
340 sc.SetState(SCE_ABL_DEFAULT);
341 }
342 break;
343 case SCE_ABL_IDENTIFIER:
344 if (sc.atLineStart || sc.atLineEnd || (!setWord.Contains(sc.ch) && sc.ch != '-')) {
345 char s[1000];
346 sc.GetCurrentLowered(s, sizeof(s));
347 bool isLastWordEnd = (s[0] == 'e' && s[1] =='n' && s[2] == 'd' && !IsAlphaNumeric(s[3]) && s[3] != '-'); // helps to identify "end trigger" phrase
348 if ((isSentenceStart && keywords2.InListAbbreviated (s,'(')) || (!isLastWordEnd && keywords3.InListAbbreviated (s,'('))) {
349 sc.ChangeState(SCE_ABL_BLOCK);
350 isSentenceStart = false;
351 }
352 else if (keywords1.InListAbbreviated (s,'(')) {
353 if (isLastWordEnd ||
354 (s[0] == 'f' && s[1] =='o' && s[2] == 'r' && s[3] == 'w' && s[4] =='a' && s[5] == 'r' && s[6] == 'd'&& !IsAlphaNumeric(s[7]))) {
355 sc.ChangeState(SCE_ABL_END);
356 isSentenceStart = false;
357 }
358 else if ((s[0] == 'e' && s[1] =='l' && s[2] == 's' && s[3] == 'e') ||
359 (s[0] == 't' && s[1] =='h' && s[2] == 'e' && s[3] == 'n')) {
360 sc.ChangeState(SCE_ABL_WORD);
361 isSentenceStart = true;
362 }
363 else {
364 sc.ChangeState(SCE_ABL_WORD);
365 isSentenceStart = false;
366 }
367 }
368 sc.SetState(SCE_ABL_DEFAULT);
369 }
370 break;
371 case SCE_ABL_PREPROCESSOR:
372 if (sc.atLineStart && !continuationLine) {
373 sc.SetState(SCE_ABL_DEFAULT);
374 // Force Scintilla to acknowledge changed stated even though this change might happen outside of the current line
375 possibleOOLChange = true;
376 isSentenceStart = true;
377 }
378 break;
379 case SCE_ABL_LINECOMMENT:
380 if (sc.atLineStart && !continuationLine) {
381 sc.SetState(SCE_ABL_DEFAULT);
382 isSentenceStart = true;
383 } else {
384 styleBeforeTaskMarker = SCE_ABL_LINECOMMENT;
385 highlightTaskMarker(sc, styler, keywords4);
386 }
387 break;
388 case SCE_ABL_TASKMARKER:
389 if (isoperator(sc.ch) || IsASpace(sc.ch)) {
390 sc.SetState(styleBeforeTaskMarker);
391 styleBeforeTaskMarker = SCE_ABL_DEFAULT;
392 }
393 // fall through
394 case SCE_ABL_COMMENT:
395 if (sc.Match('*', '/')) {
396 sc.Forward();
397 commentNestingLevel--;
398 if (commentNestingLevel == 0) {
399 sc.ForwardSetState(SCE_ABL_DEFAULT);
400 possibleOOLChange = true;
401 }
402 } else if (sc.Match('/', '*')) {
403 commentNestingLevel++;
404 sc.Forward();
405 }
406 if (commentNestingLevel > 0) {
407 styleBeforeTaskMarker = SCE_ABL_COMMENT;
408 possibleOOLChange = true;
409 highlightTaskMarker(sc, styler, keywords4);
410 }
411 break;
412 case SCE_ABL_STRING:
413 if (sc.ch == '~') {
414 sc.Forward(); // Skip a character after a tilde
415 } else if (sc.ch == '\"') {
416 sc.ForwardSetState(SCE_ABL_DEFAULT);
417 }
418 break;
419 case SCE_ABL_CHARACTER:
420 if (sc.ch == '~') {
421 sc.Forward(); // Skip a character after a tilde
422 } else if (sc.ch == '\'') {
423 sc.ForwardSetState(SCE_ABL_DEFAULT);
424 }
425 break;
426 }
427
428 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
429 // State exit processing consumed characters up to end of line.
430 lineCurrent++;
431 lineEndNext = styler.LineEnd(lineCurrent);
432 }
433
434 // Determine if a new state should be entered.
435 if (sc.state == SCE_ABL_DEFAULT) {
436 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
437 sc.SetState(SCE_ABL_NUMBER);
438 isSentenceStart = false;
439 } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch)) && sc.chPrev != '&') {
440 sc.SetState(SCE_ABL_IDENTIFIER);
441 } else if (sc.Match('/', '*')) {
442 if (sc.chPrev == '.' || sc.chPrev == ':' || sc.chPrev == '}') {
443 isSentenceStart = true;
444 }
445 sc.SetState(SCE_ABL_COMMENT);
446 possibleOOLChange = true;
447 commentNestingLevel++;
448 sc.Forward(); // Eat the * so it isn't used for the end of the comment
449 } else if (sc.ch == '\"') {
450 sc.SetState(SCE_ABL_STRING);
451 isSentenceStart = false;
452 } else if (sc.ch == '\'') {
453 sc.SetState(SCE_ABL_CHARACTER);
454 isSentenceStart = false;
455 } else if (sc.ch == '&' && visibleChars1 == 0 && isSentenceStart) {
456 // Preprocessor commands are alone on their line
457 sc.SetState(SCE_ABL_PREPROCESSOR);
458 // Force Scintilla to acknowledge changed stated even though this change might happen outside of the current line
459 possibleOOLChange = true;
460 // Skip whitespace between & and preprocessor word
461 do {
462 sc.Forward();
463 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
464 if (sc.atLineEnd) {
465 sc.SetState(SCE_ABL_DEFAULT);
466 }
467 } else if (sc.Match('/','/') && (IsASpace(sc.chPrev) || isSentenceStart)) {
468 // Line comments are valid after a white space or EOL
469 sc.SetState(SCE_ABL_LINECOMMENT);
470 // Skip whitespace between // and preprocessor word
471 do {
472 sc.Forward();
473 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
474 if (sc.atLineEnd) {
475 sc.SetState(SCE_ABL_DEFAULT);
476 }
477 } else if (isoperator(sc.ch)) {
478 sc.SetState(SCE_ABL_OPERATOR);
479 /* This code allows highlight of handles. Alas, it would cause the phrase "last-event:function"
480 to be recognized as a BlockBegin */
481 isSentenceStart = false;
482 }
483 else if ((sc.chPrev == '.' || sc.chPrev == ':' || sc.chPrev == '}') && (IsASpace(sc.ch))) {
484 isSentenceStart = true;
485 }
486 }
487 if (!IsASpace(sc.ch)) {
488 visibleChars1++;
489 }
490 if (!IsASpace(sc.ch) && !IsSpaceEquiv(sc.state)) {
491 visibleChars++;
492 }
493 continuationLine = false;
494 sc.Forward();
495 }
496 if (possibleOOLChange)
497 styler.ChangeLexerState(startPos, startPos + length);
498 sc.Complete();
499}
500
501
502// Store both the current line's fold level and the next lines in the
503// level store to make it easy to pick up with each increment
504// and to make it possible to fiddle the current level for "} else {".
505
506void SCI_METHOD LexerABL::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
507
508 if (!options.fold)
509 return;
510
511 LexAccessor styler(pAccess);
512
513 Sci_PositionU endPos = startPos + length;
514 int visibleChars = 0;
515 Sci_Position lineCurrent = styler.GetLine(startPos);
516 int levelCurrent = SC_FOLDLEVELBASE;
517 if (lineCurrent > 0)
518 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
519 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
520 int levelNext = levelCurrent;
521 char chNext = styler[startPos];
522 int styleNext = styler.StyleAt(startPos);
523 int style = initStyle;
524 for (Sci_PositionU i = startPos; i < endPos; i++) {
525 chNext = static_cast<char>(tolower(chNext)); // check tolower
526 char ch = chNext;
527 chNext = styler.SafeGetCharAt(i+1);
528 int stylePrev = style;
529 style = styleNext;
530 styleNext = styler.StyleAt(i+1);
531 bool atEOL = i == (lineStartNext-1);
532 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style)) {
533 if (!IsStreamCommentStyle(stylePrev)) {
534 levelNext++;
535 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
536 // Comments don't end at end of line and the next character may be unstyled.
537 levelNext--;
538 }
539 }
540 if (options.foldSyntaxBased) {
541 if (style == SCE_ABL_BLOCK && !IsAlphaNumeric(chNext)) {
542 levelNext++;
543 }
544 else if (style == SCE_ABL_END && (ch == 'e' || ch == 'f')) {
545 levelNext--;
546 }
547 }
548 if (!IsASpace(ch))
549 visibleChars++;
550 if (atEOL || (i == endPos-1)) {
551 int lev = levelCurrent | levelNext << 16;
552 if (visibleChars == 0 && options.foldCompact)
553 lev |= SC_FOLDLEVELWHITEFLAG;
554 if (levelCurrent < levelNext)
555 lev |= SC_FOLDLEVELHEADERFLAG;
556 if (lev != styler.LevelAt(lineCurrent)) {
557 styler.SetLevel(lineCurrent, lev);
558 }
559 lineCurrent++;
560 lineStartNext = styler.LineStart(lineCurrent+1);
561 levelCurrent = levelNext;
562 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
563 // There is an empty line at end of file so give it same level and empty
564 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
565 }
566 visibleChars = 0;
567 }
568 }
569}
570
571LexerModule lmProgress(SCLEX_PROGRESS, LexerABL::LexerFactoryABL, "abl", ablWordLists);
572